add result preview and exports

This commit is contained in:
2026-05-08 21:58:39 +08:00
parent 491009c0d7
commit fe6fcf9826
3 changed files with 495 additions and 204 deletions

View File

@@ -1,6 +1,6 @@
# HIS_Sur_Data_Deal # HIS_Sur_Data_Deal
网页端检测数据处理工具。上传 `待处理检测数据.zip` 后,服务会自动识别 V1/V2 数据结构,调用原处理脚本生成 Excel,返回结果压缩包。 网页端检测数据处理工具。上传 `待处理检测数据.zip` 后,服务会自动识别 V1/V2 数据结构,调用原处理脚本生成 Excel,在网页中展示结果摘要、工作表统计和部分数据预览;用户可按需导出单个 Excel 或全部 Excel 压缩包。
## 本地运行 ## 本地运行
@@ -31,3 +31,4 @@ V1zip 解压后包含 `Patients_info.csv`、`Tests_List`、`Tests_Detail_List
V2: zip 解压后包含 `Patients_info.csv`,并按患者目录分别保存检测汇总和具体检测,输出多个患者 Excel。 V2: zip 解压后包含 `Patients_info.csv`,并按患者目录分别保存检测汇总和具体检测,输出多个患者 Excel。
导出的压缩包默认只包含 Excel 结果,不包含处理日志。

View File

@@ -3,14 +3,20 @@ import shutil
import tempfile import tempfile
import uuid import uuid
from pathlib import Path from pathlib import Path
from urllib.parse import quote
from fastapi import FastAPI, File, Form, HTTPException, UploadFile from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse, HTMLResponse from fastapi.responses import FileResponse, HTMLResponse
from .processor import ProcessingError, run_processing from .processor import (
ProcessingError,
ProcessingResult,
create_result_zip,
find_output_file,
run_processing,
)
APP_ROOT = Path(__file__).resolve().parent
WORK_ROOT = Path(tempfile.gettempdir()) / "his_sur_data_deal_jobs" WORK_ROOT = Path(tempfile.gettempdir()) / "his_sur_data_deal_jobs"
WORK_ROOT.mkdir(parents=True, exist_ok=True) WORK_ROOT.mkdir(parents=True, exist_ok=True)
@@ -19,185 +25,48 @@ app = FastAPI(title="检测数据处理")
@app.get("/", response_class=HTMLResponse) @app.get("/", response_class=HTMLResponse)
def index() -> str: def index() -> str:
return """ return _page_shell(
<!doctype html> """
<html lang="zh-CN"> <section class="panel">
<head> <form action="/process" method="post" enctype="multipart/form-data">
<meta charset="utf-8"> <div>
<meta name="viewport" content="width=device-width, initial-scale=1"> <label for="file">待处理检测数据.zip</label>
<title>检测数据处理</title> <input id="file" name="file" type="file" accept=".zip,application/zip" required>
<style> </div>
:root { <div class="grid">
color-scheme: light; <div>
--bg: #f6f7f9; <label for="mode">处理模式</label>
--panel: #ffffff; <select id="mode" name="mode">
--text: #1f2937; <option value="auto">自动识别</option>
--muted: #64748b; <option value="v1">V1 整批汇总</option>
--line: #d7dde5; <option value="v2">V2 单患者文件</option>
--primary: #146c5c; </select>
--primary-dark: #0f574b; </div>
--danger: #b42318; <div>
} <label for="data_type">患者编号类型</label>
* { box-sizing: border-box; } <select id="data_type" name="data_type">
body { <option value="pat_no">患者号 pat_no</option>
margin: 0; <option value="zhuyuanhao">住院号 zhuyuanhao</option>
min-height: 100vh; </select>
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Microsoft YaHei", sans-serif; </div>
background: var(--bg); <div>
color: var(--text); <label for="result_name">结果文件名</label>
} <input id="result_name" name="result_name" type="text" value="Result">
.shell { </div>
width: min(960px, calc(100vw - 32px)); </div>
margin: 0 auto; <div class="checks">
padding: 36px 0; <label><input type="checkbox" name="show_not_match" value="true" checked> 输出未匹配内容</label>
} <label><input type="checkbox" name="show_all_infos" value="true"> 输出全部检测记录</label>
header { </div>
display: flex; <button type="submit">开始处理</button>
align-items: flex-end; </form>
justify-content: space-between; <div class="note">V1 适用于含有 Patients_info.csv、Tests_List、Tests_Detail_List 的批量数据V2 适用于每个患者单独目录的数据。</div>
gap: 24px; </section>
margin-bottom: 24px; """
} )
h1 {
margin: 0;
font-size: 28px;
line-height: 1.2;
font-weight: 750;
letter-spacing: 0;
}
.sub {
margin-top: 8px;
color: var(--muted);
font-size: 14px;
}
.panel {
background: var(--panel);
border: 1px solid var(--line);
border-radius: 8px;
padding: 24px;
box-shadow: 0 10px 26px rgba(17, 24, 39, 0.06);
}
form {
display: grid;
gap: 18px;
}
label {
display: block;
margin-bottom: 8px;
font-size: 14px;
font-weight: 650;
}
input[type="file"], select, input[type="text"] {
width: 100%;
border: 1px solid var(--line);
border-radius: 6px;
padding: 11px 12px;
font: inherit;
background: #fff;
color: var(--text);
}
.grid {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 14px;
}
.checks {
display: flex;
flex-wrap: wrap;
gap: 16px;
color: var(--text);
font-size: 14px;
}
.checks label {
display: inline-flex;
align-items: center;
gap: 8px;
margin: 0;
font-weight: 500;
}
button {
width: fit-content;
min-width: 148px;
border: 0;
border-radius: 6px;
padding: 12px 18px;
background: var(--primary);
color: #fff;
font: inherit;
font-weight: 700;
cursor: pointer;
}
button:hover { background: var(--primary-dark); }
.note {
color: var(--muted);
font-size: 13px;
line-height: 1.6;
margin-top: 18px;
border-top: 1px solid var(--line);
padding-top: 16px;
}
.error {
margin-bottom: 16px;
color: var(--danger);
font-weight: 650;
}
@media (max-width: 720px) {
header { display: block; }
.grid { grid-template-columns: 1fr; }
button { width: 100%; }
}
</style>
</head>
<body>
<main class="shell">
<header>
<div>
<h1>检测数据处理</h1>
<div class="sub">上传“待处理检测数据.zip”处理完成后自动下载结果压缩包。</div>
</div>
</header>
<section class="panel">
<form action="/process" method="post" enctype="multipart/form-data">
<div>
<label for="file">待处理检测数据.zip</label>
<input id="file" name="file" type="file" accept=".zip,application/zip" required>
</div>
<div class="grid">
<div>
<label for="mode">处理模式</label>
<select id="mode" name="mode">
<option value="auto">自动识别</option>
<option value="v1">V1 整批汇总</option>
<option value="v2">V2 单患者文件</option>
</select>
</div>
<div>
<label for="data_type">患者编号类型</label>
<select id="data_type" name="data_type">
<option value="pat_no">患者号 pat_no</option>
<option value="zhuyuanhao">住院号 zhuyuanhao</option>
</select>
</div>
<div>
<label for="result_name">结果文件名</label>
<input id="result_name" name="result_name" type="text" value="Result">
</div>
</div>
<div class="checks">
<label><input type="checkbox" name="show_not_match" value="true" checked> 输出未匹配内容</label>
<label><input type="checkbox" name="show_all_infos" value="true"> 输出全部检测记录</label>
</div>
<button type="submit">开始处理</button>
</form>
<div class="note">V1 适用于含有 Patients_info.csv、Tests_List、Tests_Detail_List 的批量数据V2 适用于每个患者单独目录的数据。</div>
</section>
</main>
</body>
</html>
"""
@app.post("/process") @app.post("/process", response_class=HTMLResponse)
async def process( async def process(
file: UploadFile = File(...), file: UploadFile = File(...),
mode: str = Form("auto"), mode: str = Form("auto"),
@@ -205,7 +74,7 @@ async def process(
result_name: str = Form("Result"), result_name: str = Form("Result"),
show_not_match: str | None = Form(None), show_not_match: str | None = Form(None),
show_all_infos: str | None = Form(None), show_all_infos: str | None = Form(None),
) -> FileResponse: ) -> str:
if not file.filename or not file.filename.lower().endswith(".zip"): if not file.filename or not file.filename.lower().endswith(".zip"):
raise HTTPException(status_code=400, detail="请上传 zip 文件。") raise HTTPException(status_code=400, detail="请上传 zip 文件。")
@@ -216,7 +85,7 @@ async def process(
with upload_path.open("wb") as out: with upload_path.open("wb") as out:
shutil.copyfileobj(file.file, out) shutil.copyfileobj(file.file, out)
result_zip = run_processing( result = run_processing(
zip_path=upload_path, zip_path=upload_path,
job_dir=job_dir, job_dir=job_dir,
mode=mode, mode=mode,
@@ -226,11 +95,20 @@ async def process(
show_all_infos=show_all_infos == "true", show_all_infos=show_all_infos == "true",
) )
except ProcessingError as exc: except ProcessingError as exc:
safe_detail = html.escape(str(exc)) raise HTTPException(status_code=400, detail=str(exc)) from exc
raise HTTPException(status_code=400, detail=safe_detail) from exc
except Exception as exc: except Exception as exc:
raise HTTPException(status_code=500, detail=f"处理失败:{exc}") from exc raise HTTPException(status_code=500, detail=f"处理失败:{exc}") from exc
return _render_result(result)
@app.get("/download/all/{job_id}")
def download_all(job_id: str) -> FileResponse:
job_dir = _get_job_dir(job_id)
try:
result_zip = create_result_zip(job_dir)
except ProcessingError as exc:
raise HTTPException(status_code=404, detail=str(exc)) from exc
return FileResponse( return FileResponse(
result_zip, result_zip,
media_type="application/zip", media_type="application/zip",
@@ -238,7 +116,344 @@ async def process(
) )
@app.get("/download/file/{job_id}")
def download_file(job_id: str, path: str = Query(...)) -> FileResponse:
    """Send one generated Excel workbook of a finished job as a download.

    Args:
        job_id: Identifier of the job; resolved through ``_get_job_dir``.
        path: Required query parameter handed to ``find_output_file`` to
            locate the workbook inside the job.

    Raises:
        HTTPException: 404 when the job directory is rejected by
            ``_get_job_dir`` or ``find_output_file`` raises ``ProcessingError``.
    """
    xlsx_media_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    job_dir = _get_job_dir(job_id)
    try:
        workbook_path = find_output_file(job_dir, path)
    except ProcessingError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return FileResponse(
        workbook_path,
        media_type=xlsx_media_type,
        filename=workbook_path.name,
    )
@app.get("/health") @app.get("/health")
def health() -> dict[str, str]: def health() -> dict[str, str]:
return {"status": "ok"} return {"status": "ok"}
def _get_job_dir(job_id: str) -> Path:
    """Map a job id to its existing working directory below ``WORK_ROOT``.

    Args:
        job_id: Identifier taken from the request URL.

    Returns:
        The resolved job directory.

    Raises:
        HTTPException: 404 when the id is not purely alphanumeric, when the
            resolved path is not located inside ``WORK_ROOT``, or when it is
            not an existing directory.
    """
    # Reject anything that could carry path separators or dots.
    if not job_id.isalnum():
        raise HTTPException(status_code=404, detail="结果不存在。")
    work_root = WORK_ROOT.resolve()
    job_dir = (WORK_ROOT / job_id).resolve()
    # Compare path components rather than string prefixes: a sibling such as
    # "<root>_other" starts with "<root>" as text but is not inside it.
    # A job must also be a directory, not an arbitrary file.
    if not job_dir.is_relative_to(work_root) or not job_dir.is_dir():
        raise HTTPException(status_code=404, detail="结果不存在。")
    return job_dir
def _render_result(result: ProcessingResult) -> str:
    """Build the full result page for a finished processing job.

    The page shows four metrics (mode, workbook count, sheet count, data-row
    count), the export/continue links, and one rendered section per workbook.
    Each sheet contributes ``rows - 1`` data rows, never less than zero.
    """
    every_sheet = [sheet for workbook in result.files for sheet in workbook.sheets]
    total_sheets = len(every_sheet)
    total_rows = sum(max(sheet.rows - 1, 0) for sheet in every_sheet)
    rendered_files = [_render_file(workbook, result.job_id) for workbook in result.files]
    file_items = "\n".join(rendered_files)
    body = f"""
<section class="summary">
<div class="metric"><span>处理模式</span><strong>{html.escape(result.mode.upper())}</strong></div>
<div class="metric"><span>Excel 文件</span><strong>{len(result.files)}</strong></div>
<div class="metric"><span>工作表</span><strong>{total_sheets}</strong></div>
<div class="metric"><span>数据行</span><strong>{total_rows}</strong></div>
</section>
<section class="actions">
<a class="button" href="/download/all/{html.escape(result.job_id)}">导出全部 Excel</a>
<a class="ghost" href="/">继续处理新文件</a>
</section>
<section class="results">
{file_items}
</section>
"""
    return _page_shell(body, subtitle="处理完成,可先查看结果摘要和部分预览,再选择导出。")
def _render_file(file, job_id: str) -> str:
sheet_items = "\n".join(_render_sheet(sheet) for sheet in file.sheets[:6])
more = ""
if len(file.sheets) > 6:
more = f'<div class="more">还有 {len(file.sheets) - 6} 个工作表未展开预览,可导出 Excel 查看完整内容。</div>'
file_url = f"/download/file/{html.escape(job_id)}?path={quote(file.relpath)}"
return f"""
<article class="file-block">
<div class="file-head">
<div>
<h2>{html.escape(file.filename)}</h2>
<p>{len(file.sheets)} 个工作表</p>
</div>
<a class="small-button" href="{file_url}">导出此 Excel</a>
</div>
{sheet_items}
{more}
</article>
"""
def _render_sheet(sheet) -> str:
preview = sheet.preview[:6]
table = '<div class="empty">此工作表没有可预览的数据。</div>'
if preview:
max_cols = min(max((len(row) for row in preview), default=0), 12)
rows = []
for index, row in enumerate(preview):
cells = []
for value in (row + [""] * max_cols)[:max_cols]:
tag = "th" if index == 0 else "td"
cells.append(f"<{tag}>{html.escape(value)}</{tag}>")
rows.append("<tr>" + "".join(cells) + "</tr>")
table = f"<div class=\"table-wrap\"><table>{''.join(rows)}</table></div>"
return f"""
<details class="sheet" open>
<summary>
<span>{html.escape(sheet.name)}</span>
<small>{sheet.rows} 行 · {sheet.columns} 列</small>
</summary>
{table}
</details>
"""
def _page_shell(body: str, subtitle: str = "上传“待处理检测数据.zip”处理完成后在网页中查看结果。") -> str:
    """Wrap a page fragment in the shared HTML document (head, CSS, header).

    Args:
        body: HTML fragment placed inside ``<main>``. It is inserted as-is,
            without escaping, so callers must pass already-safe markup.
        subtitle: Plain text shown under the page title; it is HTML-escaped
            before insertion.

    Returns:
        The complete HTML document as a string.
    """
    # The template is an f-string, so literal CSS braces are doubled ({{ }}).
    return f"""
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>检测数据处理</title>
<style>
:root {{
color-scheme: light;
--bg: #f6f7f9;
--panel: #ffffff;
--text: #1f2937;
--muted: #64748b;
--line: #d7dde5;
--primary: #146c5c;
--primary-dark: #0f574b;
--soft: #eef7f5;
}}
* {{ box-sizing: border-box; }}
body {{
margin: 0;
min-height: 100vh;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Microsoft YaHei", sans-serif;
background: var(--bg);
color: var(--text);
}}
.shell {{
width: min(1120px, calc(100vw - 32px));
margin: 0 auto;
padding: 36px 0;
}}
header {{
display: flex;
align-items: flex-end;
justify-content: space-between;
gap: 24px;
margin-bottom: 24px;
}}
h1 {{
margin: 0;
font-size: 28px;
line-height: 1.2;
font-weight: 750;
letter-spacing: 0;
}}
.sub {{
margin-top: 8px;
color: var(--muted);
font-size: 14px;
}}
.panel, .file-block {{
background: var(--panel);
border: 1px solid var(--line);
border-radius: 8px;
padding: 24px;
box-shadow: 0 10px 26px rgba(17, 24, 39, 0.06);
}}
form {{
display: grid;
gap: 18px;
}}
label {{
display: block;
margin-bottom: 8px;
font-size: 14px;
font-weight: 650;
}}
input[type="file"], select, input[type="text"] {{
width: 100%;
border: 1px solid var(--line);
border-radius: 6px;
padding: 11px 12px;
font: inherit;
background: #fff;
color: var(--text);
}}
.grid {{
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 14px;
}}
.checks {{
display: flex;
flex-wrap: wrap;
gap: 16px;
color: var(--text);
font-size: 14px;
}}
.checks label {{
display: inline-flex;
align-items: center;
gap: 8px;
margin: 0;
font-weight: 500;
}}
button, .button, .small-button, .ghost {{
display: inline-flex;
align-items: center;
justify-content: center;
min-height: 42px;
border-radius: 6px;
padding: 0 16px;
font: inherit;
font-weight: 700;
text-decoration: none;
cursor: pointer;
}}
button, .button, .small-button {{
border: 0;
background: var(--primary);
color: #fff;
}}
button:hover, .button:hover, .small-button:hover {{ background: var(--primary-dark); }}
.ghost {{
border: 1px solid var(--line);
color: var(--text);
background: #fff;
}}
.note {{
color: var(--muted);
font-size: 13px;
line-height: 1.6;
margin-top: 18px;
border-top: 1px solid var(--line);
padding-top: 16px;
}}
.summary {{
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: 14px;
margin-bottom: 16px;
}}
.metric {{
border: 1px solid var(--line);
border-radius: 8px;
background: #fff;
padding: 16px;
}}
.metric span {{
display: block;
color: var(--muted);
font-size: 13px;
margin-bottom: 8px;
}}
.metric strong {{
font-size: 24px;
line-height: 1;
}}
.actions {{
display: flex;
gap: 12px;
margin: 0 0 18px;
}}
.results {{
display: grid;
gap: 16px;
}}
.file-head {{
display: flex;
align-items: center;
justify-content: space-between;
gap: 16px;
margin-bottom: 16px;
}}
h2 {{
margin: 0;
font-size: 18px;
letter-spacing: 0;
}}
.file-head p {{
margin: 6px 0 0;
color: var(--muted);
font-size: 13px;
}}
.sheet {{
border: 1px solid var(--line);
border-radius: 8px;
margin-top: 10px;
overflow: hidden;
}}
summary {{
display: flex;
align-items: center;
justify-content: space-between;
gap: 12px;
padding: 12px 14px;
cursor: pointer;
background: var(--soft);
font-weight: 700;
}}
summary small {{
color: var(--muted);
font-weight: 500;
}}
.table-wrap {{
width: 100%;
overflow: auto;
background: #fff;
}}
table {{
width: 100%;
border-collapse: collapse;
font-size: 13px;
min-width: 720px;
}}
th, td {{
border-top: 1px solid var(--line);
border-right: 1px solid var(--line);
padding: 8px 10px;
text-align: left;
vertical-align: top;
white-space: nowrap;
}}
th {{
background: #fbfcfd;
font-weight: 700;
}}
.empty, .more {{
color: var(--muted);
font-size: 13px;
padding: 12px 14px;
}}
@media (max-width: 760px) {{
header {{ display: block; }}
.grid, .summary {{ grid-template-columns: 1fr; }}
.actions, .file-head {{ align-items: stretch; flex-direction: column; }}
button, .button, .small-button, .ghost {{ width: 100%; }}
}}
</style>
</head>
<body>
<main class="shell">
<header>
<div>
<h1>检测数据处理</h1>
<div class="sub">{html.escape(subtitle)}</div>
</div>
</header>
{body}
</main>
</body>
</html>
"""

View File

@@ -3,8 +3,11 @@ import shutil
import subprocess import subprocess
import sys import sys
import zipfile import zipfile
from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from openpyxl import load_workbook
PROCESSOR_DIR = Path(__file__).resolve().parent / "processors" PROCESSOR_DIR = Path(__file__).resolve().parent / "processors"
@@ -13,6 +16,30 @@ class ProcessingError(Exception):
pass pass
@dataclass
class SheetSummary:
    """Size and preview information for one worksheet of a result workbook."""

    # Worksheet title.
    name: str
    # Row count reported by the workbook (0 when it reports none).
    rows: int
    # Column count reported by the workbook (0 when it reports none).
    columns: int
    # Leading rows of the sheet, every cell already converted to display text.
    preview: list[list[str]]
@dataclass
class ExcelSummary:
    """Summary of one generated Excel workbook and its worksheets."""

    # File name without any directory part.
    filename: str
    # POSIX-style path of the workbook relative to the job's output directory.
    relpath: str
    # One summary per worksheet, in workbook order.
    sheets: list[SheetSummary]
@dataclass
class ProcessingResult:
    """Outcome of one processing run, as returned by ``run_processing``."""

    # Name of the job directory; used in download URLs.
    job_id: str
    # Processing mode that was actually used for the run.
    mode: str
    # Directory holding the generated Excel files.
    output_dir: Path
    # Archive containing the Excel results of the job.
    zip_path: Path
    # Summaries of every generated workbook.
    files: list[ExcelSummary]
def run_processing( def run_processing(
zip_path: Path, zip_path: Path,
job_dir: Path, job_dir: Path,
@@ -21,7 +48,7 @@ def run_processing(
result_name: str, result_name: str,
show_not_match: bool, show_not_match: bool,
show_all_infos: bool, show_all_infos: bool,
) -> Path: ) -> ProcessingResult:
if mode not in {"auto", "v1", "v2"}: if mode not in {"auto", "v1", "v2"}:
raise ProcessingError("处理模式不正确。") raise ProcessingError("处理模式不正确。")
if data_type not in {"pat_no", "zhuyuanhao"}: if data_type not in {"pat_no", "zhuyuanhao"}:
@@ -82,8 +109,7 @@ def run_processing(
timeout=60 * 30, timeout=60 * 30,
) )
log_path = output_dir / "process.log" (job_dir / "process.log").write_text(
log_path.write_text(
"mode=" + selected_mode + "\n\n" + completed.stdout, "mode=" + selected_mode + "\n\n" + completed.stdout,
encoding="utf-8", encoding="utf-8",
) )
@@ -92,20 +118,41 @@ def run_processing(
if selected_mode == "v2": if selected_mode == "v2":
_collect_v2_outputs(data_dir, output_dir) _collect_v2_outputs(data_dir, output_dir)
_collect_logs(data_dir, output_dir)
xlsx_files = list(output_dir.rglob("*.xlsx")) xlsx_files = sorted(output_dir.rglob("*.xlsx"))
if not xlsx_files: if not xlsx_files:
raise ProcessingError("处理完成但没有生成 Excel 文件,请检查数据结构和 process.log") raise ProcessingError("处理完成但没有生成 Excel 文件,请检查数据结构。")
result_zip = job_dir / "result.zip" result_zip = job_dir / "result.zip"
with zipfile.ZipFile(result_zip, "w", compression=zipfile.ZIP_DEFLATED) as zf: _create_result_zip(output_dir, result_zip)
for path in output_dir.rglob("*"): return ProcessingResult(
if path.is_file(): job_id=job_dir.name,
zf.write(path, path.relative_to(output_dir)) mode=selected_mode,
output_dir=output_dir,
zip_path=result_zip,
files=[_summarize_workbook(path, output_dir) for path in xlsx_files],
)
def create_result_zip(job_dir: Path) -> Path:
    """Rebuild ``result.zip`` for a job from its ``output`` directory.

    Args:
        job_dir: Working directory of the job.

    Returns:
        Path of the freshly written archive inside ``job_dir``.

    Raises:
        ProcessingError: When the job has no ``output`` directory.
    """
    source_dir = job_dir / "output"
    if not source_dir.exists():
        raise ProcessingError("结果目录不存在。")
    archive_path = job_dir / "result.zip"
    _create_result_zip(source_dir, archive_path)
    return archive_path
def find_output_file(job_dir: Path, relpath: str) -> Path:
    """Locate a generated Excel file inside a job's ``output`` directory.

    Args:
        job_dir: Working directory of the job.
        relpath: Client-supplied path, interpreted relative to
            ``job_dir / "output"``.

    Returns:
        The resolved path of the requested ``.xlsx`` file.

    Raises:
        ProcessingError: When the resolved path lies outside the output
            directory, is not an existing file, or is not an ``.xlsx`` file.
    """
    output_dir = (job_dir / "output").resolve()
    target = (output_dir / relpath).resolve()
    # Containment must be decided on path components. A plain string prefix
    # test would accept "../output_evil/x.xlsx", because ".../output_evil"
    # starts with ".../output" even though it is a sibling directory.
    if not target.is_relative_to(output_dir) or not target.is_file():
        raise ProcessingError("结果文件不存在。")
    if target.suffix.lower() != ".xlsx":
        raise ProcessingError("只能导出 Excel 结果文件。")
    return target
def _safe_extract(zip_path: Path, target_dir: Path) -> None: def _safe_extract(zip_path: Path, target_dir: Path) -> None:
try: try:
with zipfile.ZipFile(zip_path) as zf: with zipfile.ZipFile(zip_path) as zf:
@@ -169,13 +216,41 @@ def _collect_v2_outputs(data_dir: Path, output_dir: Path) -> None:
shutil.copy2(path, target) shutil.copy2(path, target)
def _collect_logs(data_dir: Path, output_dir: Path) -> None: def _create_result_zip(output_dir: Path, result_zip: Path) -> None:
logs_dir = output_dir / "logs" with zipfile.ZipFile(result_zip, "w", compression=zipfile.ZIP_DEFLATED) as zf:
for pattern in ("*.txt", "error.txt", "Error.txt"): for path in sorted(output_dir.rglob("*.xlsx")):
for path in data_dir.rglob(pattern):
if path.is_file(): if path.is_file():
logs_dir.mkdir(exist_ok=True) zf.write(path, path.relative_to(output_dir))
relative = path.relative_to(data_dir)
target = logs_dir / relative
target.parent.mkdir(parents=True, exist_ok=True) def _summarize_workbook(path: Path, output_dir: Path) -> ExcelSummary:
shutil.copy2(path, target) sheets: list[SheetSummary] = []
workbook = load_workbook(path, read_only=True, data_only=True)
try:
for sheet in workbook.worksheets:
preview: list[list[str]] = []
for row in sheet.iter_rows(max_row=6, values_only=True):
preview.append([_cell_to_text(value) for value in row])
sheets.append(
SheetSummary(
name=sheet.title,
rows=sheet.max_row or 0,
columns=sheet.max_column or 0,
preview=preview,
)
)
finally:
workbook.close()
return ExcelSummary(
filename=path.name,
relpath=path.relative_to(output_dir).as_posix(),
sheets=sheets,
)
def _cell_to_text(value: object) -> str:
if value is None:
return ""
text = str(value)
return text if len(text) <= 80 else text[:77] + "..."