diff --git a/README.md b/README.md index 68b496b..3e4ae23 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # HIS_Sur_Data_Deal -网页端检测数据处理工具。上传 `待处理检测数据.zip` 后,服务会自动识别 V1/V2 数据结构,调用原处理脚本生成 Excel,并在网页中展示结果摘要、工作表统计和部分数据预览;用户可按需导出单个 Excel 或全部 Excel 压缩包。 +网页端检测数据处理工具。上传 `待处理检测数据.zip` 后,服务会自动识别 V1/V2 数据结构,调用原处理脚本生成 Excel,并在网页中展示结果摘要、工作表统计和数据预览;用户可调整每个工作表的预览行数,并按需导出单个 Excel 或全部 Excel 压缩包。 ## 本地运行 @@ -31,4 +31,4 @@ V1:zip 解压后包含 `Patients_info.csv`、`Tests_List`、`Tests_Detail_List V2:zip 解压后包含 `Patients_info.csv`,并按患者目录分别保存检测汇总和具体检测,输出多个患者 Excel。 -导出的压缩包默认只包含 Excel 结果,不包含处理日志。 +导出的压缩包默认只包含 Excel 结果,不包含处理日志。系统默认输出全部检测记录,并保留“未匹配检测内容”列作为额外辅助信息。 diff --git a/app/main.py b/app/main.py index 95044b9..b68b0c1 100644 --- a/app/main.py +++ b/app/main.py @@ -14,56 +14,59 @@ from .processor import ( create_result_zip, find_output_file, run_processing, + summarize_job, ) WORK_ROOT = Path(tempfile.gettempdir()) / "his_sur_data_deal_jobs" WORK_ROOT.mkdir(parents=True, exist_ok=True) -app = FastAPI(title="检测数据处理") +APP_TITLE = "\u68c0\u6d4b\u6570\u636e\u5904\u7406" +UNMATCHED = "\u672a\u5339\u914d\u68c0\u6d4b\u5185\u5bb9" + +app = FastAPI(title=APP_TITLE) @app.get("/", response_class=HTMLResponse) def index() -> str: - return _page_shell( - """ -
-
+ body = f""" +
+ +
+ + +
+
- - + +
-
-
- - -
-
- - -
-
- - -
+
+ +
-
- - +
+ +
- - -
V1 适用于含有 Patients_info.csv、Tests_List、Tests_Detail_List 的批量数据;V2 适用于每个患者单独目录的数据。
-
- """ - ) +
+ + +
+ + + +
\u9ed8\u8ba4\u8f93\u51fa\u5168\u90e8\u68c0\u6d4b\u8bb0\u5f55\uff0c\u5e76\u4fdd\u7559\u201c{UNMATCHED}\u201d\u5217\uff1b\u8be5\u5217\u4e3a\u989d\u5916\u8f85\u52a9\u4fe1\u606f\uff0c\u4fbf\u4e8e\u6838\u5bf9\u672a\u5f52\u7c7b\u9879\u76ee\u3002
+
+ """ + return _page_shell(body) @app.post("/process", response_class=HTMLResponse) @@ -72,11 +75,10 @@ async def process( mode: str = Form("auto"), data_type: str = Form("pat_no"), result_name: str = Form("Result"), - show_not_match: str | None = Form(None), - show_all_infos: str | None = Form(None), + preview_rows: int = Form(20), ) -> str: if not file.filename or not file.filename.lower().endswith(".zip"): - raise HTTPException(status_code=400, detail="请上传 zip 文件。") + raise HTTPException(status_code=400, detail="\u8bf7\u4e0a\u4f20 zip \u6587\u4ef6\u3002") job_dir = WORK_ROOT / uuid.uuid4().hex job_dir.mkdir(parents=True, exist_ok=True) @@ -85,21 +87,34 @@ async def process( with upload_path.open("wb") as out: shutil.copyfileobj(file.file, out) + rows = _clean_preview_rows(preview_rows) result = run_processing( zip_path=upload_path, job_dir=job_dir, mode=mode, data_type=data_type, result_name=result_name, - show_not_match=show_not_match == "true", - show_all_infos=show_all_infos == "true", + show_not_match=True, + show_all_infos=True, + preview_rows=rows, ) except ProcessingError as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc except Exception as exc: - raise HTTPException(status_code=500, detail=f"处理失败:{exc}") from exc + raise HTTPException(status_code=500, detail=f"\u5904\u7406\u5931\u8d25\uff1a{exc}") from exc - return _render_result(result) + return _render_result(result, preview_rows=rows) + + +@app.get("/result/{job_id}", response_class=HTMLResponse) +def result_page(job_id: str, preview_rows: int = Query(20, ge=5, le=200)) -> str: + job_dir = _get_job_dir(job_id) + try: + rows = _clean_preview_rows(preview_rows) + result = summarize_job(job_dir, rows) + except ProcessingError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + return _render_result(result, preview_rows=rows) @app.get("/download/all/{job_id}") @@ -109,11 +124,7 @@ def download_all(job_id: str) -> FileResponse: result_zip = create_result_zip(job_dir) except ProcessingError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc - return FileResponse( - result_zip, - media_type="application/zip", - filename="检测数据处理结果.zip", - ) + return FileResponse(result_zip, media_type="application/zip", filename="result.zip") @app.get("/download/file/{job_id}") @@ -137,89 +148,96 @@ def health() -> dict[str, str]: def _get_job_dir(job_id: str) -> Path: if not job_id.isalnum(): - raise HTTPException(status_code=404, detail="结果不存在。") + raise HTTPException(status_code=404, detail="\u7ed3\u679c\u4e0d\u5b58\u5728\u3002") job_dir = (WORK_ROOT / job_id).resolve() if not str(job_dir).startswith(str(WORK_ROOT.resolve())) or not job_dir.exists(): - raise HTTPException(status_code=404, detail="结果不存在。") + raise HTTPException(status_code=404, detail="\u7ed3\u679c\u4e0d\u5b58\u5728\u3002") return job_dir -def _render_result(result: ProcessingResult) -> str: +def _clean_preview_rows(preview_rows: int) -> int: + return max(5, min(int(preview_rows or 20), 200)) + + +def _render_result(result: ProcessingResult, preview_rows: int = 20) -> str: total_sheets = sum(len(file.sheets) for file in result.files) total_rows = sum(max(sheet.rows - 1, 0) for file in result.files for sheet in file.sheets) file_items = "\n".join(_render_file(file, result.job_id) for file in result.files) body = f"""
-
处理模式{html.escape(result.mode.upper())}
-
Excel 文件{len(result.files)}
-
工作表{total_sheets}
-
数据行{total_rows}
+
\u5904\u7406\u6a21\u5f0f{html.escape(result.mode.upper())}
+
Excel \u6587\u4ef6{len(result.files)}
+
\u5de5\u4f5c\u8868{total_sheets}
+
\u6570\u636e\u884c{total_rows}
- 导出全部 Excel - 继续处理新文件 + \u5bfc\u51fa\u5168\u90e8 Excel + \u7ee7\u7eed\u5904\u7406\u65b0\u6587\u4ef6
+
+ + + +
+
\u7ed3\u679c\u4e2d\u201c{UNMATCHED}\u201d\u5217\u4e3a\u989d\u5916\u8f85\u52a9\u4fe1\u606f\uff0c\u7528\u6765\u6807\u660e\u672a\u5f52\u5165\u6807\u51c6\u9879\u76ee\u7684\u68c0\u6d4b\u5185\u5bb9\u3002
{file_items}
""" - return _page_shell(body, subtitle="处理完成,可先查看结果摘要和部分预览,再选择导出。") + return _page_shell(body, subtitle="\u5904\u7406\u5b8c\u6210\uff0c\u53ef\u5411\u4e0b\u6eda\u52a8\u67e5\u770b\u66f4\u591a\u5de5\u4f5c\u8868\uff0c\u4e5f\u53ef\u8c03\u6574\u9884\u89c8\u884c\u6570\u540e\u5237\u65b0\u3002") def _render_file(file, job_id: str) -> str: - sheet_items = "\n".join(_render_sheet(sheet) for sheet in file.sheets[:6]) - more = "" - if len(file.sheets) > 6: - more = f'
还有 {len(file.sheets) - 6} 个工作表未展开预览,可导出 Excel 查看完整内容。
' + sheet_items = "\n".join(_render_sheet(sheet) for sheet in file.sheets) file_url = f"/download/file/{html.escape(job_id)}?path={quote(file.relpath)}" return f"""

{html.escape(file.filename)}

-

{len(file.sheets)} 个工作表

+

{len(file.sheets)} \u4e2a\u5de5\u4f5c\u8868

- 导出此 Excel + \u5bfc\u51fa\u6b64 Excel
{sheet_items} - {more}
""" def _render_sheet(sheet) -> str: - preview = sheet.preview[:6] - table = '
此工作表没有可预览的数据。
' + preview = sheet.preview + table = '
\u6b64\u5de5\u4f5c\u8868\u6ca1\u6709\u53ef\u9884\u89c8\u7684\u6570\u636e\u3002
' if preview: - max_cols = min(max((len(row) for row in preview), default=0), 12) + max_cols = min(max((len(row) for row in preview), default=0), 18) rows = [] for index, row in enumerate(preview): cells = [] for value in (row + [""] * max_cols)[:max_cols]: tag = "th" if index == 0 else "td" - cells.append(f"<{tag}>{html.escape(value)}") + css_class = ' class="extra-col"' if index == 0 and value == UNMATCHED else "" + cells.append(f"<{tag}{css_class}>{html.escape(value)}") rows.append("" + "".join(cells) + "") - table = f"
{''.join(rows)}
" + table = f'
{"".join(rows)}
' return f"""
{html.escape(sheet.name)} - {sheet.rows} 行 · {sheet.columns} 列 + {sheet.rows} \u884c · {sheet.columns} \u5217 {table}
""" -def _page_shell(body: str, subtitle: str = "上传“待处理检测数据.zip”,处理完成后在网页中查看结果。") -> str: +def _page_shell(body: str, subtitle: str = "\u4e0a\u4f20\u201c\u5f85\u5904\u7406\u68c0\u6d4b\u6570\u636e.zip\u201d\uff0c\u5904\u7406\u5b8c\u6210\u540e\u5728\u7f51\u9875\u4e2d\u67e5\u770b\u7ed3\u679c\u3002") -> str: return f""" - 检测数据处理 + {APP_TITLE} @@ -447,10 +450,8 @@ def _page_shell(body: str, subtitle: str = "上传“待处理检测数据.zip
-
-

检测数据处理

-
{html.escape(subtitle)}
-
+

{APP_TITLE}

+
{html.escape(subtitle)}
{body}
diff --git a/app/processor.py b/app/processor.py index b4ed733..a263c83 100644 --- a/app/processor.py +++ b/app/processor.py @@ -48,6 +48,7 @@ def run_processing( result_name: str, show_not_match: bool, show_all_infos: bool, + preview_rows: int = 20, ) -> ProcessingResult: if mode not in {"auto", "v1", "v2"}: raise ProcessingError("处理模式不正确。") @@ -123,6 +124,9 @@ def run_processing( if not xlsx_files: raise ProcessingError("处理完成但没有生成 Excel 文件,请检查数据结构。") + for xlsx_file in xlsx_files: + _remove_default_empty_sheet(xlsx_file) + result_zip = job_dir / "result.zip" _create_result_zip(output_dir, result_zip) return ProcessingResult( @@ -130,7 +134,7 @@ def run_processing( mode=selected_mode, output_dir=output_dir, zip_path=result_zip, - files=[_summarize_workbook(path, output_dir) for path in xlsx_files], + files=[_summarize_workbook(path, output_dir, preview_rows) for path in xlsx_files], ) @@ -143,6 +147,24 @@ def create_result_zip(job_dir: Path) -> Path: return result_zip +def summarize_job(job_dir: Path, preview_rows: int = 20) -> ProcessingResult: + output_dir = job_dir / "output" + if not output_dir.exists(): + raise ProcessingError("结果目录不存在。") + xlsx_files = sorted(output_dir.rglob("*.xlsx")) + if not xlsx_files: + raise ProcessingError("结果文件不存在。") + result_zip = job_dir / "result.zip" + mode = _read_mode(job_dir) + return ProcessingResult( + job_id=job_dir.name, + mode=mode, + output_dir=output_dir, + zip_path=result_zip, + files=[_summarize_workbook(path, output_dir, preview_rows) for path in xlsx_files], + ) + + def find_output_file(job_dir: Path, relpath: str) -> Path: output_dir = (job_dir / "output").resolve() target = (output_dir / relpath).resolve() @@ -153,6 +175,16 @@ def find_output_file(job_dir: Path, relpath: str) -> Path: return target +def _read_mode(job_dir: Path) -> str: + log_path = job_dir / "process.log" + if not log_path.exists(): + return "unknown" + first_line = log_path.read_text(encoding="utf-8", errors="replace").splitlines()[0:1] + if first_line and first_line[0].startswith("mode="): + return first_line[0].split("=", 1)[1] + return "unknown" + + def _safe_extract(zip_path: Path, target_dir: Path) -> None: try: with zipfile.ZipFile(zip_path) as zf: @@ -223,13 +255,33 @@ def _create_result_zip(output_dir: Path, result_zip: Path) -> None: zf.write(path, path.relative_to(output_dir)) -def _summarize_workbook(path: Path, output_dir: Path) -> ExcelSummary: +def _remove_default_empty_sheet(path: Path) -> None: + workbook = load_workbook(path) + try: + if "Sheet" in workbook.sheetnames and len(workbook.sheetnames) > 1: + sheet = workbook["Sheet"] + if _is_empty_sheet(sheet): + workbook.remove(sheet) + workbook.save(path) + finally: + workbook.close() + + +def _is_empty_sheet(sheet) -> bool: + for row in sheet.iter_rows(values_only=True): + for value in row: + if value not in (None, ""): + return False + return True + + +def _summarize_workbook(path: Path, output_dir: Path, preview_rows: int) -> ExcelSummary: sheets: list[SheetSummary] = [] workbook = load_workbook(path, read_only=True, data_only=True) try: for sheet in workbook.worksheets: preview: list[list[str]] = [] - for row in sheet.iter_rows(max_row=6, values_only=True): + for row in sheet.iter_rows(max_row=max(2, min(preview_rows, 200)), values_only=True): preview.append([_cell_to_text(value) for value in row]) sheets.append( SheetSummary(