diff --git a/README.md b/README.md index 164d258..68b496b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # HIS_Sur_Data_Deal -网页端检测数据处理工具。上传 `待处理检测数据.zip` 后,服务会自动识别 V1/V2 数据结构,调用原处理脚本生成 Excel,并返回结果压缩包。 +网页端检测数据处理工具。上传 `待处理检测数据.zip` 后,服务会自动识别 V1/V2 数据结构,调用原处理脚本生成 Excel,并在网页中展示结果摘要、工作表统计和部分数据预览;用户可按需导出单个 Excel 或全部 Excel 压缩包。 ## 本地运行 @@ -31,3 +31,4 @@ V1:zip 解压后包含 `Patients_info.csv`、`Tests_List`、`Tests_Detail_List V2:zip 解压后包含 `Patients_info.csv`,并按患者目录分别保存检测汇总和具体检测,输出多个患者 Excel。 +导出的压缩包默认只包含 Excel 结果,不包含处理日志。 diff --git a/app/main.py b/app/main.py index 3956e33..95044b9 100644 --- a/app/main.py +++ b/app/main.py @@ -3,14 +3,20 @@ import shutil import tempfile import uuid from pathlib import Path +from urllib.parse import quote -from fastapi import FastAPI, File, Form, HTTPException, UploadFile +from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile from fastapi.responses import FileResponse, HTMLResponse -from .processor import ProcessingError, run_processing +from .processor import ( + ProcessingError, + ProcessingResult, + create_result_zip, + find_output_file, + run_processing, +) -APP_ROOT = Path(__file__).resolve().parent WORK_ROOT = Path(tempfile.gettempdir()) / "his_sur_data_deal_jobs" WORK_ROOT.mkdir(parents=True, exist_ok=True) @@ -19,185 +25,48 @@ app = FastAPI(title="检测数据处理") @app.get("/", response_class=HTMLResponse) def index() -> str: - return """ - - - - - - 检测数据处理 - - - -
-
-
-

检测数据处理

-
上传“待处理检测数据.zip”,处理完成后自动下载结果压缩包。
-
-
-
-
-
- - -
-
-
- - -
-
- - -
-
- - -
-
-
- - -
- -
-
V1 适用于含有 Patients_info.csv、Tests_List、Tests_Detail_List 的批量数据;V2 适用于每个患者单独目录的数据。
-
-
- - -""" + return _page_shell( + """ +
+
+
+ + +
+
+
+ + +
+
+ + +
+
+ + +
+
+
+ + +
+ +
+
V1 适用于含有 Patients_info.csv、Tests_List、Tests_Detail_List 的批量数据;V2 适用于每个患者单独目录的数据。
+
+ """ + ) -@app.post("/process") +@app.post("/process", response_class=HTMLResponse) async def process( file: UploadFile = File(...), mode: str = Form("auto"), @@ -205,7 +74,7 @@ async def process( result_name: str = Form("Result"), show_not_match: str | None = Form(None), show_all_infos: str | None = Form(None), -) -> FileResponse: +) -> str: if not file.filename or not file.filename.lower().endswith(".zip"): raise HTTPException(status_code=400, detail="请上传 zip 文件。") @@ -216,7 +85,7 @@ async def process( with upload_path.open("wb") as out: shutil.copyfileobj(file.file, out) - result_zip = run_processing( + result = run_processing( zip_path=upload_path, job_dir=job_dir, mode=mode, @@ -226,11 +95,20 @@ async def process( show_all_infos=show_all_infos == "true", ) except ProcessingError as exc: - safe_detail = html.escape(str(exc)) - raise HTTPException(status_code=400, detail=safe_detail) from exc + raise HTTPException(status_code=400, detail=str(exc)) from exc except Exception as exc: raise HTTPException(status_code=500, detail=f"处理失败:{exc}") from exc + return _render_result(result) + + +@app.get("/download/all/{job_id}") +def download_all(job_id: str) -> FileResponse: + job_dir = _get_job_dir(job_id) + try: + result_zip = create_result_zip(job_dir) + except ProcessingError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc return FileResponse( result_zip, media_type="application/zip", @@ -238,7 +116,344 @@ async def process( ) +@app.get("/download/file/{job_id}") +def download_file(job_id: str, path: str = Query(...)) -> FileResponse: + job_dir = _get_job_dir(job_id) + try: + target = find_output_file(job_dir, path) + except ProcessingError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + return FileResponse( + target, + media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + filename=target.name, + ) + + @app.get("/health") def health() -> dict[str, str]: return {"status": "ok"} + +def _get_job_dir(job_id: str) -> Path: + if not job_id.isalnum(): + raise HTTPException(status_code=404, detail="结果不存在。") + job_dir = (WORK_ROOT / job_id).resolve() + if not str(job_dir).startswith(str(WORK_ROOT.resolve())) or not job_dir.exists(): + raise HTTPException(status_code=404, detail="结果不存在。") + return job_dir + + +def _render_result(result: ProcessingResult) -> str: + total_sheets = sum(len(file.sheets) for file in result.files) + total_rows = sum(max(sheet.rows - 1, 0) for file in result.files for sheet in file.sheets) + file_items = "\n".join(_render_file(file, result.job_id) for file in result.files) + + body = f""" +
+
处理模式{html.escape(result.mode.upper())}
+
Excel 文件{len(result.files)}
+
工作表{total_sheets}
+
数据行{total_rows}
+
+
+ 导出全部 Excel + 继续处理新文件 +
+
+ {file_items} +
+ """ + return _page_shell(body, subtitle="处理完成,可先查看结果摘要和部分预览,再选择导出。") + + +def _render_file(file, job_id: str) -> str: + sheet_items = "\n".join(_render_sheet(sheet) for sheet in file.sheets[:6]) + more = "" + if len(file.sheets) > 6: + more = f'
还有 {len(file.sheets) - 6} 个工作表未展开预览,可导出 Excel 查看完整内容。
' + file_url = f"/download/file/{html.escape(job_id)}?path={quote(file.relpath)}" + return f""" +
+
+
+

{html.escape(file.filename)}

+

{len(file.sheets)} 个工作表

+
+ 导出此 Excel +
+ {sheet_items} + {more} +
+ """ + + +def _render_sheet(sheet) -> str: + preview = sheet.preview[:6] + table = '
此工作表没有可预览的数据。
' + if preview: + max_cols = min(max((len(row) for row in preview), default=0), 12) + rows = [] + for index, row in enumerate(preview): + cells = [] + for value in (row + [""] * max_cols)[:max_cols]: + tag = "th" if index == 0 else "td" + cells.append(f"<{tag}>{html.escape(value)}") + rows.append("" + "".join(cells) + "") + table = f"
{''.join(rows)}
" + return f""" +
+ + {html.escape(sheet.name)} + {sheet.rows} 行 · {sheet.columns} 列 + + {table} +
+ """ + + +def _page_shell(body: str, subtitle: str = "上传“待处理检测数据.zip”,处理完成后在网页中查看结果。") -> str: + return f""" + + + + + + 检测数据处理 + + + +
+
+
+

检测数据处理

+
{html.escape(subtitle)}
+
+
+ {body} +
+ + +""" diff --git a/app/processor.py b/app/processor.py index 4cca040..b4ed733 100644 --- a/app/processor.py +++ b/app/processor.py @@ -3,8 +3,11 @@ import shutil import subprocess import sys import zipfile +from dataclasses import dataclass from pathlib import Path +from openpyxl import load_workbook + PROCESSOR_DIR = Path(__file__).resolve().parent / "processors" @@ -13,6 +16,30 @@ class ProcessingError(Exception): pass +@dataclass +class SheetSummary: + name: str + rows: int + columns: int + preview: list[list[str]] + + +@dataclass +class ExcelSummary: + filename: str + relpath: str + sheets: list[SheetSummary] + + +@dataclass +class ProcessingResult: + job_id: str + mode: str + output_dir: Path + zip_path: Path + files: list[ExcelSummary] + + def run_processing( zip_path: Path, job_dir: Path, @@ -21,7 +48,7 @@ def run_processing( result_name: str, show_not_match: bool, show_all_infos: bool, -) -> Path: +) -> ProcessingResult: if mode not in {"auto", "v1", "v2"}: raise ProcessingError("处理模式不正确。") if data_type not in {"pat_no", "zhuyuanhao"}: @@ -82,8 +109,7 @@ def run_processing( timeout=60 * 30, ) - log_path = output_dir / "process.log" - log_path.write_text( + (job_dir / "process.log").write_text( "mode=" + selected_mode + "\n\n" + completed.stdout, encoding="utf-8", ) @@ -92,20 +118,41 @@ def run_processing( if selected_mode == "v2": _collect_v2_outputs(data_dir, output_dir) - _collect_logs(data_dir, output_dir) - xlsx_files = list(output_dir.rglob("*.xlsx")) + xlsx_files = sorted(output_dir.rglob("*.xlsx")) if not xlsx_files: - raise ProcessingError("处理完成但没有生成 Excel 文件,请检查数据结构和 process.log。") + raise ProcessingError("处理完成但没有生成 Excel 文件,请检查数据结构。") result_zip = job_dir / "result.zip" - with zipfile.ZipFile(result_zip, "w", compression=zipfile.ZIP_DEFLATED) as zf: - for path in output_dir.rglob("*"): - if path.is_file(): - zf.write(path, path.relative_to(output_dir)) + _create_result_zip(output_dir, result_zip) + return ProcessingResult( + job_id=job_dir.name, + mode=selected_mode, + output_dir=output_dir, + zip_path=result_zip, + files=[_summarize_workbook(path, output_dir) for path in xlsx_files], + ) + + +def create_result_zip(job_dir: Path) -> Path: + output_dir = job_dir / "output" + result_zip = job_dir / "result.zip" + if not output_dir.exists(): + raise ProcessingError("结果目录不存在。") + _create_result_zip(output_dir, result_zip) return result_zip +def find_output_file(job_dir: Path, relpath: str) -> Path: + output_dir = (job_dir / "output").resolve() + target = (output_dir / relpath).resolve() + if not str(target).startswith(str(output_dir)) or not target.is_file(): + raise ProcessingError("结果文件不存在。") + if target.suffix.lower() != ".xlsx": + raise ProcessingError("只能导出 Excel 结果文件。") + return target + + def _safe_extract(zip_path: Path, target_dir: Path) -> None: try: with zipfile.ZipFile(zip_path) as zf: @@ -169,13 +216,41 @@ def _collect_v2_outputs(data_dir: Path, output_dir: Path) -> None: shutil.copy2(path, target) -def _collect_logs(data_dir: Path, output_dir: Path) -> None: - logs_dir = output_dir / "logs" - for pattern in ("*.txt", "error.txt", "Error.txt"): - for path in data_dir.rglob(pattern): +def _create_result_zip(output_dir: Path, result_zip: Path) -> None: + with zipfile.ZipFile(result_zip, "w", compression=zipfile.ZIP_DEFLATED) as zf: + for path in sorted(output_dir.rglob("*.xlsx")): if path.is_file(): - logs_dir.mkdir(exist_ok=True) - relative = path.relative_to(data_dir) - target = logs_dir / relative - target.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(path, target) + zf.write(path, path.relative_to(output_dir)) + + +def _summarize_workbook(path: Path, output_dir: Path) -> ExcelSummary: + sheets: list[SheetSummary] = [] + workbook = load_workbook(path, read_only=True, data_only=True) + try: + for sheet in workbook.worksheets: + preview: list[list[str]] = [] + for row in sheet.iter_rows(max_row=6, values_only=True): + preview.append([_cell_to_text(value) for value in row]) + sheets.append( + SheetSummary( + name=sheet.title, + rows=sheet.max_row or 0, + columns=sheet.max_column or 0, + preview=preview, + ) + ) + finally: + workbook.close() + + return ExcelSummary( + filename=path.name, + relpath=path.relative_to(output_dir).as_posix(), + sheets=sheets, + ) + + +def _cell_to_text(value: object) -> str: + if value is None: + return "" + text = str(value) + return text if len(text) <= 80 else text[:77] + "..."