import os
import shutil
import subprocess
import uuid
from typing import Optional

import uvicorn
from fastapi import BackgroundTasks, FastAPI, File, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import FileResponse

app = FastAPI(title="MinerU Extraction API")


def cleanup_task_dir(dir_path: str) -> None:
    """Delete ``dir_path`` and everything inside it, if it exists."""
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)
        print(f"🧹 已自动清理临时目录: {dir_path}")


def _safe_upload_name(raw_name: Optional[str]) -> str:
    """Reduce a client-supplied filename to a single safe path component.

    The upload filename is untrusted: it may be missing, contain directory
    separators, or be ``..``.  Only the final component is kept so the file
    can never be written outside the task's working directory.
    """
    # Normalise Windows separators first so "a\\..\\b" is split on POSIX too.
    name = os.path.basename((raw_name or "").replace("\\", "/"))
    if name in ("", ".", ".."):
        return "upload"
    return name


def _run_extraction(upload_stream, input_file_path: str, out_dir: str,
                    zip_base_path: str) -> str:
    """Save the upload, run MinerU on it, and zip the result directory.

    This is plain blocking code; the endpoint runs it in a worker thread.

    Args:
        upload_stream: Readable binary file object holding the upload.
        input_file_path: Where the uploaded bytes are written.
        out_dir: Directory passed to ``mineru -o``.
        zip_base_path: Archive path without the ``.zip`` extension.

    Returns:
        Path of the created ``.zip`` archive.

    Raises:
        subprocess.CalledProcessError: If ``mineru`` exits non-zero.
        OSError: If writing the upload or building the archive fails.
    """
    with open(input_file_path, "wb") as buffer:
        shutil.copyfileobj(upload_stream, buffer)

    subprocess.run(["mineru", "-p", input_file_path, "-o", out_dir], check=True)

    # Descend to the first directory that contains a Markdown file, so the
    # archive root is that directory; fall back to out_dir when none is found.
    target_zip_dir = out_dir
    for root, _dirs, files in os.walk(out_dir):
        if any(name.endswith(".md") for name in files):
            target_zip_dir = root
            break

    shutil.make_archive(zip_base_path, "zip", target_zip_dir)
    return f"{zip_base_path}.zip"


@app.post("/extract")
async def extract_document(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Run MinerU on an uploaded document and return the output as a zip.

    Each request gets its own directory under ``./api_workspace``; a
    background task removes that directory after the response is sent.

    Args:
        background_tasks: FastAPI background-task collector (injected).
        file: The uploaded document.

    Returns:
        A ``FileResponse`` with the zipped output on success, otherwise a
        dict ``{"status": "error", "message": ...}``.
    """
    # Per-request workspace keyed by a short random id.
    task_id = str(uuid.uuid4())[:8]
    work_dir = os.path.abspath(f"./api_workspace/{task_id}")
    out_dir = os.path.join(work_dir, "OUT")
    os.makedirs(work_dir, exist_ok=True)

    # Never join the raw client filename into a filesystem path.
    safe_name = _safe_upload_name(file.filename)
    input_file_path = os.path.join(work_dir, safe_name)
    zip_base_path = os.path.join(work_dir, "result")

    try:
        print(f"[Task {task_id}] 收到文件: {file.filename},开始处理...")

        # The copy, the MinerU subprocess and the archiving all block, so
        # they run in the thread pool instead of stalling the event loop.
        zip_file_path = await run_in_threadpool(
            _run_extraction, file.file, input_file_path, out_dir, zip_base_path
        )

        # Remove the workspace only after the response has been sent.
        background_tasks.add_task(cleanup_task_dir, work_dir)

        return FileResponse(
            zip_file_path,
            media_type="application/zip",
            filename=f"parsed_{safe_name}.zip",
        )

    except Exception as e:
        # Top-level boundary: report the failure to the client and still
        # schedule cleanup so failed tasks do not accumulate on disk.
        background_tasks.add_task(cleanup_task_dir, work_dir)
        return {"status": "error", "message": str(e)}


if __name__ == "__main__":
    print("🚀 MinerU API 启动成功!监听地址: http://192.168.4.6:5000")
    uvicorn.run(app, host="0.0.0.0", port=5000)