66 lines
2.5 KiB
Python
66 lines
2.5 KiB
Python
import asyncio
import os
import shutil
import subprocess
import uuid

import uvicorn
from fastapi import FastAPI, UploadFile, File, BackgroundTasks # 1. 导入 BackgroundTasks
from fastapi.responses import FileResponse
|
|
|
|
# Application object for the service; the /extract route handler in this file
# registers itself on it via the @app.post decorator, and the __main__ guard
# passes it to uvicorn.run.
app = FastAPI(title="MinerU Extraction API")
|
|
|
|
# Cleanup helper for per-task working directories.
def cleanup_task_dir(dir_path: str):
    """Delete a directory and everything inside it, then log the removal.

    Does nothing (and prints nothing) when ``dir_path`` does not exist.

    Args:
        dir_path: Path of the directory tree to remove.
    """
    if not os.path.exists(dir_path):
        return
    shutil.rmtree(dir_path)
    print(f"🧹 已自动清理临时目录: {dir_path}")
|
|
|
|
def _safe_upload_name(raw_name):
    """Reduce a client-supplied filename to a single safe path component.

    The name comes straight from the multipart request, so it may be missing,
    absolute, or contain ``..`` segments. Only the last component is kept.

    Raises:
        ValueError: If nothing usable remains after stripping directories.
    """
    # Treat backslashes as separators too, so a Windows-style path sent to a
    # POSIX server is stripped down to its last component as well.
    name = os.path.basename((raw_name or "").replace("\\", "/"))
    if name in ("", ".", ".."):
        raise ValueError("upload has no usable filename")
    return name


def _extract_to_zip(upload_stream, input_file_path, out_dir, zip_base_path):
    """Run the blocking pipeline: save the upload, run MinerU, zip the output.

    Kept synchronous on purpose so the endpoint can push the whole thing onto
    a worker thread in one call.

    Returns:
        Path of the created ``.zip`` archive.

    Raises:
        subprocess.CalledProcessError: If the ``mineru`` command exits non-zero.
        OSError: On filesystem failures while saving or archiving.
    """
    # Persist the uploaded bytes where the CLI can read them.
    with open(input_file_path, "wb") as buffer:
        shutil.copyfileobj(upload_stream, buffer)

    # Argument list (no shell), and input_file_path is absolute, so the
    # filename cannot be interpreted as a command-line option.
    subprocess.run(["mineru", "-p", input_file_path, "-o", out_dir], check=True)

    # Skip wrapper directories: archive the first directory (in os.walk order)
    # that contains a Markdown file, falling back to out_dir itself.
    target_zip_dir = out_dir
    for root, _dirs, files in os.walk(out_dir):
        if any(f.endswith(".md") for f in files):
            target_zip_dir = root
            break

    shutil.make_archive(zip_base_path, "zip", target_zip_dir)
    return f"{zip_base_path}.zip"


@app.post("/extract")
async def extract_document(background_tasks: BackgroundTasks, file: UploadFile = File(...)):  # 2. inject the background-task collector
    """Run MinerU on an uploaded document and return the result as a zip.

    Each request gets its own directory under ``./api_workspace``; removal of
    that directory is scheduled as a background task so it happens after the
    response has been produced, on both the success and the error path.

    Args:
        background_tasks: FastAPI background-task collector for this request.
        file: The uploaded document.

    Returns:
        A ``FileResponse`` streaming ``result.zip`` on success, otherwise a
        dict ``{"status": "error", "message": ...}``.
        NOTE(review): the error dict is returned without an explicit status
        code, as in the previous version; confirm clients rely on the body.
    """
    # 1. Per-task working directory.
    task_id = str(uuid.uuid4())[:8]
    work_dir = os.path.abspath(f"./api_workspace/{task_id}")
    out_dir = os.path.join(work_dir, "OUT")

    # Schedule cleanup exactly once, up front. cleanup_task_dir is a no-op if
    # the directory was never created, so this is safe on every path.
    background_tasks.add_task(cleanup_task_dir, work_dir)

    try:
        # Never join the raw client filename into a path: it could be None,
        # absolute, or contain ".." and escape work_dir.
        safe_name = _safe_upload_name(file.filename)
        os.makedirs(work_dir, exist_ok=True)
        input_file_path = os.path.join(work_dir, safe_name)

        # 2. Receive the file and start processing.
        print(f"[Task {task_id}] 收到文件: {safe_name},开始处理...")

        # 3-5. Save, run MinerU and zip on a worker thread; doing this inline
        # would block the event loop for the whole extraction.
        zip_file_path = await asyncio.to_thread(
            _extract_to_zip,
            file.file,
            input_file_path,
            out_dir,
            os.path.join(work_dir, "result"),
        )

        # 6. Hand the archive back to the client.
        return FileResponse(
            zip_file_path,
            media_type="application/zip",
            filename=f"parsed_{safe_name}.zip",
        )
    except Exception as e:
        # Top-level boundary for this request: report the failure to the
        # caller; the cleanup task registered above reclaims the disk space.
        return {"status": "error", "message": str(e)}
|
|
|
|
if __name__ == "__main__":
    # Startup banner. The URL in it is a fixed string; it is not derived from
    # the host/port handed to uvicorn below.
    print("🚀 MinerU API 启动成功!监听地址: http://192.168.4.6:5000")

    # Serve the app on all interfaces, port 5000.
    bind_options = {"host": "0.0.0.0", "port": 5000}
    uvicorn.run(app, **bind_options)