Files
my-vault/Dehaze/Mineru_api_server-V2.py

66 lines
2.5 KiB
Python

import os
import uuid
import shutil
import subprocess
from fastapi import FastAPI, UploadFile, File, BackgroundTasks # 1. 导入 BackgroundTasks
from fastapi.responses import FileResponse
import uvicorn
# ASGI application object; the route handlers in this file register on it.
app = FastAPI(title="MinerU Extraction API")
# Cleanup helper, scheduled as a background task by the request handler.
def cleanup_task_dir(dir_path: str) -> None:
    """Delete the directory at *dir_path* together with all of its contents.

    A path that no longer exists is ignored, so calling this more than once
    for the same directory is harmless.

    Args:
        dir_path: Path of the directory to remove.
    """
    # EAFP instead of an exists() pre-check: the directory may vanish between
    # the check and the delete, which would make rmtree raise.
    try:
        shutil.rmtree(dir_path)
    except FileNotFoundError:
        # Already gone; nothing to clean up and nothing to report.
        return
    print(f"🧹 已自动清理临时目录: {dir_path}")
def _safe_upload_name(raw_name, fallback="upload"):
    """Reduce a client-supplied filename to a single safe path component.

    Args:
        raw_name: Filename as sent by the client; may be ``None`` or contain
            directory parts such as ``../../etc/passwd``.
        fallback: Name to use when nothing usable remains.

    Returns:
        The last path component of *raw_name*, or *fallback* when that
        component is empty, ``"."`` or ``".."``.
    """
    # Treat backslashes as separators too, so Windows-style paths are stripped.
    candidate = os.path.basename((raw_name or "").replace("\\", "/"))
    if candidate in ("", ".", ".."):
        return fallback
    return candidate


@app.post("/extract")
async def extract_document(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Run MinerU on an uploaded document and return the result as a zip.

    The upload is stored in a per-request working directory, the ``mineru``
    command line tool is run on it, and the output directory that contains
    the generated Markdown file is zipped and sent back. The working
    directory is removed by a background task after the response is sent.

    Args:
        background_tasks: FastAPI background task queue used for cleanup.
        file: The uploaded document.

    Returns:
        A ``FileResponse`` with the zip archive on success, or a dict
        ``{"status": "error", "message": ...}`` when any step fails.
    """
    # The filename is client-controlled: keep only its final component so it
    # cannot escape the working directory (e.g. "../../x" or "/etc/x").
    safe_name = _safe_upload_name(file.filename)

    # 1. Create an isolated working directory for this task.
    task_id = str(uuid.uuid4())[:8]
    work_dir = os.path.abspath(f"./api_workspace/{task_id}")
    out_dir = os.path.join(work_dir, "OUT")
    os.makedirs(work_dir, exist_ok=True)
    input_file_path = os.path.join(work_dir, safe_name)

    # Register cleanup once; it runs after the response has been sent on both
    # the success path and the error path.
    background_tasks.add_task(cleanup_task_dir, work_dir)

    try:
        # 2. Receive and store the uploaded file.
        print(f"[Task {task_id}] 收到文件: {file.filename},开始处理...")
        with open(input_file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # 3. Run the MinerU command.
        # NOTE(review): subprocess.run blocks the event loop while MinerU is
        # running, so requests are effectively handled one at a time. Moving
        # this to a worker thread would allow parallel MinerU runs; confirm
        # resource limits before changing it.
        subprocess.run(["mineru", "-p", input_file_path, "-o", out_dir], check=True)

        # 4. Skip redundant wrapper folders: zip the first directory that
        #    contains a Markdown file, or the whole output dir if none does.
        target_zip_dir = out_dir
        for root, _dirs, files in os.walk(out_dir):
            if any(name.endswith('.md') for name in files):
                target_zip_dir = root
                break

        # 5. Pack the result.
        zip_base_path = os.path.join(work_dir, "result")
        shutil.make_archive(zip_base_path, 'zip', target_zip_dir)
        zip_file_path = f"{zip_base_path}.zip"

        # 6. Return the archive to the client.
        return FileResponse(
            zip_file_path,
            media_type="application/zip",
            filename=f"parsed_{safe_name}.zip",
        )
    except Exception as e:
        # Boundary handler: report the failure to the client in the response
        # body; the working directory is removed by the task registered above.
        return {"status": "error", "message": str(e)}
if __name__ == "__main__":
    # Script entry point: print the start-up banner (its address text is a
    # fixed string), then serve the app on every interface on port 5000.
    listen_host, listen_port = "0.0.0.0", 5000
    print("🚀 MinerU API 启动成功!监听地址: http://192.168.4.6:5000")
    uvicorn.run(app, host=listen_host, port=listen_port)