"""Background media parsing runner used by Celery workers."""

import logging
import os
import shutil
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

from sqlalchemy.orm import Session

from minio_client import BUCKET_NAME, download_file, get_minio_client, upload_file
from models import Frame, ProcessingTask, Project
from progress_events import publish_task_progress_event
from services.frame_parser import (
    extract_thumbnail,
    parse_dicom,
    parse_video,
    upload_frames_to_minio,
)
from statuses import (
    PROJECT_STATUS_ERROR,
    PROJECT_STATUS_PARSING,
    PROJECT_STATUS_READY,
    TASK_STATUS_FAILED,
    TASK_STATUS_RUNNING,
    TASK_STATUS_SUCCESS,
)

logger = logging.getLogger(__name__)


def _now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


def _set_task_state(
    db: Session,
    task: ProcessingTask,
    *,
    status: str | None = None,
    progress: int | None = None,
    message: str | None = None,
    result: dict[str, Any] | None = None,
    error: str | None = None,
    started: bool = False,
    finished: bool = False,
) -> None:
    """Update the given task fields, commit, and publish a progress event.

    Only arguments that are not ``None`` overwrite the matching attribute on
    ``task``. ``progress`` is clamped to the 0..100 range. ``started`` and
    ``finished`` stamp ``started_at`` / ``finished_at`` with the current UTC
    time.

    The commit covers the whole session, so any other pending change on ``db``
    (for example a modified project) is persisted by this call as well.
    """
    if status is not None:
        task.status = status
    if progress is not None:
        task.progress = max(0, min(100, progress))
    if message is not None:
        task.message = message
    if result is not None:
        task.result = result
    if error is not None:
        task.error = error
    if started:
        task.started_at = _now()
    if finished:
        task.finished_at = _now()
    db.commit()
    db.refresh(task)
    publish_task_progress_event(task)


def _fail_task(db: Session, task: ProcessingTask, *, message: str, error: str) -> None:
    """Mark ``task`` as failed at 100% progress with the given message and error."""
    _set_task_state(
        db,
        task,
        status=TASK_STATUS_FAILED,
        progress=100,
        message=message,
        error=error,
        finished=True,
    )


def _download_dicom_series(prefix: str, dest_dir: str) -> None:
    """Download every ``.dcm`` object listed under ``prefix`` into ``dest_dir``.

    Files are stored under their object basename, so two objects sharing a
    basename in different sub-prefixes overwrite each other.
    NOTE(review): confirm uploads never produce duplicate basenames.
    """
    client = get_minio_client()
    for obj in client.list_objects(BUCKET_NAME, prefix=prefix, recursive=True):
        if not obj.object_name.lower().endswith(".dcm"):
            continue
        data = download_file(obj.object_name)
        local_dcm = os.path.join(dest_dir, os.path.basename(obj.object_name))
        with open(local_dcm, "wb") as f:
            f.write(data)


def _read_frame_size(frame_path: str) -> tuple[int | None, int | None]:
    """Return ``(width, height)`` of an image file, or ``(None, None)`` on any failure.

    OpenCV is imported lazily so that a missing or broken ``cv2`` install only
    costs the size metadata instead of failing the whole parse.
    """
    try:
        import cv2

        img = cv2.imread(frame_path)
        if img is None:
            return None, None
        height, width = img.shape[:2]
        return width, height
    except Exception:  # noqa: BLE001
        return None, None


def run_parse_media_task(db: Session, task_id: int) -> dict[str, Any]:
    """Parse one project's media and update task progress in the database.

    Args:
        db: Session used for every read and write; it is committed repeatedly
            as progress is reported.
        task_id: Primary key of the ``ProcessingTask`` to execute.

    Returns:
        A dict with ``project_id``, ``frames_extracted``, ``status`` and
        ``message`` describing the completed parse.

    Raises:
        ValueError: If the task does not exist, has no ``project_id``, its
            project does not exist, or the project has no media path.
        Exception: Any error raised while downloading, parsing, uploading or
            indexing is re-raised after the task and project have been marked
            as failed.
    """
    task = db.query(ProcessingTask).filter(ProcessingTask.id == task_id).first()
    if not task:
        raise ValueError(f"Task not found: {task_id}")

    if task.project_id is None:
        _fail_task(db, task, message="任务缺少 project_id", error="Task has no project_id")
        raise ValueError("Task has no project_id")

    project = db.query(Project).filter(Project.id == task.project_id).first()
    if not project:
        _fail_task(db, task, message="项目不存在", error="Project not found")
        raise ValueError(f"Project not found: {task.project_id}")

    if not project.video_path:
        # Set the project status first so it is persisted by the same commit
        # that records (and publishes) the task failure.
        project.status = PROJECT_STATUS_ERROR
        _fail_task(db, task, message="项目没有可解析媒体", error="Project has no media uploaded")
        raise ValueError("Project has no media uploaded")

    # Keep a plain copy: ORM attributes may need a reload after commit/rollback.
    project_id = project.id

    project.status = PROJECT_STATUS_PARSING
    _set_task_state(db, task, status=TASK_STATUS_RUNNING, progress=5, message="后台解析已启动", started=True)

    tmp_dir: str | None = None
    try:
        effective_source = (task.payload or {}).get("source_type") or project.source_type or "video"
        parse_fps = project.parse_fps or 30.0

        # Created inside the try so a filesystem error is recorded as a task
        # failure instead of leaving the task stuck in the running state.
        tmp_dir = tempfile.mkdtemp(prefix=f"seg_parse_{project_id}_")
        output_dir = os.path.join(tmp_dir, "frames")
        os.makedirs(output_dir, exist_ok=True)

        _set_task_state(db, task, progress=15, message="正在下载媒体文件")

        if effective_source == "dicom":
            dcm_dir = os.path.join(tmp_dir, "dcm")
            os.makedirs(dcm_dir, exist_ok=True)
            _download_dicom_series(project.video_path, dcm_dir)

            _set_task_state(db, task, progress=35, message="正在解析 DICOM 序列")
            frame_files = parse_dicom(dcm_dir, output_dir)
        else:
            media_bytes = download_file(project.video_path)
            local_path = os.path.join(tmp_dir, Path(project.video_path).name)
            with open(local_path, "wb") as f:
                f.write(media_bytes)

            _set_task_state(db, task, progress=35, message="正在使用 FFmpeg/OpenCV 拆帧")
            frame_files, original_fps = parse_video(local_path, output_dir, fps=int(parse_fps))
            project.original_fps = original_fps

            # The thumbnail is best-effort: a failure is logged and parsing continues.
            thumbnail_path = os.path.join(tmp_dir, "thumbnail.jpg")
            try:
                extract_thumbnail(local_path, thumbnail_path)
                with open(thumbnail_path, "rb") as f:
                    thumb_data = f.read()
                thumb_object = f"projects/{project_id}/thumbnail.jpg"
                upload_file(thumb_object, thumb_data, content_type="image/jpeg", length=len(thumb_data))
                project.thumbnail_url = thumb_object
            except Exception as exc:  # noqa: BLE001
                logger.warning("Thumbnail extraction failed: %s", exc)

        _set_task_state(db, task, progress=70, message="正在上传帧到对象存储")
        object_names = upload_frames_to_minio(frame_files, project_id)

        _set_task_state(db, task, progress=85, message="正在写入帧索引")
        frames_out = []
        for idx, obj_name in enumerate(object_names):
            width, height = _read_frame_size(frame_files[idx])
            frame = Frame(
                project_id=project_id,
                frame_index=idx,
                image_url=obj_name,
                width=width,
                height=height,
            )
            db.add(frame)
            frames_out.append(frame)

        project.status = PROJECT_STATUS_READY
        db.commit()

        result = {
            "project_id": project_id,
            "frames_extracted": len(frames_out),
            "status": PROJECT_STATUS_READY,
            "message": "Frame extraction completed successfully.",
        }
        _set_task_state(
            db,
            task,
            status=TASK_STATUS_SUCCESS,
            progress=100,
            message="解析完成",
            result=result,
            finished=True,
        )
        logger.info("Parsed %d frames for project_id=%s", len(frames_out), project_id)
        return result
    except Exception as exc:  # noqa: BLE001
        logger.exception("Frame extraction failed: %s", exc)
        try:
            # Drop uncommitted rows (e.g. a partial frame index) and reset the
            # session in case the failure came from a flush or commit.
            db.rollback()
            project.status = PROJECT_STATUS_ERROR
            _fail_task(db, task, message="解析失败", error=str(exc))
        except Exception:  # noqa: BLE001
            # Recording the failure is best-effort; never mask the root cause.
            logger.exception("Could not record failure state for task_id=%s", task_id)
        raise
    finally:
        if tmp_dir is not None:
            shutil.rmtree(tmp_dir, ignore_errors=True)