feat: 建立 SAM2 标注闭环基线

- 打通工作区真实标注闭环：支持手工多边形、矩形、圆形、点区域和线段生成 mask，并可保存、回显、更新和删除后端 annotation。
- 增强 polygon 编辑器：支持顶点拖动、顶点删除、边中点插入、多 polygon 子区域选择编辑，以及区域合并和区域去除。
- 接入 GT mask 导入：后端支持二值/多类别 mask 拆分、contour 转 polygon、distance transform seed point，前端支持导入、回显和 seed point 拖动编辑。
- 完善导出能力：COCO JSON 导出对齐前端，PNG mask ZIP 同时包含单标注 mask、按 zIndex 融合的 semantic_frame 和 semantic_classes.json。
- 打通异步任务管理：新增任务取消、重试、失败详情接口与 Dashboard 控件，worker 支持取消状态检查并通过 Redis/WebSocket 推送 cancelled 事件。
- 对接 Dashboard 后端数据：概览统计、解析队列和实时流转记录从 FastAPI 聚合接口与 WebSocket 更新。
- 增强 AI 推理参数：前端发送 crop_to_prompt、auto_filter_background 和 min_score，后端支持点/框 prompt 局部裁剪推理、结果回映射和负向点/低分过滤。
- 接入 SAM3 基础设施：新增独立 Python 3.12 sam3 环境安装脚本、外部 worker helper、后端桥接和真实 Python/CUDA/包/HF checkpoint access 状态检测。
- 保留 SAM3 授权边界：当前官方 facebook/sam3 gated 权重未授权时状态接口会返回不可用，不伪装成可推理。
- 增强前端状态管理：新增 mask undo/redo 历史栈、AI 模型选择状态、保存状态 dirty/draft/saved 流转和项目状态归一化。
- 更新前端 API 封装：补充 annotation CRUD、GT mask import、mask ZIP export、task cancel/retry/detail、AI runtime status 和 prediction options。
- 更新 UI 控件：ToolsPalette、AISegmentation、VideoWorkspace 和 CanvasArea 接入真实操作、导入导出、撤销重做、任务控制和模型状态。
- 新增 polygon-clipping 依赖，用于前端区域 union/difference 几何运算。
- 完善后端 schemas/status/progress：补充 AI 模型外部状态字段、任务 cancelled 状态和进度事件 payload。
- 补充测试覆盖：新增后端任务控制、SAM3 桥接、GT mask、导出融合、AI options 测试；补充前端 Canvas、Dashboard、VideoWorkspace、ToolsPalette、API 和 store 测试。
- 更新 README、AGENTS 和 doc 文档：冻结当前需求/设计/测试计划，标注真实功能、剩余 Mock、SAM3 授权边界和后续实施顺序。
2026-05-01 15:26:25 +08:00
parent f020ff3b4f
commit 689a9ba283
48 changed files with 3280 additions and 176 deletions
--- a/backend/services/media_task_runner.py
+++ b/backend/services/media_task_runner.py
@@ -20,9 +20,11 @@ from services.frame_parser import (
    upload_frames_to_minio,
 )
 from statuses import (
+    PROJECT_STATUS_PENDING,
    PROJECT_STATUS_ERROR,
    PROJECT_STATUS_PARSING,
    PROJECT_STATUS_READY,
+    TASK_STATUS_CANCELLED,
    TASK_STATUS_FAILED,
    TASK_STATUS_RUNNING,
    TASK_STATUS_SUCCESS,
@@ -31,6 +33,10 @@ from statuses import (
 logger = logging.getLogger(__name__)


+class TaskCancelled(RuntimeError):
+    """Raised internally when a persisted task has been cancelled.
+
+    ``_ensure_not_cancelled`` raises it after refreshing the task row and
+    finding the cancelled status; ``run_parse_media_task`` catches it in a
+    dedicated ``except TaskCancelled`` branch that finalises the task as
+    cancelled.
+    """
+
+
 def _now() -> datetime:
    return datetime.now(timezone.utc)

@@ -66,12 +72,29 @@ def _set_task_state(
    publish_task_progress_event(task)


+def _project_status_after_stop(project: Project) -> str:
+    """Pick the status a project returns to once a parse run stops early.
+
+    A project that already has frames goes back to ready; a project without
+    frames goes back to pending.
+    """
+    if project.frames:
+        return PROJECT_STATUS_READY
+    return PROJECT_STATUS_PENDING
+
+
+def _ensure_not_cancelled(db: Session, task: ProcessingTask) -> None:
+    """Reload ``task`` from the database and abort if it has been cancelled.
+
+    Raises:
+        TaskCancelled: If the refreshed status is ``TASK_STATUS_CANCELLED``.
+    """
+    db.refresh(task)
+    was_cancelled = task.status == TASK_STATUS_CANCELLED
+    if not was_cancelled:
+        return
+    raise TaskCancelled("Task was cancelled")
+
+
 def run_parse_media_task(db: Session, task_id: int) -> dict[str, Any]:
    """Parse one project's media and update task progress in the database."""
    task = db.query(ProcessingTask).filter(ProcessingTask.id == task_id).first()
    if not task:
        raise ValueError(f"Task not found: {task_id}")

+    if task.status == TASK_STATUS_CANCELLED:
+        return {
+            "task_id": task.id,
+            "status": TASK_STATUS_CANCELLED,
+            "message": task.message or "任务已取消",
+        }
+
    if task.project_id is None:
        _set_task_state(
            db,
@@ -111,6 +134,7 @@ def run_parse_media_task(db: Session, task_id: int) -> dict[str, Any]:
        db.commit()
        raise ValueError("Project has no media uploaded")

+    _ensure_not_cancelled(db, task)
    project.status = PROJECT_STATUS_PARSING
    _set_task_state(db, task, status=TASK_STATUS_RUNNING, progress=5, message="后台解析已启动", started=True)

@@ -121,6 +145,7 @@ def run_parse_media_task(db: Session, task_id: int) -> dict[str, Any]:
    os.makedirs(output_dir, exist_ok=True)

    try:
+        _ensure_not_cancelled(db, task)
        _set_task_state(db, task, progress=15, message="正在下载媒体文件")
        if effective_source == "dicom":
            dcm_dir = os.path.join(tmp_dir, "dcm")
@@ -129,20 +154,24 @@ def run_parse_media_task(db: Session, task_id: int) -> dict[str, Any]:
            client = get_minio_client()
            objects = list(client.list_objects(BUCKET_NAME, prefix=project.video_path, recursive=True))
            for obj in objects:
+                _ensure_not_cancelled(db, task)
                if obj.object_name.lower().endswith(".dcm"):
                    data = download_file(obj.object_name)
                    local_dcm = os.path.join(dcm_dir, os.path.basename(obj.object_name))
                    with open(local_dcm, "wb") as f:
                        f.write(data)

+            _ensure_not_cancelled(db, task)
            _set_task_state(db, task, progress=35, message="正在解析 DICOM 序列")
            frame_files = parse_dicom(dcm_dir, output_dir)
        else:
+            _ensure_not_cancelled(db, task)
            media_bytes = download_file(project.video_path)
            local_path = os.path.join(tmp_dir, Path(project.video_path).name)
            with open(local_path, "wb") as f:
                f.write(media_bytes)

+            _ensure_not_cancelled(db, task)
            _set_task_state(db, task, progress=35, message="正在使用 FFmpeg/OpenCV 拆帧")
            frame_files, original_fps = parse_video(local_path, output_dir, fps=int(parse_fps))
            project.original_fps = original_fps
@@ -158,12 +187,15 @@ def run_parse_media_task(db: Session, task_id: int) -> dict[str, Any]:
            except Exception as exc:  # noqa: BLE001
                logger.warning("Thumbnail extraction failed: %s", exc)

+        _ensure_not_cancelled(db, task)
        _set_task_state(db, task, progress=70, message="正在上传帧到对象存储")
        object_names = upload_frames_to_minio(frame_files, project.id)

+        _ensure_not_cancelled(db, task)
        _set_task_state(db, task, progress=85, message="正在写入帧索引")
        frames_out = []
        for idx, obj_name in enumerate(object_names):
+            _ensure_not_cancelled(db, task)
            local_frame = frame_files[idx]
            try:
                import cv2
@@ -203,6 +235,23 @@ def run_parse_media_task(db: Session, task_id: int) -> dict[str, Any]:
        )
        logger.info("Parsed %d frames for project_id=%s", len(frames_out), project.id)
        return result
+    except TaskCancelled:
+        project.status = _project_status_after_stop(project)
+        task.status = TASK_STATUS_CANCELLED
+        task.progress = 100
+        task.message = task.message or "任务已取消"
+        task.error = task.error or "Cancelled by user"
+        task.finished_at = task.finished_at or _now()
+        db.commit()
+        db.refresh(task)
+        publish_task_progress_event(task)
+        logger.info("Parse task cancelled: task_id=%s project_id=%s", task.id, project.id)
+        return {
+            "task_id": task.id,
+            "project_id": project.id,
+            "status": TASK_STATUS_CANCELLED,
+            "message": task.message,
+        }
    except Exception as exc:  # noqa: BLE001
        project.status = PROJECT_STATUS_ERROR
        _set_task_state(
--- a/backend/services/sam3_engine.py
+++ b/backend/services/sam3_engine.py
@@ -9,8 +9,14 @@ the package.
 from __future__ import annotations

 import importlib.util
+import json
 import logging
+import os
+import subprocess
 import sys
+import tempfile
+import time
+from pathlib import Path
 from typing import Any

 import numpy as np
@@ -41,6 +47,8 @@ class SAM3Engine:
        self._processor: Any | None = None
        self._model_loaded = False
        self._last_error: str | None = None
+        self._external_status_cache: dict[str, Any] | None = None
+        self._external_status_checked_at = 0.0

    def _python_ok(self) -> bool:
        return sys.version_info >= (3, 12)
@@ -51,6 +59,81 @@ class SAM3Engine:
    def _can_load(self) -> bool:
        return bool(SAM3_PACKAGE_AVAILABLE and TORCH_AVAILABLE and self._python_ok() and self._gpu_ok())

+    def _worker_path(self) -> Path:
+        """Return the path of the helper script located next to this module."""
+        this_module = Path(__file__)
+        return this_module.with_name("sam3_external_worker.py")
+
+    def _external_python_exists(self) -> bool:
+        """Tell whether the external runtime is enabled and its interpreter file exists."""
+        if not settings.sam3_external_enabled:
+            return False
+        return bool(os.path.isfile(settings.sam3_external_python))
+
+    def _external_status(self, force: bool = False) -> dict[str, Any]:
+        """Probe the external SAM 3 runtime and return its status payload.
+
+        The result is cached for ``settings.sam3_status_cache_seconds``; pass
+        ``force=True`` to bypass the cache. Every failure mode (runtime
+        disabled, interpreter missing, helper exiting non-zero, helper output
+        that is not a JSON object, or any exception while running it) yields
+        the same "unavailable" payload with an explanatory ``message``.
+
+        Args:
+            force: Ignore the cached payload and run the probe again.
+
+        Returns:
+            The helper's status dict on success, otherwise the unavailable
+            payload.
+        """
+
+        def unavailable(message: str) -> dict[str, Any]:
+            # Single source of truth for the "not usable" payload shape.
+            return {
+                "available": False,
+                "package_available": False,
+                "python_ok": False,
+                "torch_ok": False,
+                "cuda_available": False,
+                "device": "unavailable",
+                "message": message,
+            }
+
+        now = time.monotonic()
+        if (
+            not force
+            and self._external_status_cache is not None
+            and now - self._external_status_checked_at < settings.sam3_status_cache_seconds
+        ):
+            return self._external_status_cache
+
+        if not settings.sam3_external_enabled:
+            status = unavailable("SAM 3 external runtime is disabled.")
+        elif not self._external_python_exists():
+            status = unavailable(f"SAM 3 external Python not found: {settings.sam3_external_python}")
+        else:
+            try:
+                env = os.environ.copy()
+                env["SAM3_MODEL_VERSION"] = settings.sam3_model_version
+                completed = subprocess.run(
+                    [settings.sam3_external_python, str(self._worker_path()), "--status"],
+                    capture_output=True,
+                    text=True,
+                    # The status probe is capped at 30 seconds regardless of the inference timeout.
+                    timeout=min(settings.sam3_timeout_seconds, 30),
+                    check=False,
+                    env=env,
+                )
+                if completed.returncode != 0:
+                    detail = completed.stderr.strip() or completed.stdout.strip()
+                    status = unavailable(f"SAM 3 external status failed: {detail}")
+                else:
+                    status = json.loads(completed.stdout)
+                    # Callers use ``.get`` on the payload, so anything but an object is a failure.
+                    if not isinstance(status, dict):
+                        raise ValueError("helper did not return a JSON object")
+            except Exception as exc:  # noqa: BLE001
+                status = unavailable(f"SAM 3 external status failed: {exc}")
+
+        self._external_status_cache = status
+        self._external_status_checked_at = now
+        return status
+
    def _load_model(self) -> None:
        if self._model_loaded:
            return
@@ -92,26 +175,86 @@ class SAM3Engine:
        return "SAM 3 dependencies are present; model will load on first inference."

    def status(self) -> dict:
+        """Report SAM 3 availability, combining the in-process checks with the external runtime probe."""
-        available = self._can_load()
+        external_status = self._external_status()
+        available = bool(self._can_load() or external_status.get("available"))
+        external_ready = bool(external_status.get("available"))
+        # Message precedence: loaded model, then a ready external runtime, then the
+        # external failure reason (only when the in-process path cannot load),
+        # otherwise the last error or the default status message.
+        message = self._last_error or self._status_message()
+        if self._processor is not None:
+            message = "SAM 3 model loaded and ready."
+        elif external_ready:
+            message = "SAM 3 external runtime is ready; model will load in the helper process on inference."
+        elif external_status.get("message") and not self._can_load():
+            message = str(external_status["message"])
        return {
            "id": "sam3",
            "label": "SAM 3",
            "available": available,
            "loaded": self._processor is not None,
-            "device": "cuda" if self._gpu_ok() else "unavailable",
+            "device": "cuda" if self._gpu_ok() else str(external_status.get("device", "unavailable")),
            "supports": ["semantic"],
-            "message": "SAM 3 model loaded and ready." if self._processor is not None else (self._last_error or self._status_message()),
-            "package_available": SAM3_PACKAGE_AVAILABLE,
-            "checkpoint_exists": SAM3_PACKAGE_AVAILABLE,
+            "message": message,
+            "package_available": bool(SAM3_PACKAGE_AVAILABLE or external_status.get("package_available")),
+            # NOTE(review): this is true whenever the in-process package is importable,
+            # even if checkpoint access was never verified - confirm that is intended.
+            "checkpoint_exists": bool(SAM3_PACKAGE_AVAILABLE or external_status.get("checkpoint_access")),
            "checkpoint_path": f"official/HuggingFace ({settings.sam3_model_version})",
-            "python_ok": self._python_ok(),
-            "torch_ok": TORCH_AVAILABLE,
+            "python_ok": bool(self._python_ok() or external_status.get("python_ok")),
+            "torch_ok": bool(TORCH_AVAILABLE or external_status.get("torch_ok")),
            "cuda_required": True,
+            "external_available": external_ready,
+            "external_python": settings.sam3_external_python if settings.sam3_external_enabled else None,
        }

+    def _predict_semantic_external(self, image: np.ndarray, text: str) -> tuple[list[list[list[float]]], list[float]]:
+        """Run one text-prompted inference in the external SAM 3 helper process.
+
+        The image and a JSON request are written to a temporary directory, the
+        helper is started with ``--request`` and its JSON reply is read from
+        stdout.
+
+        Args:
+            image: Image array handed to ``Image.fromarray``.
+            text: Prompt text; surrounding whitespace is stripped.
+
+        Returns:
+            ``(polygons, scores)`` as reported by the helper.
+
+        Raises:
+            RuntimeError: If the external runtime is unavailable, the helper
+                times out or exits non-zero, or its reply is not a JSON object
+                or carries an ``error`` field.
+        """
+        status = self._external_status(force=True)
+        if not status.get("available"):
+            raise RuntimeError(status.get("message") or "SAM 3 external runtime is unavailable.")
+
+        with tempfile.TemporaryDirectory(prefix="sam3_") as tmpdir:
+            tmp_path = Path(tmpdir)
+            image_path = tmp_path / "image.png"
+            request_path = tmp_path / "request.json"
+            Image.fromarray(image).save(image_path)
+            request_path.write_text(
+                json.dumps(
+                    {
+                        "image_path": str(image_path),
+                        "text": text.strip(),
+                        "model_version": settings.sam3_model_version,
+                        "confidence_threshold": settings.sam3_confidence_threshold,
+                    },
+                    ensure_ascii=False,
+                ),
+                encoding="utf-8",
+            )
+            env = os.environ.copy()
+            env["SAM3_MODEL_VERSION"] = settings.sam3_model_version
+            try:
+                completed = subprocess.run(
+                    [settings.sam3_external_python, str(self._worker_path()), "--request", str(request_path)],
+                    capture_output=True,
+                    text=True,
+                    timeout=settings.sam3_timeout_seconds,
+                    check=False,
+                    env=env,
+                )
+            except subprocess.TimeoutExpired as exc:
+                # Report a timeout the same way as every other inference failure.
+                raise RuntimeError(
+                    f"SAM 3 external inference timed out after {settings.sam3_timeout_seconds} seconds."
+                ) from exc
+
+        if completed.returncode != 0:
+            detail = completed.stderr.strip() or completed.stdout.strip()
+            # The helper reports failures as {"error": ...}; fall back to the raw text otherwise.
+            try:
+                parsed = json.loads(detail)
+            except ValueError:
+                parsed = None
+            if isinstance(parsed, dict):
+                detail = parsed.get("error", detail)
+            raise RuntimeError(f"SAM 3 external inference failed: {detail}")
+
+        stdout = completed.stdout.strip()
+        # Code running in the helper may print to stdout before the reply, so
+        # fall back to the last non-empty line when the whole stream is not JSON.
+        candidates = [stdout]
+        non_empty_lines = [line for line in stdout.splitlines() if line.strip()]
+        if non_empty_lines and non_empty_lines[-1].strip() != stdout:
+            candidates.append(non_empty_lines[-1].strip())
+        payload = None
+        for candidate in candidates:
+            try:
+                payload = json.loads(candidate)
+            except ValueError:
+                continue
+            break
+        if not isinstance(payload, dict):
+            raise RuntimeError("SAM 3 external inference returned an invalid response.")
+        if payload.get("error"):
+            raise RuntimeError(str(payload["error"]))
+        return payload.get("polygons", []), payload.get("scores", [])
+
    def predict_semantic(self, image: np.ndarray, text: str) -> tuple[list[list[list[float]]], list[float]]:
        if not text.strip():
            raise ValueError("SAM 3 semantic prompt requires non-empty text.")
+        if not self._can_load() and self._external_status().get("available"):
+            return self._predict_semantic_external(image, text)
        if not self._ensure_ready():
            raise RuntimeError(self.status()["message"])

--- a/backend/services/sam3_external_worker.py
+++ b/backend/services/sam3_external_worker.py
@@ -0,0 +1,190 @@
+"""Standalone SAM 3 helper for the dedicated Python 3.12 runtime.
+
+The main FastAPI backend can keep running in the existing Python 3.11/SAM 2
+environment while this helper is executed with a separate conda env that meets
+SAM 3's stricter runtime requirements.
+"""
+
+from __future__ import annotations
+
+import argparse
+import importlib.util
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from PIL import Image
+
+
+def _torch_status() -> tuple[bool, str | None, str | None, str | None]:
+    """Describe the local PyTorch/CUDA setup.
+
+    Returns:
+        ``(cuda_available, torch_version, cuda_version, device_name)``. Any
+        failure while importing or querying torch yields
+        ``(False, None, None, None)``.
+    """
+    try:
+        import torch
+
+        has_cuda = bool(torch.cuda.is_available())
+        torch_version = getattr(torch, "__version__", None)
+        cuda_version = getattr(torch.version, "cuda", None)
+        device_name = torch.cuda.get_device_name(0) if has_cuda else None
+    except Exception:  # noqa: BLE001
+        return False, None, None, None
+    return has_cuda, torch_version, cuda_version, device_name
+
+
+def _compact_error(exc: Exception) -> str:
+    """Reduce an exception to one short line.
+
+    Prefers the first line containing "Access to model" or "Cannot access
+    gated repo"; otherwise returns the first non-empty line of the message,
+    or the exception class name when the message has no text.
+    """
+    markers = ("Access to model", "Cannot access gated repo")
+    stripped = (raw.strip() for raw in str(exc).splitlines())
+    lines = [text for text in stripped if text]
+    gated = next((text for text in lines if any(marker in text for marker in markers)), None)
+    if gated is not None:
+        return gated
+    if lines:
+        return lines[0]
+    return exc.__class__.__name__
+
+
+def _checkpoint_access(model_version: str) -> tuple[bool, str | None]:
+    """Check whether ``config.json`` can be fetched from the model's Hugging Face repo.
+
+    Returns:
+        ``(True, None)`` on success, otherwise ``(False, reason)`` where
+        ``reason`` is the compacted error text.
+    """
+    try:
+        from huggingface_hub import hf_hub_download
+
+        repo_id = "facebook/sam3"
+        if model_version == "sam3.1":
+            repo_id = "facebook/sam3.1"
+        hf_hub_download(repo_id=repo_id, filename="config.json")
+    except Exception as exc:  # noqa: BLE001
+        return False, _compact_error(exc)
+    return True, None
+
+
+def runtime_status() -> dict[str, Any]:
+    """Collect the readiness report for this runtime.
+
+    Checks the Python version, whether ``sam3`` imports, the torch/CUDA
+    state and, only when the package imports, Hugging Face checkpoint access
+    for the version named by the ``SAM3_MODEL_VERSION`` environment variable
+    (default ``"sam3"``).
+    """
+    model_version = os.environ.get("SAM3_MODEL_VERSION", "sam3")
+
+    import_error = None
+    has_package = importlib.util.find_spec("sam3") is not None
+    if has_package:
+        try:
+            import sam3  # noqa: F401
+        except Exception as exc:  # noqa: BLE001
+            has_package, import_error = False, str(exc)
+
+    has_cuda, torch_version, cuda_version, device_name = _torch_status()
+    has_torch = torch_version is not None
+    python_ok = sys.version_info >= (3, 12)
+
+    if has_package:
+        has_checkpoint, checkpoint_error = _checkpoint_access(model_version)
+    else:
+        has_checkpoint, checkpoint_error = False, None
+
+    ready = bool(has_package and python_ok and has_cuda and has_checkpoint)
+
+    # Each entry pairs "is this requirement missing?" with its label; the order
+    # fixes the order in which problems are listed in the message.
+    package_label = f"sam3 package ({import_error})" if import_error else "sam3 package"
+    checks = [
+        (not python_ok, "Python 3.12+ runtime"),
+        (not has_package, package_label),
+        (not has_torch, "PyTorch"),
+        (not has_cuda, "CUDA GPU"),
+        (has_package and not has_checkpoint, f"Hugging Face checkpoint access ({checkpoint_error})"),
+    ]
+    missing = [label for is_missing, label in checks if is_missing]
+
+    if ready:
+        message = "SAM 3 external runtime is ready."
+    else:
+        message = f"SAM 3 external runtime unavailable: missing {', '.join(missing)}."
+
+    return {
+        "available": ready,
+        "package_available": has_package,
+        "checkpoint_access": has_checkpoint,
+        "python_ok": python_ok,
+        "torch_ok": has_torch,
+        "torch_version": torch_version,
+        "cuda_version": cuda_version,
+        "cuda_available": has_cuda,
+        "device": "cuda" if has_cuda else "unavailable",
+        "device_name": device_name,
+        "message": message,
+    }
+
+
+def _mask_to_polygon(mask: np.ndarray) -> list[list[float]]:
+    """Convert a mask to the normalised outline of its largest region.
+
+    Non-``uint8`` masks are binarised with ``mask > 0`` first. Only external
+    contours are considered, and the one enclosing the largest area wins.
+
+    Args:
+        mask: Single mask array; its first two dimensions are read as
+            ``(height, width)``.
+
+    Returns:
+        ``[x, y]`` pairs with ``x`` divided by the mask width and ``y`` by the
+        mask height, or an empty list when no contour has at least 3 points.
+    """
+    import cv2
+
+    if mask.dtype != np.uint8:
+        mask = (mask > 0).astype(np.uint8)
+    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    candidates = [contour for contour in contours if len(contour) >= 3]
+    if not candidates:
+        return []
+    # CHAIN_APPROX_SIMPLE collapses straight runs into their end points, so the
+    # point count says nothing about region size; rank by enclosed area instead.
+    largest = max(candidates, key=cv2.contourArea)
+    height, width = mask.shape[:2]
+    return [[float(point[0][0]) / width, float(point[0][1]) / height] for point in largest]
+
+
+def _to_numpy(value: Any) -> np.ndarray:
+    """Coerce a tensor-like or array-like value to a NumPy array.
+
+    Objects exposing ``detach`` are detached and moved to CPU before
+    conversion; objects exposing only ``cpu`` are moved to CPU; anything else
+    goes straight through ``np.asarray``.
+    """
+    if hasattr(value, "detach"):
+        return np.asarray(value.detach().cpu().numpy())
+    if hasattr(value, "cpu"):
+        return np.asarray(value.cpu().numpy())
+    return np.asarray(value)
+
+
+def predict(request_path: Path) -> dict[str, Any]:
+    """Run one text-prompted SAM 3 inference described by a JSON request file.
+
+    The request must provide ``image_path`` and ``text``;
+    ``confidence_threshold`` is optional and defaults to 0.5.
+
+    Args:
+        request_path: Path of the UTF-8 JSON request file.
+
+    Returns:
+        A dict with ``polygons`` (one normalised outline per mask that yields
+        a polygon) and ``scores``. When the model returns one score per mask,
+        the scores of masks that produced no polygon are dropped so both
+        lists stay index-aligned; otherwise the scores pass through unchanged.
+
+    Raises:
+        ValueError: If the prompt text is empty after stripping.
+    """
+    import torch
+    from sam3.model.sam3_image_processor import Sam3Processor
+    from sam3.model_builder import build_sam3_image_model
+
+    payload = json.loads(request_path.read_text(encoding="utf-8"))
+    image_path = Path(payload["image_path"])
+    text = str(payload["text"]).strip()
+    threshold = float(payload.get("confidence_threshold", 0.5))
+    if not text:
+        raise ValueError("SAM 3 semantic prompt requires non-empty text.")
+
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
+
+    image = Image.open(image_path).convert("RGB")
+    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
+        model = build_sam3_image_model()
+        processor = Sam3Processor(model, confidence_threshold=threshold)
+        state = processor.set_image(image)
+        output = processor.set_text_prompt(state=state, prompt=text)
+
+    masks = _to_numpy(output.get("masks", []))
+    scores = _to_numpy(output.get("scores", []))
+    if masks.ndim == 4:
+        # Drop the second axis so each item is a 2-D mask.
+        masks = masks[:, 0]
+    elif masks.ndim == 2:
+        # A lone 2-D mask: add a leading axis so the loop sees one mask, not its rows.
+        masks = masks[None]
+
+    # Scores can only be paired with masks when there is exactly one per mask.
+    aligned = scores.ndim == 1 and scores.shape[0] == len(masks)
+    float_scores = scores.astype(float) if aligned else None
+
+    polygons = []
+    kept_scores = []
+    for index, mask in enumerate(masks):
+        polygon = _mask_to_polygon(mask)
+        if not polygon:
+            continue
+        polygons.append(polygon)
+        if float_scores is not None:
+            kept_scores.append(float(float_scores[index]))
+
+    if float_scores is not None:
+        score_list = kept_scores
+    else:
+        score_list = scores.astype(float).tolist() if scores.size else []
+
+    return {
+        "polygons": polygons,
+        "scores": score_list,
+    }
+
+
+def main() -> int:
+    """Command-line entry point: handle ``--status`` or ``--request PATH``.
+
+    Prints the JSON result to stdout and returns 0 on success. Any exception
+    is reported as ``{"error": ...}`` on stderr with return code 1. When
+    neither option is given, ``parser.error`` is invoked.
+    """
+    parser = argparse.ArgumentParser(description="SAM 3 external runtime helper")
+    parser.add_argument("--status", action="store_true")
+    parser.add_argument("--request", type=Path)
+    options = parser.parse_args()
+
+    try:
+        if options.status:
+            report = runtime_status()
+        elif options.request:
+            report = predict(options.request)
+        else:
+            parser.error("Use --status or --request")
+            return 2
+        print(json.dumps(report, ensure_ascii=False))
+    except Exception as exc:  # noqa: BLE001
+        print(json.dumps({"error": str(exc)}, ensure_ascii=False), file=sys.stderr)
+        return 1
+
+    return 0
+
+
+# When run as a script, exit with the code returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())