feat: 建立 SAM2 标注闭环基线

- 打通工作区真实标注闭环：支持手工多边形、矩形、圆形、点区域和线段生成 mask，并可保存、回显、更新和删除后端 annotation。
- 增强 polygon 编辑器：支持顶点拖动、顶点删除、边中点插入、多 polygon 子区域选择编辑，以及区域合并和区域去除。
- 接入 GT mask 导入：后端支持二值/多类别 mask 拆分、contour 转 polygon、distance transform seed point，前端支持导入、回显和 seed point 拖动编辑。
- 完善导出能力：COCO JSON 导出对齐前端，PNG mask ZIP 同时包含单标注 mask、按 zIndex 融合的 semantic_frame 和 semantic_classes.json。
- 打通异步任务管理：新增任务取消、重试、失败详情接口与 Dashboard 控件，worker 支持取消状态检查并通过 Redis/WebSocket 推送 cancelled 事件。
- 对接 Dashboard 后端数据：概览统计、解析队列和实时流转记录从 FastAPI 聚合接口与 WebSocket 更新。
- 增强 AI 推理参数：前端发送 crop_to_prompt、auto_filter_background 和 min_score，后端支持点/框 prompt 局部裁剪推理、结果回映射和负向点/低分过滤。
- 接入 SAM3 基础设施：新增独立 Python 3.12 sam3 环境安装脚本、外部 worker helper、后端桥接和真实 Python/CUDA/包/HF checkpoint access 状态检测。
- 保留 SAM3 授权边界：当前官方 facebook/sam3 gated 权重未授权时状态接口会返回不可用，不伪装成可推理。
- 增强前端状态管理：新增 mask undo/redo 历史栈、AI 模型选择状态、保存状态 dirty/draft/saved 流转和项目状态归一化。
- 更新前端 API 封装：补充 annotation CRUD、GT mask import、mask ZIP export、task cancel/retry/detail、AI runtime status 和 prediction options。
- 更新 UI 控件：ToolsPalette、AISegmentation、VideoWorkspace 和 CanvasArea 接入真实操作、导入导出、撤销重做、任务控制和模型状态。
- 新增 polygon-clipping 依赖，用于前端区域 union/difference 几何运算。
- 完善后端 schemas/status/progress：补充 AI 模型外部状态字段、任务 cancelled 状态和进度事件 payload。
- 补充测试覆盖：新增后端任务控制、SAM3 桥接、GT mask、导出融合、AI options 测试；补充前端 Canvas、Dashboard、VideoWorkspace、ToolsPalette、API 和 store 测试。
- 更新 README、AGENTS 和 doc 文档：冻结当前需求/设计/测试计划，标注真实功能、剩余 Mock、SAM3 授权边界和后续实施顺序。
2026-05-01 15:26:25 +08:00
parent f020ff3b4f
commit 689a9ba283
48 changed files with 3280 additions and 176 deletions
--- a/backend/tests/test_ai.py
+++ b/backend/tests/test_ai.py
@@ -1,4 +1,5 @@
 import numpy as np
+import cv2


 def _create_project_and_frame(client):
@@ -46,6 +47,46 @@ def test_predict_accepts_point_object_with_labels(client, monkeypatch):
    assert calls["args"] == ([[0.5, 0.5], [0.1, 0.1]], [1, 0])


+def test_predict_applies_crop_and_background_filter_options(client, monkeypatch):  # POST /api/ai/predict with crop/filter options set
+    _, frame, _ = _create_project_and_frame(client)
+    calls = {}  # filled in by the fake predictor so the test can inspect what it received
+    monkeypatch.setattr("routers.ai._load_frame_image", lambda frame: np.zeros((100, 200, 3), dtype=np.uint8))  # blank frame of shape (100, 200, 3); no real image is read
+
+    def fake_predict_points(model, image, points, labels):  # stand-in for sam_registry.predict_points
+        calls["shape"] = image.shape
+        calls["points"] = points
+        calls["labels"] = labels
+        return (  # (polygons, scores): two triangles scored 0.9 and 0.01
+            [
+                [[0.0, 0.0], [0.2, 0.0], [0.2, 0.2]],
+                [[0.45, 0.45], [0.55, 0.45], [0.55, 0.55]],
+            ],
+            [0.9, 0.01],
+        )
+
+    monkeypatch.setattr("routers.ai.sam_registry.predict_points", fake_predict_points)
+
+    response = client.post("/api/ai/predict", json={
+        "image_id": frame["id"],
+        "prompt_type": "point",
+        "prompt_data": {"points": [[0.5, 0.5], [0.52, 0.52]], "labels": [1, 0]},  # presumably 1 = positive point, 0 = negative point; confirm against the router
+        "options": {
+            "crop_to_prompt": True,
+            "crop_margin": 0.1,
+            "auto_filter_background": True,
+            "min_score": 0.05,
+        },
+    })
+
+    assert response.status_code == 200
+    assert calls["shape"][0] < 100  # predictor saw fewer rows than the 100-row frame, i.e. a crop
+    assert calls["shape"][1] < 200  # predictor saw fewer columns than the 200-column frame
+    assert calls["labels"] == [1, 0]  # labels reach the predictor unchanged
+    assert response.json()["scores"] == [0.9]  # only the 0.9 result is returned; the 0.01 one is presumably dropped by min_score=0.05 and/or the background filter
+    polygon = response.json()["polygons"][0]
+    assert all(0.0 <= coord <= 1.0 for point in polygon for coord in point)  # returned coordinates stay within [0, 1]
+
+
 def test_predict_box_and_semantic_fallback(client, monkeypatch):
    _, frame, _ = _create_project_and_frame(client)
    monkeypatch.setattr("routers.ai._load_frame_image", lambda frame: np.zeros((10, 10, 3), dtype=np.uint8))
@@ -246,3 +287,62 @@ def test_update_and_delete_annotation_validation(client):
        f"/api/ai/annotations/{saved['id']}",
        json={"template_id": 999},
    ).status_code == 404
+
+
+def test_import_gt_mask_creates_annotations_with_seed_points(client):  # single-region mask upload to /api/ai/import-gt-mask
+    project, frame, template = _create_project_and_frame(client)
+    mask = np.zeros((360, 640), dtype=np.uint8)  # single-channel mask, all zeros
+    cv2.rectangle(mask, (100, 80), (260, 220), 255, thickness=-1)  # one filled rectangle with pixel value 255
+    ok, encoded = cv2.imencode(".png", mask)  # PNG-encode in memory; nothing is written to disk
+    assert ok
+
+    response = client.post(
+        "/api/ai/import-gt-mask",
+        data={  # form fields; ids are sent as strings
+            "project_id": str(project["id"]),
+            "frame_id": str(frame["id"]),
+            "template_id": str(template["id"]),
+            "label": "Imported GT",
+            "color": "#22c55e",
+        },
+        files={"file": ("mask.png", encoded.tobytes(), "image/png")},  # the mask travels as a file upload
+    )
+
+    assert response.status_code == 201
+    body = response.json()
+    assert len(body) == 1  # one rectangle in the mask -> exactly one annotation expected
+    assert body[0]["project_id"] == project["id"]
+    assert body[0]["frame_id"] == frame["id"]
+    assert body[0]["template_id"] == template["id"]
+    assert body[0]["mask_data"]["label"] == "Imported GT"
+    assert body[0]["mask_data"]["source"] == "gt_mask"
+    assert body[0]["mask_data"]["gt_label_value"] == 255  # matches the pixel value drawn above
+    assert len(body[0]["mask_data"]["polygons"][0]) >= 3  # first polygon has at least three vertices
+    assert len(body[0]["points"]) == 1  # exactly one point (the seed point, per the test name)
+    assert 0.0 <= body[0]["points"][0][0] <= 1.0  # first coordinate of the point lies in [0, 1]
+    assert 0.0 <= body[0]["points"][0][1] <= 1.0  # second coordinate of the point lies in [0, 1]
+
+
+def test_import_gt_mask_splits_label_values(client):  # mask with two distinct pixel values -> two annotations
+    project, frame, _ = _create_project_and_frame(client)
+    mask = np.zeros((360, 640), dtype=np.uint8)  # single-channel mask, all zeros
+    cv2.rectangle(mask, (20, 20), (120, 120), 1, thickness=-1)  # filled rectangle with pixel value 1
+    cv2.rectangle(mask, (220, 80), (320, 180), 2, thickness=-1)  # separate filled rectangle with pixel value 2
+    ok, encoded = cv2.imencode(".png", mask)  # PNG-encode in memory
+    assert ok
+
+    response = client.post(
+        "/api/ai/import-gt-mask",
+        data={  # no template_id or color is sent in this request
+            "project_id": str(project["id"]),
+            "frame_id": str(frame["id"]),
+            "label": "GT Class",
+        },
+        files={"file": ("labels.png", encoded.tobytes(), "image/png")},
+    )
+
+    assert response.status_code == 201
+    body = sorted(response.json(), key=lambda item: item["mask_data"]["gt_label_value"])  # sort so the checks do not depend on response order
+    assert [item["mask_data"]["gt_label_value"] for item in body] == [1, 2]  # one annotation per pixel value drawn above
+    assert [item["mask_data"]["label"] for item in body] == ["GT Class 1", "GT Class 2"]  # expected labels are the submitted label plus the pixel value
+    assert all(len(item["points"]) == 1 for item in body)  # each annotation carries exactly one point