feat: 完善 SAM2.1 模型选择与标注工作流

- 后端 SAM2 引擎新增 sam2.1_hiera_tiny、sam2.1_hiera_small、sam2.1_hiera_base_plus、sam2.1_hiera_large 四个变体定义,并按变体维护 checkpoint/config、image predictor、video predictor、加载状态、错误信息和真实状态回报。

- 后端 SAM registry 仅暴露当前产品启用的 SAM2.1 变体,保留 sam2(以及 sam2.1、sam2_tiny)作为 tiny 兼容别名,拒绝 sam3 产品入口,并把 point、box、interactive、auto、propagate 都分发到所选 SAM2.1 变体。

- 后端默认配置和下载脚本切换到 SAM2.1 checkpoint 命名,支持 legacy SAM2 checkpoint fallback,并在状态消息中标出 fallback 使用情况。

- 前端全局 AI 模型状态新增 SAM2.1 tiny/small/base+/large 类型和默认 tiny,API 请求默认携带 sam2.1_hiera_tiny,AI 页面提供模型变体选择和所选模型状态展示。

- AI 智能分割页移除当前产品不使用的 SAM3/文本提示入口,保留正向点、反向点、框选和参数开关;AI 页只展示本页生成的候选 mask,并支持遮罩清晰度调节、候选 mask 上继续加正/反点、清空本页候选、推送到工作区编辑。

- 工作区和 Canvas 补强 SAM2 交互式细化链路:框选后正/反点继续细化同一个候选 mask,反向点请求启用背景过滤,空结果会移除被否定候选;AI 推送到工作区后保留选中态和未保存 draft mask。

- 工作区标注保存闭环补强:未保存 mask 可归档保存,dirty saved mask 可更新,保存后用后端 saved annotation 替换已提交 draft,清空/删除已保存 mask 时同步后端删除。

- Dashboard 任务进度区改为展示 queued、running、success、failed、cancelled 最近任务,处理中统计只计算 queued/running,并保留近期完成记录。

- 时间轴在顶部时间进度条和底部缩略图导航轴之间新增已编辑帧标记带,基于当前项目帧内 masks 标出已有编辑/标注的帧,并支持点击标记跳转。

- 前端测试覆盖 SAM2.1 变体选择、模型状态徽标、AI 页候选隔离、遮罩透明度、候选上追加正/反点、推送工作区保留选择、Canvas 交互式细化、VideoWorkspace 传播/保存、Dashboard 进度和时间轴已编辑帧标记。

- 后端测试覆盖 SAM2.1 变体状态、sam2 alias 兼容、sam3 禁用、semantic 禁用、传播标注保存、Dashboard 最近任务状态和 SAM3 历史测试跳过说明。

- README、AGENTS 和 doc 文档同步当前真实进度,更新 SAM2.1 变体、SAM3 禁用、接口契约、设计冻结、需求冻结、前端元素审计、实施计划、FastAPI docs 说明和测试矩阵。
This commit is contained in:
2026-05-01 23:39:53 +08:00
parent 8a9247075e
commit 29a1a87e52
38 changed files with 1087 additions and 631 deletions

View File

@@ -19,9 +19,9 @@ class Settings(BaseSettings):
minio_secure: bool = False
# SAM
sam_default_model: str = "sam2"
sam_model_path: str = "/home/wkmgc/Desktop/Seg_Server/models/sam2_hiera_tiny.pt"
sam_model_config: str = "configs/sam2/sam2_hiera_t.yaml"
sam_default_model: str = "sam2.1_hiera_tiny"
sam_model_path: str = "/home/wkmgc/Desktop/Seg_Server/models/sam2.1_hiera_tiny.pt"
sam_model_config: str = "configs/sam2.1/sam2.1_hiera_t.yaml"
sam3_model_version: str = "sam3"
sam3_checkpoint_path: str = "/home/wkmgc/Desktop/Seg_Server/sam3权重/sam3.pt"
sam3_external_enabled: bool = True

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
SAM 2 模型权重下载脚本
SAM 2.1 模型权重下载脚本
运行: python download_sam2.py
"""
import os
@@ -10,12 +10,12 @@ import sys
MODEL_DIR = "/home/wkmgc/Desktop/Seg_Server/models"
os.makedirs(MODEL_DIR, exist_ok=True)
# SAM 2 模型权重 (Meta AI 官方)
# SAM 2.1 模型权重 (Meta AI 官方)
MODELS = {
"sam2_hiera_tiny.pt": "https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_tiny.pt",
"sam2_hiera_small.pt": "https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_small.pt",
"sam2_hiera_base_plus.pt": "https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_base_plus.pt",
"sam2_hiera_large.pt": "https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_large.pt",
"sam2.1_hiera_tiny.pt": "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_tiny.pt",
"sam2.1_hiera_small.pt": "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_small.pt",
"sam2.1_hiera_base_plus.pt": "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_base_plus.pt",
"sam2.1_hiera_large.pt": "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_large.pt",
}
def download_file(url: str, dest: str):
@@ -35,7 +35,7 @@ def download_file(url: str, dest: str):
def main():
print("=" * 50)
print("SAM 2 模型权重下载")
print("SAM 2.1 模型权重下载")
print("=" * 50)
for name, url in MODELS.items():
dest = os.path.join(MODEL_DIR, name)

View File

@@ -231,7 +231,7 @@ def predict(payload: PredictRequest, db: Session = Depends(get_db)) -> dict:
coordinates or `{ "points": [[x, y], ...], "labels": [1, 0, ...] }`.
- **box**: `prompt_data` is `[x1, y1, x2, y2]` normalized coordinates.
- **interactive**: `prompt_data` is `{ "box": [...], "points": [[x, y]], "labels": [1, 0] }`.
- **semantic**: SAM 3 text prompt when model=`sam3`; SAM 2 falls back to auto.
- **semantic**: disabled in the current SAM 2.1 point/box product flow.
"""
frame = db.query(Frame).filter(Frame.id == payload.image_id).first()
if not frame:
@@ -382,7 +382,7 @@ def predict(payload: PredictRequest, db: Session = Depends(get_db)) -> dict:
summary="Get SAM model and GPU runtime status",
)
def model_status(selected_model: str | None = None) -> dict:
"""Return real runtime availability for GPU, SAM 2, and SAM 3."""
"""Return real runtime availability for GPU and the currently enabled SAM model."""
try:
return sam_registry.runtime_status(selected_model)
except ValueError as exc:
@@ -398,7 +398,7 @@ def propagate(payload: PropagateRequest, db: Session = Depends(get_db)) -> dict:
"""Track one selected region from the current frame across nearby frames.
SAM 2 uses the official video predictor with the selected mask as the seed.
SAM 3 uses the external Python 3.12 video tracker with the seed bbox.
SAM 3 video tracking is currently disabled in this product flow.
"""
direction = payload.direction.lower()
if direction not in {"forward", "backward", "both"}:

View File

@@ -14,7 +14,7 @@ from models import Annotation, Frame, ProcessingTask, Project, Template
router = APIRouter(prefix="/api/dashboard", tags=["Dashboard"])
ACTIVE_TASK_STATUSES = {"queued", "running"}
MONITORED_TASK_STATUSES = {"queued", "running", "failed", "cancelled"}
MONITORED_TASK_STATUSES = {"queued", "running", "success", "failed", "cancelled"}
def _system_load_percent() -> int:

View File

@@ -204,7 +204,7 @@ class PropagationSeed(BaseModel):
class PropagateRequest(BaseModel):
project_id: int
frame_id: int
model: Optional[str] = "sam2"
model: Optional[str] = "sam2.1_hiera_tiny"
seed: PropagationSeed
direction: str = "forward"
max_frames: int = 30

View File

@@ -2,6 +2,8 @@
import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import numpy as np
@@ -10,6 +12,67 @@ from config import settings
logger = logging.getLogger(__name__)
# Variant used when a request names no model and no default is configured;
# the legacy aliases in this module also resolve to it.
DEFAULT_SAM2_MODEL_ID = "sam2.1_hiera_tiny"


@dataclass(frozen=True)
class SAM2Variant:
    """One selectable SAM 2.1 runtime variant.

    Bundles the identifiers shown to callers with the config/checkpoint
    names needed to load the variant, including the legacy SAM 2 names
    that are tried as a fallback.
    """

    # Canonical model id; also the key under which the variant is registered.
    id: str
    # Human-readable name, used in status messages.
    label: str
    # Compact size name such as "tiny" or "base+".
    short_label: str
    # Model config paired with the SAM 2.1 checkpoint.
    config: str
    # Model config paired with the legacy SAM 2 checkpoint.
    legacy_config: str
    # File name of the SAM 2.1 checkpoint inside the models directory.
    checkpoint_filename: str
    # File name of the legacy SAM 2 checkpoint inside the models directory.
    legacy_checkpoint_filename: str
# Registry of the selectable SAM 2.1 variants, in display order. Each entry is
# keyed by the variant's own ``id`` so the key and the field cannot drift apart.
SAM2_VARIANTS: dict[str, SAM2Variant] = {
    variant.id: variant
    for variant in (
        SAM2Variant(
            id="sam2.1_hiera_tiny",
            label="SAM 2.1 Tiny",
            short_label="tiny",
            config="configs/sam2.1/sam2.1_hiera_t.yaml",
            legacy_config="configs/sam2/sam2_hiera_t.yaml",
            checkpoint_filename="sam2.1_hiera_tiny.pt",
            legacy_checkpoint_filename="sam2_hiera_tiny.pt",
        ),
        SAM2Variant(
            id="sam2.1_hiera_small",
            label="SAM 2.1 Small",
            short_label="small",
            config="configs/sam2.1/sam2.1_hiera_s.yaml",
            legacy_config="configs/sam2/sam2_hiera_s.yaml",
            checkpoint_filename="sam2.1_hiera_small.pt",
            legacy_checkpoint_filename="sam2_hiera_small.pt",
        ),
        SAM2Variant(
            id="sam2.1_hiera_base_plus",
            label="SAM 2.1 Base+",
            short_label="base+",
            config="configs/sam2.1/sam2.1_hiera_b+.yaml",
            legacy_config="configs/sam2/sam2_hiera_b+.yaml",
            checkpoint_filename="sam2.1_hiera_base_plus.pt",
            legacy_checkpoint_filename="sam2_hiera_base_plus.pt",
        ),
        SAM2Variant(
            id="sam2.1_hiera_large",
            label="SAM 2.1 Large",
            short_label="large",
            config="configs/sam2.1/sam2.1_hiera_l.yaml",
            legacy_config="configs/sam2/sam2_hiera_l.yaml",
            checkpoint_filename="sam2.1_hiera_large.pt",
            legacy_checkpoint_filename="sam2_hiera_large.pt",
        ),
    )
}
# Legacy and shorthand model ids; every one of them resolves to the default
# variant.
SAM2_MODEL_ALIASES = dict.fromkeys(
    ("sam2", "sam2.1", "sam2_tiny"),
    DEFAULT_SAM2_MODEL_ID,
)
# ---------------------------------------------------------------------------
# Attempt to import PyTorch and SAM 2; fall back to stubs if unavailable.
# ---------------------------------------------------------------------------
@@ -38,115 +101,173 @@ class SAM2Engine:
"""Lazy-loaded SAM 2 inference engine."""
def __init__(self) -> None:
self._predictor: Optional[SAM2ImagePredictor] = None
self._video_predictor = None
self._model_loaded = False
self._video_model_loaded = False
self._loaded_device: str | None = None
self._last_error: str | None = None
self._video_last_error: str | None = None
self._predictors: dict[str, Optional[SAM2ImagePredictor]] = {}
self._video_predictors: dict[str, object | None] = {}
self._model_loaded: dict[str, bool] = {}
self._video_model_loaded: dict[str, bool] = {}
self._loaded_device: dict[str, str] = {}
self._last_error: dict[str, str | None] = {}
self._video_last_error: dict[str, str | None] = {}
# -----------------------------------------------------------------------
# Internal helpers
# -----------------------------------------------------------------------
def _load_model(self) -> None:
def variant_ids(self) -> list[str]:
    """Return the ids of all registered SAM 2.1 variants, in registry order."""
    return list(SAM2_VARIANTS)
def normalize_model_id(self, model_id: str | None) -> str:
    """Resolve a requested model id to a canonical SAM 2.1 variant id.

    Falls back to the configured default model, then to the built-in
    default, when ``model_id`` is empty. Matching is case-insensitive and
    aliases are translated to their canonical variant.

    Raises:
        ValueError: If the resolved id is not a registered variant.
    """
    requested = model_id or settings.sam_default_model or DEFAULT_SAM2_MODEL_ID
    lowered = requested.lower()
    canonical = SAM2_MODEL_ALIASES.get(lowered, lowered)
    if canonical in SAM2_VARIANTS:
        return canonical
    raise ValueError(f"Unsupported SAM2 model: {model_id}")
def is_sam2_model(self, model_id: str | None) -> bool:
    """Report whether ``model_id`` resolves to a registered SAM 2.1 variant."""
    try:
        self.normalize_model_id(model_id)
    except ValueError:
        return False
    return True
def _models_dir(self) -> Path:
    """Return the directory that holds the SAM 2 checkpoint files.

    The directory is derived from ``settings.sam_model_path``. When that
    setting is empty, the relative ``models`` directory is used instead.

    Returns:
        Parent directory of the configured checkpoint path, or
        ``Path("models")`` when no checkpoint path is configured.
    """
    configured = settings.sam_model_path
    if not configured:
        # ``Path`` objects are always truthy, so the fallback has to be
        # decided on the raw setting rather than on ``Path(...).parent``.
        return Path("models")
    return Path(configured).parent
def _variant(self, model_id: str | None) -> SAM2Variant:
    """Look up the variant definition for ``model_id`` after normalizing it."""
    variant_id = self.normalize_model_id(model_id)
    return SAM2_VARIANTS[variant_id]
def _checkpoint_config(self, model_id: str | None) -> tuple[str, str]:
    """Pick the (config, checkpoint path) pair to load for a variant.

    Candidates are tried in order: the explicitly configured checkpoint
    (default variant only, and only when that file exists), the SAM 2.1
    checkpoint in the models directory, then the legacy SAM 2 checkpoint.
    The first candidate whose checkpoint file exists wins; when none
    exists, the first candidate is returned so callers can report its path.
    """
    variant_id = self.normalize_model_id(model_id)
    variant = SAM2_VARIANTS[variant_id]
    weights_dir = self._models_dir()
    options: list[tuple[str, str]] = [
        (variant.config, str(weights_dir / variant.checkpoint_filename)),
        (variant.legacy_config, str(weights_dir / variant.legacy_checkpoint_filename)),
    ]

    explicit = Path(settings.sam_model_path)
    if variant_id == DEFAULT_SAM2_MODEL_ID and explicit.is_file():
        # The configured checkpoint takes precedence for the default variant.
        options.insert(0, (settings.sam_model_config, str(explicit)))

    return next(
        (option for option in options if os.path.isfile(option[1])),
        options[0],
    )
def _load_model(self, model_id: str | None = None) -> None:
"""Load the SAM 2 model and predictor on first use."""
if self._model_loaded:
variant_id = self.normalize_model_id(model_id)
if self._model_loaded.get(variant_id):
return
if not TORCH_AVAILABLE:
self._last_error = "PyTorch is not installed."
self._last_error[variant_id] = "PyTorch is not installed."
logger.warning("PyTorch not available; skipping SAM2 model load.")
self._model_loaded = True
self._model_loaded[variant_id] = True
return
if not SAM2_AVAILABLE:
self._last_error = "sam2 package is not installed."
self._last_error[variant_id] = "sam2 package is not installed."
logger.warning("SAM2 not available; skipping model load.")
self._model_loaded = True
self._model_loaded[variant_id] = True
return
if not os.path.isfile(settings.sam_model_path):
self._last_error = f"SAM2 checkpoint not found: {settings.sam_model_path}"
logger.error("SAM checkpoint not found at %s", settings.sam_model_path)
self._model_loaded = True
config, checkpoint_path = self._checkpoint_config(variant_id)
if not os.path.isfile(checkpoint_path):
self._last_error[variant_id] = f"SAM2 checkpoint not found: {checkpoint_path}"
logger.error("SAM checkpoint not found at %s", checkpoint_path)
self._model_loaded[variant_id] = True
return
try:
device = self._best_device()
model = build_sam2(
settings.sam_model_config,
settings.sam_model_path,
config,
checkpoint_path,
device=device,
)
self._predictor = SAM2ImagePredictor(model)
self._model_loaded = True
self._loaded_device = device
self._last_error = None
logger.info("SAM 2 model loaded from %s on %s", settings.sam_model_path, device)
self._predictors[variant_id] = SAM2ImagePredictor(model)
self._model_loaded[variant_id] = True
self._loaded_device[variant_id] = device
self._last_error[variant_id] = None
logger.info("SAM 2 model %s loaded from %s on %s", variant_id, checkpoint_path, device)
except Exception as exc: # noqa: BLE001
self._last_error = str(exc)
logger.error("Failed to load SAM 2 model: %s", exc)
self._model_loaded = True # Prevent repeated load attempts
self._last_error[variant_id] = str(exc)
logger.error("Failed to load SAM 2 model %s: %s", variant_id, exc)
self._model_loaded[variant_id] = True # Prevent repeated load attempts
def _load_video_model(self) -> None:
def _load_video_model(self, model_id: str | None = None) -> None:
"""Load the SAM 2 video predictor on first propagation use."""
if self._video_model_loaded:
variant_id = self.normalize_model_id(model_id)
if self._video_model_loaded.get(variant_id):
return
if not TORCH_AVAILABLE:
self._video_last_error = "PyTorch is not installed."
self._video_model_loaded = True
self._video_last_error[variant_id] = "PyTorch is not installed."
self._video_model_loaded[variant_id] = True
return
if not SAM2_AVAILABLE:
self._video_last_error = "sam2 package is not installed."
self._video_model_loaded = True
self._video_last_error[variant_id] = "sam2 package is not installed."
self._video_model_loaded[variant_id] = True
return
if not os.path.isfile(settings.sam_model_path):
self._video_last_error = f"SAM2 checkpoint not found: {settings.sam_model_path}"
self._video_model_loaded = True
config, checkpoint_path = self._checkpoint_config(variant_id)
if not os.path.isfile(checkpoint_path):
self._video_last_error[variant_id] = f"SAM2 checkpoint not found: {checkpoint_path}"
self._video_model_loaded[variant_id] = True
return
try:
device = self._best_device()
self._video_predictor = build_sam2_video_predictor(
settings.sam_model_config,
settings.sam_model_path,
self._video_predictors[variant_id] = build_sam2_video_predictor(
config,
checkpoint_path,
device=device,
)
self._video_model_loaded = True
self._loaded_device = device
self._video_last_error = None
logger.info("SAM 2 video predictor loaded from %s on %s", settings.sam_model_path, device)
self._video_model_loaded[variant_id] = True
self._loaded_device[variant_id] = device
self._video_last_error[variant_id] = None
logger.info("SAM 2 video predictor %s loaded from %s on %s", variant_id, checkpoint_path, device)
except Exception as exc: # noqa: BLE001
self._video_last_error = str(exc)
self._video_model_loaded = True
logger.error("Failed to load SAM 2 video predictor: %s", exc)
self._video_last_error[variant_id] = str(exc)
self._video_model_loaded[variant_id] = True
logger.error("Failed to load SAM 2 video predictor %s: %s", variant_id, exc)
def _best_device(self) -> str:
if TORCH_AVAILABLE and torch is not None and torch.cuda.is_available():
return "cuda"
return "cpu"
def _ensure_ready(self) -> bool:
def _ensure_ready(self, model_id: str | None = None) -> bool:
"""Ensure the model is loaded; return whether it is usable."""
self._load_model()
return SAM2_AVAILABLE and self._predictor is not None
variant_id = self.normalize_model_id(model_id)
self._load_model(variant_id)
return SAM2_AVAILABLE and self._predictors.get(variant_id) is not None
def _ensure_video_ready(self) -> bool:
def _ensure_video_ready(self, model_id: str | None = None) -> bool:
"""Ensure the video predictor is loaded; return whether it is usable."""
self._load_video_model()
return SAM2_AVAILABLE and self._video_predictor is not None
variant_id = self.normalize_model_id(model_id)
self._load_video_model(variant_id)
return SAM2_AVAILABLE and self._video_predictors.get(variant_id) is not None
def status(self) -> dict:
def status(self, model_id: str | None = None) -> dict:
"""Return lightweight, real runtime status without forcing model load."""
checkpoint_exists = os.path.isfile(settings.sam_model_path)
device = self._loaded_device or self._best_device()
variant_id = self.normalize_model_id(model_id)
variant = SAM2_VARIANTS[variant_id]
_, checkpoint_path = self._checkpoint_config(variant_id)
checkpoint_exists = os.path.isfile(checkpoint_path)
using_legacy_checkpoint = Path(checkpoint_path).name == variant.legacy_checkpoint_filename
predictor = self._predictors.get(variant_id)
device = self._loaded_device.get(variant_id) or self._best_device()
available = bool(TORCH_AVAILABLE and SAM2_AVAILABLE and checkpoint_exists)
if self._predictor is not None:
message = "SAM 2 model loaded and ready."
if predictor is not None:
message = f"{variant.label} model loaded and ready."
elif available:
message = "SAM 2 dependencies and checkpoint are present; model will load on first inference."
message = f"{variant.label} dependencies and checkpoint are present; model will load on first inference."
if using_legacy_checkpoint:
message += " Using legacy SAM 2 checkpoint fallback."
else:
missing = []
if not TORCH_AVAILABLE:
@@ -155,20 +276,21 @@ class SAM2Engine:
missing.append("sam2 package")
if not checkpoint_exists:
missing.append("checkpoint")
message = f"SAM 2 unavailable: missing {', '.join(missing)}."
if self._last_error and not self._predictor:
message = self._last_error
message = f"{variant.label} unavailable: missing {', '.join(missing)}."
last_error = self._last_error.get(variant_id)
if last_error and not predictor:
message = last_error
return {
"id": "sam2",
"label": "SAM 2",
"id": variant.id,
"label": variant.label,
"available": available,
"loaded": self._predictor is not None,
"loaded": predictor is not None,
"device": device,
"supports": ["point", "box", "interactive", "auto", "propagate"],
"message": message,
"package_available": SAM2_AVAILABLE,
"checkpoint_exists": checkpoint_exists,
"checkpoint_path": settings.sam_model_path,
"checkpoint_path": checkpoint_path,
"python_ok": True,
"torch_ok": TORCH_AVAILABLE,
"cuda_required": False,
@@ -179,6 +301,7 @@ class SAM2Engine:
# -----------------------------------------------------------------------
def predict_points(
self,
model_id: str | None,
image: np.ndarray,
points: list[list[float]],
labels: list[int],
@@ -193,18 +316,20 @@ class SAM2Engine:
Returns:
Tuple of (polygons, scores).
"""
if not self._ensure_ready():
variant_id = self.normalize_model_id(model_id)
if not self._ensure_ready(variant_id):
logger.warning("SAM2 not ready; returning dummy masks.")
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
try:
predictor = self._predictors[variant_id]
h, w = image.shape[:2]
pts = np.array([[p[0] * w, p[1] * h] for p in points], dtype=np.float32)
lbls = np.array(labels, dtype=np.int32)
with torch.inference_mode(): # type: ignore[name-defined]
self._predictor.set_image(image)
masks, scores, _ = self._predictor.predict(
predictor.set_image(image)
masks, scores, _ = predictor.predict(
point_coords=pts,
point_labels=lbls,
multimask_output=False,
@@ -223,6 +348,7 @@ class SAM2Engine:
def predict_box(
self,
model_id: str | None,
image: np.ndarray,
box: list[float],
) -> tuple[list[list[list[float]]], list[float]]:
@@ -235,11 +361,13 @@ class SAM2Engine:
Returns:
Tuple of (polygons, scores).
"""
if not self._ensure_ready():
variant_id = self.normalize_model_id(model_id)
if not self._ensure_ready(variant_id):
logger.warning("SAM2 not ready; returning dummy masks.")
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
try:
predictor = self._predictors[variant_id]
h, w = image.shape[:2]
bbox = np.array(
[box[0] * w, box[1] * h, box[2] * w, box[3] * h],
@@ -247,8 +375,8 @@ class SAM2Engine:
)
with torch.inference_mode(): # type: ignore[name-defined]
self._predictor.set_image(image)
masks, scores, _ = self._predictor.predict(
predictor.set_image(image)
masks, scores, _ = predictor.predict(
box=bbox[None, :],
multimask_output=False,
)
@@ -266,17 +394,20 @@ class SAM2Engine:
def predict_interactive(
self,
model_id: str | None,
image: np.ndarray,
box: list[float] | None,
points: list[list[float]],
labels: list[int],
) -> tuple[list[list[list[float]]], list[float]]:
"""Run combined box and point prompt segmentation for refinement."""
if not self._ensure_ready():
variant_id = self.normalize_model_id(model_id)
if not self._ensure_ready(variant_id):
logger.warning("SAM2 not ready; returning dummy masks.")
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
try:
predictor = self._predictors[variant_id]
h, w = image.shape[:2]
bbox = None
if box:
@@ -291,8 +422,8 @@ class SAM2Engine:
lbls = np.array(labels, dtype=np.int32)
with torch.inference_mode(): # type: ignore[name-defined]
self._predictor.set_image(image)
masks, scores, _ = self._predictor.predict(
predictor.set_image(image)
masks, scores, _ = predictor.predict(
point_coords=pts,
point_labels=lbls,
box=bbox,
@@ -310,7 +441,7 @@ class SAM2Engine:
logger.error("SAM2 interactive prediction failed: %s", exc)
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
def predict_auto(self, image: np.ndarray) -> tuple[list[list[list[float]]], list[float]]:
def predict_auto(self, model_id: str | None, image: np.ndarray) -> tuple[list[list[list[float]]], list[float]]:
"""Run automatic mask generation (grid of points).
Args:
@@ -319,20 +450,22 @@ class SAM2Engine:
Returns:
Tuple of (polygons, scores).
"""
if not self._ensure_ready():
variant_id = self.normalize_model_id(model_id)
if not self._ensure_ready(variant_id):
logger.warning("SAM2 not ready; returning dummy masks.")
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
try:
predictor = self._predictors[variant_id]
with torch.inference_mode(): # type: ignore[name-defined]
self._predictor.set_image(image)
predictor.set_image(image)
# Generate a uniform 16x16 grid of point prompts
h, w = image.shape[:2]
grid = np.mgrid[0:1:17j, 0:1:17j].reshape(2, -1).T
pts = grid * np.array([w, h])
lbls = np.ones(pts.shape[0], dtype=np.int32)
masks, scores, _ = self._predictor.predict(
masks, scores, _ = predictor.predict(
point_coords=pts,
point_labels=lbls,
multimask_output=False,
@@ -351,6 +484,7 @@ class SAM2Engine:
def propagate_video(
self,
model_id: str | None,
frame_paths: list[str],
source_frame_index: int,
seed: dict,
@@ -358,8 +492,10 @@ class SAM2Engine:
max_frames: int | None = None,
) -> list[dict]:
"""Propagate one seed mask across a prepared frame directory with SAM 2 video."""
if not self._ensure_video_ready():
raise RuntimeError(self._video_last_error or self.status()["message"])
variant_id = self.normalize_model_id(model_id)
if not self._ensure_video_ready(variant_id):
raise RuntimeError(self._video_last_error.get(variant_id) or self.status(variant_id)["message"])
video_predictor = self._video_predictors[variant_id]
if not frame_paths:
return []
if source_frame_index < 0 or source_frame_index >= len(frame_paths):
@@ -379,12 +515,12 @@ class SAM2Engine:
if not seed_mask.any():
raise ValueError("SAM 2 propagation requires a non-empty seed polygon or bbox.")
inference_state = self._video_predictor.init_state(
inference_state = video_predictor.init_state(
video_path=os.path.dirname(frame_paths[0]),
offload_video_to_cpu=True,
offload_state_to_cpu=True,
)
self._video_predictor.add_new_mask(
video_predictor.add_new_mask(
inference_state,
frame_idx=source_frame_index,
obj_id=1,
@@ -394,7 +530,7 @@ class SAM2Engine:
results: dict[int, dict] = {}
def collect(reverse: bool) -> None:
for out_frame_idx, out_obj_ids, out_mask_logits in self._video_predictor.propagate_in_video(
for out_frame_idx, out_obj_ids, out_mask_logits in video_predictor.propagate_in_video(
inference_state,
start_frame_idx=source_frame_index,
max_frame_num_to_track=max_frames,
@@ -427,7 +563,7 @@ class SAM2Engine:
collect(reverse=True)
try:
self._video_predictor.reset_state(inference_state)
video_predictor.reset_state(inference_state)
except Exception: # noqa: BLE001
pass
return [results[index] for index in sorted(results)]

View File

@@ -5,8 +5,12 @@ from __future__ import annotations
from typing import Any
from config import settings
from services.sam2_engine import TORCH_AVAILABLE, sam_engine as sam2_engine
from services.sam3_engine import sam3_engine
from services.sam2_engine import DEFAULT_SAM2_MODEL_ID, TORCH_AVAILABLE, sam_engine as sam2_engine
# SAM 3 integration is intentionally disabled for the current product flow.
# The source files are kept in the repository so the integration can be
# restored later, but the active registry only exposes SAM 2.
# from services.sam3_engine import sam3_engine
try:
import torch
@@ -24,20 +28,23 @@ class SAMRegistry:
def __init__(self) -> None:
self._engines = {
"sam2": sam2_engine,
"sam3": sam3_engine,
# "sam3": sam3_engine,
}
def normalize_model_id(self, model_id: str | None) -> str:
selected = (model_id or settings.sam_default_model or "sam2").lower()
selected = (model_id or settings.sam_default_model or DEFAULT_SAM2_MODEL_ID).lower()
if self._engines["sam2"].is_sam2_model(selected):
return self._engines["sam2"].normalize_model_id(selected)
if selected not in self._engines:
raise ValueError(f"Unsupported model: {model_id}")
return selected
def runtime_status(self, selected_model: str | None = None) -> dict[str, Any]:
selected = self.normalize_model_id(selected_model)
return {
"selected_model": self.normalize_model_id(selected_model),
"selected_model": selected,
"gpu": self.gpu_status(),
"models": [engine.status() for engine in self._engines.values()],
"models": [sam2_engine.status(model_id) for model_id in sam2_engine.variant_ids()],
}
def gpu_status(self) -> dict[str, Any]:
@@ -52,20 +59,26 @@ class SAMRegistry:
}
def _engine(self, model_id: str | None) -> Any:
return self._engines[self.normalize_model_id(model_id)]
normalized = self.normalize_model_id(model_id)
if self._engines["sam2"].is_sam2_model(normalized):
return self._engines["sam2"]
return self._engines[normalized]
def _ensure_available(self, model_id: str | None) -> Any:
normalized = self.normalize_model_id(model_id)
engine = self._engine(model_id)
status = engine.status()
status = engine.status(normalized) if engine is sam2_engine else engine.status()
if not status["available"]:
raise ModelUnavailableError(status["message"])
return engine
def predict_points(self, model_id: str | None, image: Any, points: list[list[float]], labels: list[int]):
return self._ensure_available(model_id).predict_points(image, points, labels)
model = self.normalize_model_id(model_id)
return self._ensure_available(model).predict_points(model, image, points, labels)
def predict_box(self, model_id: str | None, image: Any, box: list[float]):
return self._ensure_available(model_id).predict_box(image, box)
model = self.normalize_model_id(model_id)
return self._ensure_available(model).predict_box(model, image, box)
def predict_interactive(
self,
@@ -76,12 +89,13 @@ class SAMRegistry:
labels: list[int],
):
model = self.normalize_model_id(model_id)
if model != "sam2":
if not sam2_engine.is_sam2_model(model):
raise NotImplementedError("Interactive box + point refinement is currently supported by SAM 2.")
return self._ensure_available(model).predict_interactive(image, box, points, labels)
return self._ensure_available(model).predict_interactive(model, image, box, points, labels)
def predict_auto(self, model_id: str | None, image: Any):
return self._ensure_available(model_id).predict_auto(image)
model = self.normalize_model_id(model_id)
return self._ensure_available(model).predict_auto(model, image)
def predict_semantic(
self,
@@ -90,14 +104,8 @@ class SAMRegistry:
text: str,
confidence_threshold: float | None = None,
):
model = self.normalize_model_id(model_id)
if model == "sam3":
return self._ensure_available(model).predict_semantic(
image,
text,
confidence_threshold=confidence_threshold,
)
return self._ensure_available(model).predict_auto(image)
self.normalize_model_id(model_id)
raise NotImplementedError("Semantic text prompting is disabled; use SAM 2 point or box prompts.")
def propagate_video(
self,
@@ -108,7 +116,9 @@ class SAMRegistry:
direction: str,
max_frames: int | None,
):
return self._ensure_available(model_id).propagate_video(
model = self.normalize_model_id(model_id)
return self._ensure_available(model).propagate_video(
model,
frame_paths,
source_frame_index,
seed,

View File

@@ -87,28 +87,14 @@ def test_predict_applies_crop_and_background_filter_options(client, monkeypatch)
assert all(0.0 <= coord <= 1.0 for point in polygon for coord in point)
def test_predict_box_and_semantic_fallback(client, monkeypatch):
def test_predict_box_and_rejects_semantic_prompt(client, monkeypatch):
_, frame, _ = _create_project_and_frame(client)
calls = {}
monkeypatch.setattr("routers.ai._load_frame_image", lambda frame: np.zeros((10, 10, 3), dtype=np.uint8))
monkeypatch.setattr("routers.ai.sam_registry.predict_box", lambda model, image, box: (
[[[0.2, 0.2], [0.8, 0.2], [0.8, 0.8]]],
[0.8],
))
def fake_predict_semantic(model, image, text, confidence_threshold=None):
calls["semantic"] = {
"model": model,
"text": text,
"confidence_threshold": confidence_threshold,
}
return (
[[[0.0, 0.0], [1.0, 0.0], [1.0, 1.0]]],
[0.5],
)
monkeypatch.setattr("routers.ai.sam_registry.predict_semantic", fake_predict_semantic)
box_response = client.post("/api/ai/predict", json={
"image_id": frame["id"],
"prompt_type": "box",
@@ -124,13 +110,8 @@ def test_predict_box_and_semantic_fallback(client, monkeypatch):
assert box_response.status_code == 200
assert box_response.json()["scores"] == [0.8]
assert semantic_response.status_code == 200
assert semantic_response.json()["scores"] == [0.5]
assert calls["semantic"] == {
"model": "sam3",
"text": "胆囊",
"confidence_threshold": 0.05,
}
assert semantic_response.status_code == 400
assert "Unsupported model: sam3" in semantic_response.json()["detail"]
def test_predict_interactive_combines_box_and_points(client, monkeypatch):
@@ -158,13 +139,13 @@ def test_predict_interactive_combines_box_and_points(client, monkeypatch):
"points": [[0.5, 0.5], [0.2, 0.2]],
"labels": [1, 0],
},
"model": "sam2",
"model": "sam2.1_hiera_small",
})
assert response.status_code == 200
assert response.json()["scores"] == [0.88]
assert calls == {
"model": "sam2",
"model": "sam2.1_hiera_small",
"box": [0.1, 0.1, 0.9, 0.9],
"points": [[0.5, 0.5], [0.2, 0.2]],
"labels": [1, 0],
@@ -173,7 +154,7 @@ def test_predict_interactive_combines_box_and_points(client, monkeypatch):
def test_model_status_reports_runtime(client, monkeypatch):
monkeypatch.setattr("routers.ai.sam_registry.runtime_status", lambda selected_model=None: {
"selected_model": selected_model or "sam2",
"selected_model": "sam2.1_hiera_tiny",
"gpu": {
"available": False,
"device": "cpu",
@@ -184,8 +165,8 @@ def test_model_status_reports_runtime(client, monkeypatch):
},
"models": [
{
"id": "sam2",
"label": "SAM 2",
"id": "sam2.1_hiera_tiny",
"label": "SAM 2.1 Tiny",
"available": True,
"loaded": False,
"device": "cpu",
@@ -198,31 +179,23 @@ def test_model_status_reports_runtime(client, monkeypatch):
"torch_ok": True,
"cuda_required": False,
},
{
"id": "sam3",
"label": "SAM 3",
"available": False,
"loaded": False,
"device": "unavailable",
"supports": ["semantic"],
"message": "missing Python 3.12+ runtime",
"package_available": False,
"checkpoint_exists": False,
"checkpoint_path": None,
"python_ok": False,
"torch_ok": True,
"cuda_required": True,
},
],
})
response = client.get("/api/ai/models/status?selected_model=sam3")
response = client.get("/api/ai/models/status")
assert response.status_code == 200
body = response.json()
assert body["selected_model"] == "sam3"
assert body["models"][1]["id"] == "sam3"
assert body["models"][1]["available"] is False
assert body["selected_model"] == "sam2.1_hiera_tiny"
assert len(body["models"]) == 1
assert body["models"][0]["id"] == "sam2.1_hiera_tiny"
def test_model_status_rejects_disabled_sam3(client):
    """Selecting the disabled ``sam3`` model on the status endpoint yields HTTP 400."""
    rejected = client.get("/api/ai/models/status?selected_model=sam3")

    assert rejected.status_code == 400
    detail = rejected.json()["detail"]
    assert "Unsupported model" in detail
def test_propagate_saves_tracked_annotations(client, monkeypatch):
@@ -267,7 +240,7 @@ def test_propagate_saves_tracked_annotations(client, monkeypatch):
response = client.post("/api/ai/propagate", json={
"project_id": project["id"],
"frame_id": frames[0]["id"],
"model": "sam2",
"model": "sam2.1_hiera_tiny",
"direction": "forward",
"max_frames": 2,
"include_source": False,
@@ -285,13 +258,13 @@ def test_propagate_saves_tracked_annotations(client, monkeypatch):
body = response.json()
assert body["created_annotation_count"] == 1
assert body["processed_frame_count"] == 2
assert calls["model"] == "sam2"
assert calls["model"] == "sam2.1_hiera_tiny"
assert calls["source_frame_index"] == 0
assert calls["direction"] == "forward"
assert calls["frame_count"] == 2
saved = body["annotations"][0]
assert saved["frame_id"] == frames[1]["id"]
assert saved["mask_data"]["source"] == "sam2_propagation"
assert saved["mask_data"]["source"] == "sam2.1_hiera_tiny_propagation"
assert saved["mask_data"]["class"]["name"] == "胆囊"
assert saved["mask_data"]["score"] == 0.8

View File

@@ -69,3 +69,44 @@ def test_dashboard_overview_uses_persisted_records(client, db_session):
assert any(item["kind"] == "annotation" for item in body["activity"])
assert any(item["kind"] == "template" for item in body["activity"])
assert all(item["name"] != "Ready Project" for item in body["tasks"])
def test_dashboard_overview_keeps_recent_success_tasks_in_progress_list(client, db_session):
    """A finished parse task must still be listed by the dashboard overview.

    Persists one ``success`` task for a project created with status ``ready``
    and checks that the overview reports zero parsing tasks in its summary
    while the task itself is still returned in ``tasks`` with its progress,
    message, raw status and extracted frame count.
    """
    from models import ProcessingTask

    created_project = client.post(
        "/api/projects",
        json={"name": "Completed Project", "status": "ready"},
    ).json()

    finished_task = ProcessingTask(
        task_type="parse_video",
        status="success",
        progress=100,
        message="解析完成",
        project_id=created_project["id"],
        payload={"source_type": "video"},
        result={"frames_extracted": 120},
    )
    db_session.add(finished_task)
    db_session.commit()
    db_session.refresh(finished_task)

    overview = client.get("/api/dashboard/overview")
    assert overview.status_code == 200

    payload = overview.json()
    assert payload["summary"]["parsing_task_count"] == 0

    # updated_at is not pinned to a fixed value: the response's own value is
    # reused so the equality check effectively ignores that field.
    expected_entry = {
        "id": f"task-{finished_task.id}",
        "task_id": finished_task.id,
        "project_id": created_project["id"],
        "name": "Completed Project",
        "progress": 100,
        "status": "解析完成",
        "raw_status": "success",
        "frame_count": 120,
        "error": None,
        "updated_at": payload["tasks"][0]["updated_at"],
    }
    assert payload["tasks"] == [expected_entry]

View File

@@ -1,6 +1,6 @@
import numpy as np
from services.sam2_engine import SAM2Engine
from services.sam2_engine import DEFAULT_SAM2_MODEL_ID, SAM2Engine
class _FakePredictor:
@@ -26,8 +26,8 @@ def _mask(offset=0):
def _ready_engine(monkeypatch, predictor):
    """Build a ``SAM2Engine`` that treats the default variant as already loaded.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture, used to force
            ``services.sam2_engine.SAM2_AVAILABLE`` to ``True``.
        predictor: Stand-in predictor registered for the default model id.

    Returns:
        The engine with ``predictor`` installed under
        ``DEFAULT_SAM2_MODEL_ID`` and that id flagged as loaded.
    """
    monkeypatch.setattr("services.sam2_engine.SAM2_AVAILABLE", True)
    engine = SAM2Engine()
    # Loaded flags and predictors are keyed by model id. Assigning a bare
    # bool/predictor to these attributes would clobber the mappings and make
    # the subscript assignments below fail.
    engine._model_loaded[DEFAULT_SAM2_MODEL_ID] = True
    engine._predictors[DEFAULT_SAM2_MODEL_ID] = predictor
    return engine
@@ -39,6 +39,7 @@ def test_sam2_point_prediction_requests_single_best_mask(monkeypatch):
engine = _ready_engine(monkeypatch, predictor)
polygons, scores = engine.predict_points(
DEFAULT_SAM2_MODEL_ID,
np.zeros((32, 32, 3), dtype=np.uint8),
[[0.5, 0.5]],
[1],
@@ -56,8 +57,24 @@ def test_sam2_auto_prediction_keeps_single_best_mask(monkeypatch):
)
engine = _ready_engine(monkeypatch, predictor)
polygons, scores = engine.predict_auto(np.zeros((32, 32, 3), dtype=np.uint8))
polygons, scores = engine.predict_auto(DEFAULT_SAM2_MODEL_ID, np.zeros((32, 32, 3), dtype=np.uint8))
assert predictor.calls[0]["multimask_output"] is False
assert len(polygons) == 1
assert scores == [0.800000011920929]
def test_sam2_status_exposes_selectable_variants(monkeypatch, tmp_path):
    """The engine must report status for a non-default SAM 2.1 variant.

    Only the ``small`` checkpoint file is created on disk; the configured
    ``sam_model_path`` points at a ``tiny`` checkpoint in the same directory
    that is never written. The test also checks that the legacy ``sam2`` id
    normalizes to the default model id.
    """
    small_checkpoint = tmp_path / "sam2.1_hiera_small.pt"
    small_checkpoint.write_bytes(b"model")
    configured_path = tmp_path / "sam2.1_hiera_tiny.pt"
    monkeypatch.setattr(
        "services.sam2_engine.settings.sam_model_path",
        str(configured_path),
    )

    engine = SAM2Engine()
    small_status = engine.status("sam2.1_hiera_small")

    # Alias handling and variant listing.
    assert engine.normalize_model_id("sam2") == DEFAULT_SAM2_MODEL_ID
    assert "sam2.1_hiera_small" in engine.variant_ids()

    # Status payload for the explicitly requested variant.
    assert small_status["id"] == "sam2.1_hiera_small"
    assert small_status["label"] == "SAM 2.1 Small"
    assert small_status["checkpoint_exists"] is True
    assert small_status["checkpoint_path"].endswith("sam2.1_hiera_small.pt")

View File

@@ -2,6 +2,12 @@ import json
from pathlib import Path
import numpy as np
import pytest
pytest.skip(
"SAM 3 integration is disabled in the current SAM2-only product flow.",
allow_module_level=True,
)
from services.sam3_engine import SAM3Engine
from services.sam3_external_worker import _prediction_to_response, _to_numpy