后端能力: - 新增 Celery app、worker task、ProcessingTask 模型、/api/tasks 查询接口和 media_task_runner,将 /api/media/parse 改为创建后台任务并由 worker 执行 FFmpeg/OpenCV/pydicom 拆帧。 - 新增 Redis 进度事件模块和 FastAPI Redis pub/sub 订阅,将 worker 任务进度广播到 /ws/progress;Dashboard 后端概览接口改为聚合 projects/frames/annotations/templates/processing_tasks。 - 统一项目状态为 pending/parsing/ready/error,新增共享 status 常量,并让前端兼容归一化旧状态值。 - 扩展 AI 后端:新增 SAM registry、SAM2 真实运行状态、SAM3 状态检测与文本语义推理适配入口,以及 /api/ai/models/status GPU/模型状态接口。 - 补齐标注保存/更新/删除、COCO/PNG mask 导出相关后端契约和模板 mapping_rules 打包/解包行为。 前端能力: - 新增运行时 API/WS 地址推导配置,前端 API 封装对齐 FastAPI 路由、字段映射、任务轮询、标注归档、导出下载和 AI 预测响应转换。 - Dashboard 改为读取 /api/dashboard/overview,并订阅 WebSocket progress/complete/error/status 更新解析队列和实时流转记录。 - 项目库导入视频/DICOM 后创建项目、上传媒体、触发异步解析并刷新真实项目列表。 - 工作区加载真实帧、无帧时触发解析任务、回显已保存标注、保存未归档 mask、更新 dirty mask、清空当前帧后端标注、导出 COCO JSON。 - Canvas 支持当前帧点/框提示调用后端 AI、渲染推理/已保存 mask、应用模板分类并维护保存状态计数;时间轴按项目 fps 播放。 - AI 页面新增 SAM2/SAM3 模型选择,预测请求携带 model;侧边栏和工作区新增真实 GPU/SAM 状态徽标。 - 模板库和本体面板接入真实模板 CRUD、分类编辑、拖拽排序、JSON 导入、默认腹腔镜分类和本地自定义分类选择。 测试与文档: - 新增 Vitest 配置、前端测试 setup、API/config/websocket/store/组件测试,覆盖登录、项目库、Dashboard、Canvas、工作区、模型状态、时间轴、本体和模板库。 - 新增 pytest 后端测试夹具和 auth/projects/templates/media/AI/export/dashboard/tasks/progress 测试,使用 SQLite、fake MinIO、fake SAM registry 和 Redis monkeypatch 隔离外部服务。 - 新增 doc/ 文档结构,冻结当前需求、设计、接口契约、测试计划、前端逐元素审计、实现地图和后续实施计划,并同步更新 README 与 AGENTS。 验证: - conda run -n seg_server pytest backend/tests:27 passed。 - npm run test:run:54 passed。 - npm run lint、npm run build、compileall、git diff --check 均通过;Vite 仅提示大 chunk 警告。
149 lines
5.3 KiB
Python
149 lines
5.3 KiB
Python
"""SAM 3 engine adapter and runtime status.
|
|
|
|
The official facebookresearch/sam3 package currently targets Python 3.12+
|
|
and CUDA-capable PyTorch. This adapter reports those requirements honestly and
|
|
only performs inference when the local runtime can actually import and execute
|
|
the package.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import importlib.util
|
|
import logging
|
|
import sys
|
|
from typing import Any
|
|
|
|
import numpy as np
|
|
from PIL import Image
|
|
|
|
from config import settings
|
|
from services.sam2_engine import SAM2Engine
|
|
|
|
# Module-level logger shared by the import probe below and by SAM3Engine.
logger = logging.getLogger(__name__)

# PyTorch is treated as optional at import time: any failure while importing
# it (not only ImportError) is recorded in TORCH_AVAILABLE instead of
# propagating, and ``torch`` is bound to None so later checks can test it.
try:
    import torch
except Exception as exc:  # noqa: BLE001
    torch = None  # type: ignore[assignment]
    TORCH_AVAILABLE = False
    logger.warning("PyTorch import failed (%s). SAM3 will be unavailable.", exc)
else:
    TORCH_AVAILABLE = True

# Only locate the ``sam3`` package here; it is not imported at this point.
SAM3_PACKAGE_AVAILABLE = importlib.util.find_spec("sam3") is not None
|
|
|
|
|
|
class SAM3Engine:
    """Lazy SAM 3 image inference adapter.

    Nothing heavy happens at construction time. The model and processor are
    built on the first inference request, and a load is attempted at most once
    per instance: a failed attempt is remembered (its reason is kept in
    ``_last_error`` and reported by :meth:`status`) rather than retried.
    """

    def __init__(self) -> None:
        self._model: Any | None = None
        self._processor: Any | None = None
        # True once a load has been *attempted*, whether or not it succeeded;
        # readiness is determined by ``_processor`` being set, not this flag.
        self._model_loaded = False
        self._last_error: str | None = None

    def _python_ok(self) -> bool:
        """Return True when the interpreter is Python 3.12 or newer."""
        return sys.version_info >= (3, 12)

    def _gpu_ok(self) -> bool:
        """Return True when PyTorch imported and reports an available CUDA device."""
        return bool(TORCH_AVAILABLE and torch is not None and torch.cuda.is_available())

    def _can_load(self) -> bool:
        """Return True when every prerequisite for loading SAM 3 is satisfied."""
        return bool(SAM3_PACKAGE_AVAILABLE and TORCH_AVAILABLE and self._python_ok() and self._gpu_ok())

    def _load_model(self) -> None:
        """Attempt to build the SAM 3 model and processor exactly once.

        On any failure (missing prerequisites or an exception while building)
        the reason is stored in ``_last_error`` and the attempt is still marked
        as done, so subsequent calls return immediately.
        """
        if self._model_loaded:
            return
        if not self._can_load():
            self._last_error = self._status_message()
            self._model_loaded = True
            return

        try:
            # Imported lazily so this module can be imported without sam3.
            from sam3.model.sam3_image_processor import Sam3Processor
            from sam3.model_builder import build_sam3_image_model

            self._model = build_sam3_image_model()
            self._processor = Sam3Processor(self._model)
            self._model_loaded = True
            self._last_error = None
            logger.info("SAM 3 image model loaded with version setting %s", settings.sam3_model_version)
        except Exception as exc:  # noqa: BLE001
            self._last_error = str(exc)
            self._model_loaded = True
            # Same message as before, now with the traceback attached.
            logger.exception("Failed to load SAM 3 model: %s", exc)

    def _ensure_ready(self) -> bool:
        """Trigger the lazy load and return True when a processor is available."""
        self._load_model()
        return self._processor is not None

    def _status_message(self) -> str:
        """Return a human-readable summary of which prerequisites are missing."""
        missing = []
        if not SAM3_PACKAGE_AVAILABLE:
            missing.append("sam3 package")
        if not self._python_ok():
            missing.append("Python 3.12+ runtime")
        if not TORCH_AVAILABLE:
            missing.append("PyTorch")
        if not self._gpu_ok():
            missing.append("CUDA GPU")
        if missing:
            return f"SAM 3 unavailable: missing {', '.join(missing)}."
        return "SAM 3 dependencies are present; model will load on first inference."

    def status(self) -> dict:
        """Return a status dictionary describing the SAM 3 runtime.

        The ``message`` entry is, in order of preference: a ready notice when
        the processor is loaded, the last recorded load error, or the
        prerequisite summary from :meth:`_status_message`.
        """
        loaded = self._processor is not None
        if loaded:
            message = "SAM 3 model loaded and ready."
        else:
            message = self._last_error or self._status_message()
        return {
            "id": "sam3",
            "label": "SAM 3",
            "available": self._can_load(),
            "loaded": loaded,
            "device": "cuda" if self._gpu_ok() else "unavailable",
            "supports": ["semantic"],
            "message": message,
            "package_available": SAM3_PACKAGE_AVAILABLE,
            # No local checkpoint file is checked here; this mirrors package
            # availability.
            "checkpoint_exists": SAM3_PACKAGE_AVAILABLE,
            "checkpoint_path": f"official/HuggingFace ({settings.sam3_model_version})",
            "python_ok": self._python_ok(),
            "torch_ok": TORCH_AVAILABLE,
            "cuda_required": True,
        }

    @staticmethod
    def _pair_polygons_with_scores(masks: Any, scores: Any, to_polygon: Any) -> tuple[list, list]:
        """Convert masks to polygons while keeping scores aligned with them.

        Args:
            masks: Iterable of masks; each item may be a tensor-like object
                exposing ``detach()`` or anything ``numpy.asarray`` accepts.
            scores: Per-mask scores as a tensor-like object, an array, a
                sequence, or a single number.
            to_polygon: Callable taking one mask array and returning a polygon
                (a falsy result means "no polygon for this mask").

        Returns:
            ``(polygons, scores)``. When the number of scores equals the number
            of masks, scores belonging to masks that produced no polygon are
            dropped so both lists line up index by index. When the counts
            differ, alignment is impossible and all scores are returned as-is.
        """
        if hasattr(scores, "detach"):
            scores = scores.detach().cpu().tolist()
        elif hasattr(scores, "tolist"):
            scores = scores.tolist()
        # ``tolist()`` on a zero-dimensional tensor/array yields a bare number.
        if isinstance(scores, (int, float)):
            scores = [scores]
        score_list = list(scores)

        polygons = []
        kept_indices = []
        mask_count = 0
        for index, mask in enumerate(masks):
            mask_count = index + 1
            if hasattr(mask, "detach"):
                mask = mask.detach().cpu().numpy()
            mask = np.asarray(mask)
            if mask.ndim == 3:
                # Drop the leading axis so the converter receives a 2-D mask.
                mask = mask[0]
            polygon = to_polygon(mask)
            if polygon:
                polygons.append(polygon)
                kept_indices.append(index)

        if len(score_list) == mask_count:
            score_list = [score_list[i] for i in kept_indices]
        return polygons, score_list

    def predict_semantic(self, image: np.ndarray, text: str) -> tuple[list[list[list[float]]], list[float]]:
        """Run SAM 3 text-prompted segmentation on a single image.

        Args:
            image: Image array handed to ``PIL.Image.fromarray``
                (presumably H x W x 3 uint8 -- confirm against the caller).
            text: Text prompt; surrounding whitespace is stripped.

        Returns:
            ``(polygons, scores)`` where polygons come from
            ``SAM2Engine._mask_to_polygon`` and each score corresponds to the
            polygon at the same index.

        Raises:
            ValueError: If ``text`` is empty or whitespace only.
            RuntimeError: If the model could not be loaded; the message is the
                current status message.
        """
        prompt = text.strip()
        if not prompt:
            raise ValueError("SAM 3 semantic prompt requires non-empty text.")
        if not self._ensure_ready():
            raise RuntimeError(self.status()["message"])

        pil_image = Image.fromarray(image)
        with torch.inference_mode():  # type: ignore[union-attr]
            state = self._processor.set_image(pil_image)
            output = self._processor.set_text_prompt(state=state, prompt=prompt)

        return self._pair_polygons_with_scores(
            output.get("masks", []),
            output.get("scores", []),
            SAM2Engine._mask_to_polygon,
        )

    def predict_points(self, *_args: Any, **_kwargs: Any) -> tuple[list[list[list[float]]], list[float]]:
        """Point prompts are not supported by this adapter; always raises."""
        raise NotImplementedError("This backend currently exposes SAM 3 semantic text inference; use SAM 2 for point prompts.")

    def predict_box(self, *_args: Any, **_kwargs: Any) -> tuple[list[list[list[float]]], list[float]]:
        """Box prompts are not supported by this adapter; always raises."""
        raise NotImplementedError("This backend currently exposes SAM 3 semantic text inference; use SAM 2 for box prompts.")
|
|
|
|
|
|
# Shared module-level engine instance. Constructing it only initialises
# attributes; the SAM 3 model itself is built later, on first inference.
sam3_engine = SAM3Engine()
|