添加Docker自包含部署分支

- 新增 Seg_Server_Docker 自包含部署内容,包含前后端、FastAPI、Celery、PostgreSQL、Redis、MinIO、演示视频和 DICOM 数据。

- 保留 demo 数据以支持恢复演示出厂设置,排除 SAM 2.1 .pt 权重并在 README 中补充下载命令。

- 补充 GPU 部署、backend/worker 镜像复用、frpc/frps + NPM 公网域名反代部署说明。

- 在 .env/.env.example 中用 # XXXX 标注局域网和公网域名部署需要修改的配置项。

- 添加部署分支 .gitignore,忽略本地模型权重、构建产物、缓存和日志。
This commit is contained in:
2026-05-07 19:06:07 +08:00
commit b5413066a0
396 changed files with 32742 additions and 0 deletions

View File

View File

@@ -0,0 +1,164 @@
"""Bundled system ontology templates and restore helpers."""
from __future__ import annotations
from copy import deepcopy
from sqlalchemy.orm import Session
from models import Template
RESERVED_UNCLASSIFIED_CLASS = {
"id": "reserved-unclassified",
"name": "待分类",
"color": "#000000",
"zIndex": 0,
"maskId": 0,
"category": "系统保留",
}
def _with_reserved_unclassified_class(classes: list[dict]) -> list[dict]:
filtered = [
item for item in classes
if item.get("id") != RESERVED_UNCLASSIFIED_CLASS["id"]
and item.get("name") != RESERVED_UNCLASSIFIED_CLASS["name"]
and item.get("maskId") != 0
]
return [*filtered, dict(RESERVED_UNCLASSIFIED_CLASS)]
def _template_classes(
template_name: str,
names: list[str],
colors: list[tuple[int, int, int]],
*,
id_prefix: str,
) -> list[dict]:
classes = []
for idx, (rgb, name) in enumerate(zip(colors, names)):
color_hex = f"#{rgb[0]:02x}{rgb[1]:02x}{rgb[2]:02x}"
classes.append({
"id": f"{id_prefix}-{idx}",
"name": name,
"color": color_hex,
"zIndex": (len(names) - idx) * 10,
"maskId": idx + 1,
"category": template_name,
})
return classes
def bundled_default_template_definitions() -> list[dict]:
"""Return fresh definitions for all bundled system templates."""
return [
{
"name": "腹腔镜胆囊切除术",
"description": "腹腔镜胆囊切除术LC手术器械与解剖结构语义分割模板共35个分类",
"color": "#06b6d4",
"z_index": 0,
"classes": _with_reserved_unclassified_class(_template_classes(
"腹腔镜胆囊切除术",
[
"", "线", "肿瘤", "血管阻断夹", "棉球", "双极电凝",
"肝脏", "胆囊", "分离钳", "脂肪", "止血海绵", "肝总管",
"吸引器", "剪刀", "超声刀", "止血纱布", "胆总管", "生物夹",
"无损伤钳", "钳夹", "喷洒", "胆囊管", "动脉", "电凝",
"静脉", "标本袋", "引流管", "纱布", "金属钛夹", "韧带",
"肝蒂", "推结器", "乳胶管-血管阻断", "吻合器", "术中超声",
],
[
(134, 124, 118), (0, 157, 142), (245, 161, 0), (255, 172, 159), (146, 175, 236), (155, 62, 0),
(255, 91, 0), (255, 234, 0), (85, 111, 181), (155, 132, 0), (181, 227, 14), (72, 0, 255),
(255, 0, 255), (29, 32, 136), (240, 16, 116), (160, 15, 95), (0, 155, 33), (0, 160, 233),
(52, 184, 178), (66, 115, 82), (90, 120, 41), (255, 0, 0), (117, 0, 0), (167, 24, 233),
(42, 8, 66), (112, 113, 150), (0, 255, 0), (255, 255, 255), (0, 255, 255), (181, 85, 105),
(113, 102, 140), (202, 202, 200), (197, 83, 181), (136, 162, 196), (138, 251, 213),
],
id_prefix="cls-lap",
)),
},
{
"name": "头颈部CT分割",
"description": "头颈部CT分割",
"color": "#ef4444",
"z_index": 10,
"classes": _with_reserved_unclassified_class(_template_classes(
"头颈部CT分割",
[
"肿瘤/结节",
"下颌骨",
"甲状腺",
"气管",
"颈椎",
"颈动脉",
"颈静脉",
"腮腺",
"下颌下腺",
"舌骨",
],
[
(255, 0, 0),
(0, 255, 0),
(0, 0, 255),
(255, 255, 0),
(255, 0, 255),
(0, 255, 255),
(255, 128, 0),
(128, 0, 128),
(0, 128, 128),
(128, 128, 0),
],
id_prefix="cls-head-neck-ct",
)),
},
]
def _has_legacy_head_neck_english_labels(template: Template) -> bool:
if template.name != "头颈部CT分割":
return False
classes = (template.mapping_rules or {}).get("classes") or []
return any(
isinstance(item, dict)
and isinstance(item.get("name"), str)
and "(" in item["name"]
and ")" in item["name"]
for item in classes
)
def ensure_default_templates(db: Session, *, restore_existing: bool = False) -> list[Template]:
"""Create bundled system templates, optionally restoring existing ones exactly."""
templates: list[Template] = []
for definition in bundled_default_template_definitions():
existing = db.query(Template).filter(
Template.name == definition["name"],
Template.owner_user_id.is_(None),
).first()
if existing is None:
existing = Template(owner_user_id=None)
db.add(existing)
elif not restore_existing and not _has_legacy_head_neck_english_labels(existing):
templates.append(existing)
continue
existing.name = definition["name"]
existing.description = definition["description"]
existing.color = definition["color"]
existing.z_index = definition["z_index"]
existing.owner_user_id = None
existing.mapping_rules = {
"classes": deepcopy(definition["classes"]),
"rules": [],
}
templates.append(existing)
db.commit()
for template in templates:
db.refresh(template)
return templates
def restore_default_templates(db: Session) -> list[Template]:
"""Restore bundled system templates after demo factory reset."""
return ensure_default_templates(db, restore_existing=True)

View File

@@ -0,0 +1,217 @@
"""Helpers for seeding the bundled demo media project."""
from __future__ import annotations
import os
import shutil
import tempfile
from pathlib import Path
import cv2
from sqlalchemy.orm import Session
from minio_client import upload_file
from models import Frame, Project, User
from services.frame_parser import (
extract_thumbnail,
natural_filename_key,
parse_dicom,
parse_video,
upload_frames_to_minio,
)
from statuses import PROJECT_STATUS_PENDING, PROJECT_STATUS_READY
DEMO_DICOM_PROJECT_NAME = "演视DICOM序列"
DEMO_DICOM_PARSE_FPS = 30.0
DEMO_VIDEO_PROJECT_NAME = "演视LC视频序列"
DEMO_VIDEO_PARSE_FPS = 30.0
DEMO_VIDEO_TARGET_WIDTH = 640
LEGACY_DEMO_VIDEO_PROJECT_NAMES = {"Data_MyVideo_1"}
LEGACY_DEMO_DICOM_PROJECT_NAMES = {"演示DICOM序列"}
def demo_dicom_files(dicom_dir: str) -> list[Path]:
"""Return .dcm files in natural file-name order."""
root = Path(dicom_dir)
if not root.exists() or not root.is_dir():
return []
return sorted(
[path for path in root.iterdir() if path.is_file() and path.name.lower().endswith(".dcm")],
key=lambda path: natural_filename_key(path.name),
)
def create_unparsed_video_demo_project(
db: Session,
*,
owner: User,
video_path: str,
project_name: str = DEMO_VIDEO_PROJECT_NAME,
) -> Project:
"""Create the bundled demo video project without extracting frames."""
source = Path(video_path)
if not source.exists() or not source.is_file():
raise FileNotFoundError(f"Demo video not found: {video_path}")
project = Project(
name=project_name,
description="默认演示视频,尚未生成帧",
status=PROJECT_STATUS_PENDING,
source_type="video",
parse_fps=30.0,
original_fps=None,
owner_user_id=owner.id,
)
db.add(project)
db.flush()
data = source.read_bytes()
object_name = f"uploads/{project.id}/{source.name}"
upload_file(object_name, data, content_type="video/mp4", length=len(data))
project.video_path = object_name
project.thumbnail_url = None
db.commit()
db.refresh(project)
return project
def create_parsed_video_demo_project(
db: Session,
*,
owner: User,
video_path: str,
project_name: str = DEMO_VIDEO_PROJECT_NAME,
) -> Project:
"""Create the bundled demo video project and register its extracted frame sequence."""
source = Path(video_path)
if not source.exists() or not source.is_file():
raise FileNotFoundError(f"Demo video not found: {video_path}")
project = Project(
name=project_name,
description="默认演示视频,已生成帧",
status=PROJECT_STATUS_PENDING,
source_type="video",
parse_fps=DEMO_VIDEO_PARSE_FPS,
original_fps=None,
owner_user_id=owner.id,
)
db.add(project)
db.flush()
data = source.read_bytes()
object_name = f"uploads/{project.id}/{source.name}"
upload_file(object_name, data, content_type="video/mp4", length=len(data))
project.video_path = object_name
tmp_dir = tempfile.mkdtemp(prefix=f"seg_demo_video_{project.id}_")
try:
output_dir = os.path.join(tmp_dir, "frames")
frame_files, original_fps = parse_video(
str(source),
output_dir,
fps=int(DEMO_VIDEO_PARSE_FPS),
target_width=DEMO_VIDEO_TARGET_WIDTH,
)
project.original_fps = original_fps
object_names = upload_frames_to_minio(frame_files, project.id)
for idx, obj_name in enumerate(object_names):
image = cv2.imread(frame_files[idx])
height, width = image.shape[:2] if image is not None else (None, None)
db.add(Frame(
project_id=project.id,
frame_index=idx,
image_url=obj_name,
width=width,
height=height,
timestamp_ms=idx * 1000.0 / DEMO_VIDEO_PARSE_FPS,
source_frame_number=idx,
))
thumbnail_path = os.path.join(tmp_dir, "thumbnail.jpg")
try:
extract_thumbnail(str(source), thumbnail_path)
with open(thumbnail_path, "rb") as thumbnail_file:
thumbnail_data = thumbnail_file.read()
thumbnail_object = f"projects/{project.id}/thumbnail.jpg"
upload_file(
thumbnail_object,
thumbnail_data,
content_type="image/jpeg",
length=len(thumbnail_data),
)
project.thumbnail_url = thumbnail_object
except Exception: # noqa: BLE001
if object_names:
project.thumbnail_url = object_names[0]
project.status = PROJECT_STATUS_READY
db.commit()
db.refresh(project)
return project
finally:
shutil.rmtree(tmp_dir, ignore_errors=True)
def create_parsed_dicom_demo_project(
db: Session,
*,
owner: User,
dicom_dir: str,
project_name: str = DEMO_DICOM_PROJECT_NAME,
) -> Project:
"""Create the demo DICOM project, upload the series, and register parsed frames."""
dcm_files = demo_dicom_files(dicom_dir)
if not dcm_files:
raise FileNotFoundError(f"Demo DICOM series not found: {dicom_dir}")
project = Project(
name=project_name,
description=f"默认演示 DICOM 序列,已按文件名自然顺序生成 {len(dcm_files)}",
status=PROJECT_STATUS_PENDING,
source_type="dicom",
parse_fps=DEMO_DICOM_PARSE_FPS,
original_fps=None,
owner_user_id=owner.id,
)
db.add(project)
db.flush()
dicom_prefix = f"uploads/{project.id}/dicom"
for dcm_file in dcm_files:
data = dcm_file.read_bytes()
upload_file(
f"{dicom_prefix}/{dcm_file.name}",
data,
content_type="application/dicom",
length=len(data),
)
project.video_path = dicom_prefix
tmp_dir = tempfile.mkdtemp(prefix=f"seg_demo_dicom_{project.id}_")
try:
output_dir = os.path.join(tmp_dir, "frames")
frame_files = parse_dicom(dicom_dir, output_dir)
object_names = upload_frames_to_minio(frame_files, project.id)
for idx, obj_name in enumerate(object_names):
image = cv2.imread(frame_files[idx])
height, width = image.shape[:2] if image is not None else (None, None)
db.add(Frame(
project_id=project.id,
frame_index=idx,
image_url=obj_name,
width=width,
height=height,
timestamp_ms=idx * 1000.0 / DEMO_DICOM_PARSE_FPS,
source_frame_number=idx,
))
if object_names:
project.thumbnail_url = object_names[0]
project.status = PROJECT_STATUS_READY
db.commit()
db.refresh(project)
return project
finally:
shutil.rmtree(tmp_dir, ignore_errors=True)

View File

@@ -0,0 +1,237 @@
"""Video/DICOM frame parsing and MinIO upload utilities."""
import logging
import os
import re
import shutil
import subprocess
from pathlib import Path
from typing import List, Optional, Tuple
import cv2
import numpy as np
from pydicom import dcmread
from minio_client import upload_file, BUCKET_NAME
logger = logging.getLogger(__name__)
def natural_filename_key(filename: str) -> Tuple[object, ...]:
"""Sort file names by their visible numeric order instead of pure lexicographic order."""
return tuple(
int(part) if part.isdigit() else part.casefold()
for part in re.split(r"(\d+)", Path(filename).name)
)
def get_video_fps(video_path: str) -> float:
"""Read the original frame rate of a video file."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
return 30.0
fps = cap.get(cv2.CAP_PROP_FPS)
cap.release()
return fps if fps > 0 else 30.0
def extract_thumbnail(video_path: str, output_path: str, width: int = 640) -> str:
"""Extract the first frame of a video as a thumbnail JPEG."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
raise RuntimeError(f"Cannot open video for thumbnail: {video_path}")
ret, frame = cap.read()
cap.release()
if not ret or frame is None:
raise RuntimeError(f"Cannot read first frame from: {video_path}")
h, w = frame.shape[:2]
if w > width:
scale = width / w
new_w = int(w * scale)
new_h = int(h * scale)
frame = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
cv2.imwrite(output_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
return output_path
def parse_video(
video_path: str,
output_dir: str,
fps: int = 30,
max_frames: Optional[int] = None,
target_width: int = 640,
) -> Tuple[List[str], float]:
"""Extract frames from a video file using FFmpeg or OpenCV fallback.
Args:
video_path: Path to the input video file.
output_dir: Directory to save extracted frames.
fps: Target frame extraction rate.
max_frames: Optional maximum number of frames to extract.
target_width: Output frame width for model-friendly frame sequences.
Returns:
Tuple of (frame_paths, original_fps).
"""
os.makedirs(output_dir, exist_ok=True)
frame_paths: List[str] = []
original_fps = get_video_fps(video_path)
safe_fps = max(int(fps), 1)
safe_width = max(int(target_width), 1)
# Try FFmpeg first
if shutil.which("ffmpeg"):
try:
pattern = os.path.join(output_dir, "frame_%06d.jpg")
cmd = [
"ffmpeg",
"-i", video_path,
"-vf", f"fps={safe_fps},scale={safe_width}:-1",
"-start_number", "0",
"-q:v", "5",
"-y",
pattern,
]
logger.info("Running FFmpeg: %s", " ".join(cmd))
result = subprocess.run(cmd, capture_output=True, text=True, check=False)
if result.returncode == 0:
frame_paths = sorted(
[os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(".jpg")]
)
if max_frames:
frame_paths = frame_paths[:max_frames]
logger.info("Extracted %d frames via FFmpeg", len(frame_paths))
return frame_paths, original_fps
else:
logger.warning("FFmpeg failed: %s", result.stderr)
except Exception as exc: # noqa: BLE001
logger.warning("FFmpeg exception: %s", exc)
# OpenCV fallback
logger.info("Falling back to OpenCV frame extraction")
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
raise RuntimeError(f"Cannot open video: {video_path}")
video_fps = cap.get(cv2.CAP_PROP_FPS) or 30
interval = max(1, int(round(video_fps / safe_fps)))
count = 0
saved = 0
while True:
ret, frame = cap.read()
if not ret:
break
if count % interval == 0:
path = os.path.join(output_dir, f"frame_{saved:06d}.jpg")
h, w = frame.shape[:2]
if safe_width > 0 and w != safe_width:
scale = safe_width / max(w, 1)
frame = cv2.resize(frame, (safe_width, max(1, int(round(h * scale)))), interpolation=cv2.INTER_AREA)
cv2.imwrite(path, frame, [cv2.IMWRITE_JPEG_QUALITY, 80])
frame_paths.append(path)
saved += 1
if max_frames and saved >= max_frames:
break
count += 1
cap.release()
logger.info("Extracted %d frames via OpenCV", len(frame_paths))
return frame_paths, original_fps
def parse_dicom(
dicom_dir: str,
output_dir: str,
max_frames: Optional[int] = None,
) -> List[str]:
"""Extract frames from DICOM files in a directory.
Args:
dicom_dir: Directory containing .dcm files.
output_dir: Directory to save extracted frames.
max_frames: Optional maximum number of frames to extract.
Returns:
List of paths to extracted frame images.
"""
os.makedirs(output_dir, exist_ok=True)
dcm_files = sorted(
[f for f in os.listdir(dicom_dir) if f.lower().endswith(".dcm")],
key=natural_filename_key,
)
frame_paths: List[str] = []
for idx, fname in enumerate(dcm_files):
if max_frames and idx >= max_frames:
break
path = os.path.join(dicom_dir, fname)
try:
ds = dcmread(path)
pixel_array = ds.pixel_array
# Normalize to 8-bit
if pixel_array.dtype != np.uint8:
pixel_array = pixel_array.astype(np.float32)
pixel_array = (
(pixel_array - pixel_array.min())
/ (pixel_array.max() - pixel_array.min() + 1e-8)
* 255
)
pixel_array = pixel_array.astype(np.uint8)
# Handle multi-frame DICOM
if pixel_array.ndim == 3:
for f in range(pixel_array.shape[0]):
out_path = os.path.join(output_dir, f"frame_{idx:06d}_{f:03d}.jpg")
cv2.imwrite(out_path, pixel_array[f], [cv2.IMWRITE_JPEG_QUALITY, 85])
frame_paths.append(out_path)
else:
out_path = os.path.join(output_dir, f"frame_{idx:06d}.jpg")
cv2.imwrite(out_path, pixel_array, [cv2.IMWRITE_JPEG_QUALITY, 85])
frame_paths.append(out_path)
except Exception as exc: # noqa: BLE001
logger.error("Failed to read DICOM %s: %s", path, exc)
logger.info("Extracted %d frames from DICOM", len(frame_paths))
return frame_paths
def upload_frames_to_minio(
frames: List[str],
project_id: int,
object_prefix: Optional[str] = None,
) -> List[str]:
"""Upload a list of local frame images to MinIO.
Args:
frames: List of local file paths.
project_id: Project ID used for bucket path organization.
object_prefix: Optional prefix override.
Returns:
List of object names (paths) in MinIO.
"""
prefix = object_prefix or f"projects/{project_id}/frames"
object_names: List[str] = []
for frame_path in frames:
fname = os.path.basename(frame_path)
object_name = f"{prefix}/{fname}"
try:
with open(frame_path, "rb") as f:
data = f.read()
upload_file(
object_name,
data,
content_type="image/jpeg",
length=len(data),
)
object_names.append(object_name)
except Exception as exc: # noqa: BLE001
logger.error("Failed to upload %s: %s", frame_path, exc)
logger.info("Uploaded %d/%d frames to MinIO", len(object_names), len(frames))
return object_names

View File

@@ -0,0 +1,340 @@
"""Background media parsing runner used by Celery workers."""
import logging
import os
import shutil
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from sqlalchemy.orm import Session
from minio_client import BUCKET_NAME, download_file, get_minio_client, upload_file
from models import Annotation, Frame, Mask, ProcessingTask, Project
from progress_events import publish_task_progress_event
from services.frame_parser import (
extract_thumbnail,
natural_filename_key,
parse_dicom,
parse_video,
upload_frames_to_minio,
)
from statuses import (
PROJECT_STATUS_PENDING,
PROJECT_STATUS_ERROR,
PROJECT_STATUS_PARSING,
PROJECT_STATUS_READY,
TASK_STATUS_CANCELLED,
TASK_STATUS_FAILED,
TASK_STATUS_RUNNING,
TASK_STATUS_SUCCESS,
)
logger = logging.getLogger(__name__)
class TaskCancelled(RuntimeError):
"""Raised internally when a persisted task has been cancelled."""
def _now() -> datetime:
return datetime.now(timezone.utc)
def _set_task_state(
db: Session,
task: ProcessingTask,
*,
status: str | None = None,
progress: int | None = None,
message: str | None = None,
result: dict[str, Any] | None = None,
error: str | None = None,
started: bool = False,
finished: bool = False,
) -> None:
if status is not None:
task.status = status
if progress is not None:
task.progress = max(0, min(100, progress))
if message is not None:
task.message = message
if result is not None:
task.result = result
if error is not None:
task.error = error
if started:
task.started_at = _now()
if finished:
task.finished_at = _now()
db.commit()
db.refresh(task)
publish_task_progress_event(task)
def _project_status_after_stop(project: Project) -> str:
return PROJECT_STATUS_READY if project.frames else PROJECT_STATUS_PENDING
def _positive_int(value: Any, default: int | None = None) -> int | None:
try:
parsed = int(value)
except (TypeError, ValueError):
return default
return parsed if parsed > 0 else default
def _positive_float(value: Any, default: float) -> float:
try:
parsed = float(value)
except (TypeError, ValueError):
return default
return parsed if parsed > 0 else default
def _frame_sequence_metadata(
index: int,
parse_fps: float,
original_fps: float | None,
) -> dict[str, float | int | None]:
safe_parse_fps = max(float(parse_fps or 1.0), 1e-6)
timestamp_ms = index * 1000.0 / safe_parse_fps
source_frame_number = None
if original_fps and original_fps > 0:
source_frame_number = int(round(index * original_fps / safe_parse_fps))
return {
"timestamp_ms": timestamp_ms,
"source_frame_number": source_frame_number,
}
def _clear_existing_project_outputs(db: Session, project: Project) -> None:
"""Remove stale frame sequence and annotations before regenerating frames."""
annotation_ids = db.query(Annotation.id).filter(Annotation.project_id == project.id)
db.query(Mask).filter(Mask.annotation_id.in_(annotation_ids)).delete(synchronize_session=False)
db.query(Annotation).filter(Annotation.project_id == project.id).delete(synchronize_session=False)
db.query(Frame).filter(Frame.project_id == project.id).delete(synchronize_session=False)
project.thumbnail_url = None
db.commit()
def _ensure_not_cancelled(db: Session, task: ProcessingTask) -> None:
db.refresh(task)
if task.status == TASK_STATUS_CANCELLED:
raise TaskCancelled("Task was cancelled")
def run_parse_media_task(db: Session, task_id: int) -> dict[str, Any]:
"""Parse one project's media and update task progress in the database."""
task = db.query(ProcessingTask).filter(ProcessingTask.id == task_id).first()
if not task:
raise ValueError(f"Task not found: {task_id}")
if task.status == TASK_STATUS_CANCELLED:
return {
"task_id": task.id,
"status": TASK_STATUS_CANCELLED,
"message": task.message or "任务已取消",
}
if task.project_id is None:
_set_task_state(
db,
task,
status=TASK_STATUS_FAILED,
progress=100,
message="任务缺少 project_id",
error="Task has no project_id",
finished=True,
)
raise ValueError("Task has no project_id")
project = db.query(Project).filter(Project.id == task.project_id).first()
if not project:
_set_task_state(
db,
task,
status=TASK_STATUS_FAILED,
progress=100,
message="项目不存在",
error="Project not found",
finished=True,
)
raise ValueError(f"Project not found: {task.project_id}")
if not project.video_path:
_set_task_state(
db,
task,
status=TASK_STATUS_FAILED,
progress=100,
message="项目没有可解析媒体",
error="Project has no media uploaded",
finished=True,
)
project.status = PROJECT_STATUS_ERROR
db.commit()
raise ValueError("Project has no media uploaded")
_ensure_not_cancelled(db, task)
project.status = PROJECT_STATUS_PARSING
_clear_existing_project_outputs(db, project)
_set_task_state(db, task, status=TASK_STATUS_RUNNING, progress=5, message="后台解析已启动", started=True)
payload = task.payload or {}
effective_source = payload.get("source_type") or project.source_type or "video"
parse_fps = _positive_float(payload.get("parse_fps"), project.parse_fps or 30.0)
max_frames = _positive_int(payload.get("max_frames"))
target_width = _positive_int(payload.get("target_width"), 640) or 640
project.parse_fps = parse_fps
tmp_dir = tempfile.mkdtemp(prefix=f"seg_parse_{project.id}_")
output_dir = os.path.join(tmp_dir, "frames")
os.makedirs(output_dir, exist_ok=True)
try:
_ensure_not_cancelled(db, task)
_set_task_state(db, task, progress=15, message="正在下载媒体文件")
if effective_source == "dicom":
dcm_dir = os.path.join(tmp_dir, "dcm")
os.makedirs(dcm_dir, exist_ok=True)
client = get_minio_client()
objects = sorted(
list(client.list_objects(BUCKET_NAME, prefix=project.video_path, recursive=True)),
key=lambda obj: natural_filename_key(obj.object_name),
)
for obj in objects:
_ensure_not_cancelled(db, task)
if obj.object_name.lower().endswith(".dcm"):
data = download_file(obj.object_name)
local_dcm = os.path.join(dcm_dir, os.path.basename(obj.object_name))
with open(local_dcm, "wb") as f:
f.write(data)
_ensure_not_cancelled(db, task)
_set_task_state(db, task, progress=35, message="正在解析 DICOM 序列")
frame_files = parse_dicom(dcm_dir, output_dir, max_frames=max_frames)
else:
_ensure_not_cancelled(db, task)
media_bytes = download_file(project.video_path)
local_path = os.path.join(tmp_dir, Path(project.video_path).name)
with open(local_path, "wb") as f:
f.write(media_bytes)
_ensure_not_cancelled(db, task)
_set_task_state(db, task, progress=35, message="正在使用 FFmpeg/OpenCV 拆帧")
frame_files, original_fps = parse_video(
local_path,
output_dir,
fps=int(parse_fps),
max_frames=max_frames,
target_width=target_width,
)
project.original_fps = original_fps
thumbnail_path = os.path.join(tmp_dir, "thumbnail.jpg")
try:
extract_thumbnail(local_path, thumbnail_path)
with open(thumbnail_path, "rb") as f:
thumb_data = f.read()
thumb_object = f"projects/{project.id}/thumbnail.jpg"
upload_file(thumb_object, thumb_data, content_type="image/jpeg", length=len(thumb_data))
project.thumbnail_url = thumb_object
except Exception as exc: # noqa: BLE001
logger.warning("Thumbnail extraction failed: %s", exc)
_ensure_not_cancelled(db, task)
_set_task_state(db, task, progress=70, message="正在上传帧到对象存储")
object_names = upload_frames_to_minio(frame_files, project.id)
_ensure_not_cancelled(db, task)
_set_task_state(db, task, progress=85, message="正在写入帧索引")
frames_out = []
for idx, obj_name in enumerate(object_names):
_ensure_not_cancelled(db, task)
local_frame = frame_files[idx]
try:
import cv2
img = cv2.imread(local_frame)
h, w = img.shape[:2] if img is not None else (None, None)
except Exception: # noqa: BLE001
h, w = None, None
sequence_meta = _frame_sequence_metadata(idx, parse_fps, project.original_fps)
frame = Frame(
project_id=project.id,
frame_index=idx,
image_url=obj_name,
width=w,
height=h,
timestamp_ms=sequence_meta["timestamp_ms"],
source_frame_number=sequence_meta["source_frame_number"],
)
db.add(frame)
frames_out.append(frame)
project.status = PROJECT_STATUS_READY
db.commit()
result = {
"project_id": project.id,
"frames_extracted": len(frames_out),
"status": PROJECT_STATUS_READY,
"message": "Frame extraction completed successfully.",
"frame_sequence": {
"original_fps": project.original_fps,
"parse_fps": parse_fps,
"frame_count": len(frames_out),
"duration_ms": (len(frames_out) - 1) * 1000.0 / parse_fps if frames_out else 0,
"target_width": target_width,
"frame_width": frames_out[0].width if frames_out else None,
"frame_height": frames_out[0].height if frames_out else None,
"max_frames": max_frames,
"object_prefix": f"projects/{project.id}/frames",
},
}
_set_task_state(
db,
task,
status=TASK_STATUS_SUCCESS,
progress=100,
message="解析完成",
result=result,
finished=True,
)
logger.info("Parsed %d frames for project_id=%s", len(frames_out), project.id)
return result
except TaskCancelled:
project.status = _project_status_after_stop(project)
task.status = TASK_STATUS_CANCELLED
task.progress = 100
task.message = task.message or "任务已取消"
task.error = task.error or "Cancelled by user"
task.finished_at = task.finished_at or _now()
db.commit()
db.refresh(task)
publish_task_progress_event(task)
logger.info("Parse task cancelled: task_id=%s project_id=%s", task.id, project.id)
return {
"task_id": task.id,
"project_id": project.id,
"status": TASK_STATUS_CANCELLED,
"message": task.message,
}
except Exception as exc: # noqa: BLE001
project.status = PROJECT_STATUS_ERROR
_set_task_state(
db,
task,
status=TASK_STATUS_FAILED,
progress=100,
message="解析失败",
error=str(exc),
finished=True,
)
logger.error("Frame extraction failed: %s", exc)
raise
finally:
shutil.rmtree(tmp_dir, ignore_errors=True)

View File

@@ -0,0 +1,842 @@
"""Background SAM video propagation runner used by Celery workers."""
import hashlib
import json
import logging
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import cv2
import numpy as np
from sqlalchemy.orm import Session
from minio_client import download_file
from models import Annotation, Frame, ProcessingTask, Project
from progress_events import publish_task_progress_event
from services.sam_registry import ModelUnavailableError, sam_registry
from statuses import (
TASK_STATUS_CANCELLED,
TASK_STATUS_FAILED,
TASK_STATUS_RUNNING,
TASK_STATUS_SUCCESS,
)
logger = logging.getLogger(__name__)
class PropagationTaskCancelled(RuntimeError):
"""Raised internally when a persisted propagation task has been cancelled."""
def _now() -> datetime:
return datetime.now(timezone.utc)
def _set_task_state(
db: Session,
task: ProcessingTask,
*,
status: str | None = None,
progress: int | None = None,
message: str | None = None,
result: dict[str, Any] | None = None,
error: str | None = None,
started: bool = False,
finished: bool = False,
) -> None:
if status is not None:
task.status = status
if progress is not None:
task.progress = max(0, min(100, progress))
if message is not None:
task.message = message
if result is not None:
task.result = result
if error is not None:
task.error = error
if started:
task.started_at = _now()
if finished:
task.finished_at = _now()
db.commit()
db.refresh(task)
publish_task_progress_event(task)
def _ensure_not_cancelled(db: Session, task: ProcessingTask) -> None:
db.refresh(task)
if task.status == TASK_STATUS_CANCELLED:
raise PropagationTaskCancelled("Task was cancelled")
def _clamp01(value: float) -> float:
return min(max(float(value), 0.0), 1.0)
def _polygon_bbox(polygon: list[list[float]]) -> list[float]:
xs = [_clamp01(point[0]) for point in polygon]
ys = [_clamp01(point[1]) for point in polygon]
left, right = min(xs), max(xs)
top, bottom = min(ys), max(ys)
return [left, top, max(right - left, 0.0), max(bottom - top, 0.0)]
def _polygons_bbox(polygons: list[list[list[float]]]) -> list[float]:
points = [point for polygon in polygons for point in polygon if len(point) >= 2]
if not points:
return [0.0, 0.0, 0.0, 0.0]
xs = [_clamp01(point[0]) for point in points]
ys = [_clamp01(point[1]) for point in points]
left, right = min(xs), max(xs)
top, bottom = min(ys), max(ys)
return [left, top, max(right - left, 0.0), max(bottom - top, 0.0)]
def _normalize_polygon(polygon: list[list[float]]) -> list[list[float]]:
return [[_clamp01(point[0]), _clamp01(point[1])] for point in polygon if len(point) >= 2]
def _normalize_smoothing_options(value: Any) -> dict[str, Any] | None:
if not isinstance(value, dict):
return None
try:
strength = max(0.0, min(float(value.get("strength") or 0.0), 100.0))
except (TypeError, ValueError):
strength = 0.0
if strength <= 0:
return None
method = str(value.get("method") or "chaikin").lower()
if method != "chaikin":
method = "chaikin"
return {"strength": round(strength, 2), "method": method}
def _smoothing_ratio(strength: float, curve: float = 1.65) -> float:
normalized = max(0.0, min(float(strength or 0.0), 100.0)) / 100.0
return normalized ** curve
def _chaikin_smooth_polygon(polygon: list[list[float]], iterations: int, corner_cut: float = 0.25) -> list[list[float]]:
points = _normalize_polygon(polygon)
q = max(0.02, min(float(corner_cut), 0.25))
for _ in range(max(0, iterations)):
if len(points) < 3:
break
next_points: list[list[float]] = []
for index, current in enumerate(points):
following = points[(index + 1) % len(points)]
next_points.append([
_clamp01((1.0 - q) * current[0] + q * following[0]),
_clamp01((1.0 - q) * current[1] + q * following[1]),
])
next_points.append([
_clamp01(q * current[0] + (1.0 - q) * following[0]),
_clamp01(q * current[1] + (1.0 - q) * following[1]),
])
points = next_points
return points
def _simplify_polygon(polygon: list[list[float]], strength: float) -> list[list[float]]:
if len(polygon) < 3:
return polygon
contour = np.array([[[point[0], point[1]]] for point in polygon], dtype=np.float32)
arc_length = cv2.arcLength(contour, True)
epsilon = arc_length * (0.00015 + _smoothing_ratio(strength) * 0.00735)
approx = cv2.approxPolyDP(contour, epsilon, True).reshape(-1, 2)
if len(approx) < 3:
return polygon
return [[_clamp01(float(x)), _clamp01(float(y))] for x, y in approx]
def _smooth_polygon(polygon: list[list[float]], smoothing: dict[str, Any] | None) -> list[list[float]]:
if not smoothing:
return _normalize_polygon(polygon)
strength = float(smoothing.get("strength") or 0.0)
if strength <= 0:
return _normalize_polygon(polygon)
effective_strength = _smoothing_ratio(strength, curve=1.45) * 100.0
if effective_strength >= 85:
iterations = 4
elif effective_strength >= 55:
iterations = 3
elif effective_strength >= 25:
iterations = 2
else:
iterations = 1
corner_cut = 0.03 + _smoothing_ratio(strength, curve=1.35) * 0.22
normalized = _normalize_polygon(polygon)
pre_simplified = _simplify_polygon(normalized, effective_strength * 0.25)
smoothed = _chaikin_smooth_polygon(pre_simplified, iterations, corner_cut)
simplified = _simplify_polygon(smoothed, effective_strength)
if len(simplified) > len(normalized):
for fallback_strength in (25.0, 35.0, 50.0, 70.0, 90.0, 100.0):
simplified = _simplify_polygon(simplified, max(effective_strength, fallback_strength))
if len(simplified) <= len(normalized):
break
return simplified if len(simplified) >= 3 else _normalize_polygon(polygon)
def _bbox_area(bbox: list[float]) -> float:
return max(float(bbox[2]), 0.0) * max(float(bbox[3]), 0.0)
def _bbox_overlap_ratio(a: list[float], b: list[float]) -> float:
ax1, ay1, aw, ah = a
bx1, by1, bw, bh = b
ax2 = ax1 + aw
ay2 = ay1 + ah
bx2 = bx1 + bw
by2 = by1 + bh
overlap_width = max(0.0, min(ax2, bx2) - max(ax1, bx1))
overlap_height = max(0.0, min(ay2, by2) - max(ay1, by1))
overlap_area = overlap_width * overlap_height
smallest_area = min(_bbox_area(a), _bbox_area(b))
return overlap_area / smallest_area if smallest_area > 0 else 0.0
def _stable_json(value: Any) -> str:
return json.dumps(value, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
def _canonicalize_signature_value(value: Any) -> Any:
if isinstance(value, float):
return round(value, 6)
if isinstance(value, list):
return [_canonicalize_signature_value(item) for item in value]
if isinstance(value, dict):
return {key: _canonicalize_signature_value(value[key]) for key in sorted(value)}
return value
def _seed_signature(seed: dict[str, Any]) -> str:
"""Return a stable signature for seed geometry and semantic attrs."""
inherited_signature = seed.get("propagation_seed_signature")
if inherited_signature:
return str(inherited_signature)
signature_payload = {
"polygons": seed.get("polygons") or [],
"holes": seed.get("holes") or [],
"bbox": seed.get("bbox") or [],
"points": seed.get("points") or [],
"labels": seed.get("labels") or [],
"label": seed.get("label"),
"color": seed.get("color"),
"class_metadata": seed.get("class_metadata") or {},
"template_id": seed.get("template_id"),
"smoothing": _normalize_smoothing_options(seed.get("smoothing")),
}
return hashlib.sha256(_stable_json(_canonicalize_signature_value(signature_payload)).encode("utf-8")).hexdigest()
def _seed_key(seed: dict[str, Any]) -> str:
"""Prefer stable persisted ids; fall back to semantic attrs for legacy callers."""
source_instance_id = seed.get("source_instance_id")
if source_instance_id:
return f"instance:{source_instance_id}"
source_annotation_id = seed.get("source_annotation_id")
if source_annotation_id is not None:
return f"annotation:{source_annotation_id}"
source_mask_id = seed.get("source_mask_id")
if source_mask_id:
return f"mask:{source_mask_id}"
class_metadata = seed.get("class_metadata") or {}
class_id = class_metadata.get("id") or class_metadata.get("name")
return _stable_json({
"template_id": seed.get("template_id"),
"class_id": class_id,
"label": seed.get("label"),
"color": seed.get("color"),
})
def _semantic_seed_matches(mask_data: dict[str, Any], seed: dict[str, Any]) -> bool:
"""Best-effort match when a manually edited replacement lacks old lineage ids."""
class_metadata = seed.get("class_metadata") or {}
previous_class = mask_data.get("class") or {}
previous_class_id = previous_class.get("id") or previous_class.get("name")
class_id = class_metadata.get("id") or class_metadata.get("name")
if previous_class_id and class_id and str(previous_class_id) != str(class_id):
return False
return (
mask_data.get("label") == seed.get("label")
and mask_data.get("color") == seed.get("color")
)
def _legacy_seed_matches(mask_data: dict[str, Any], seed: dict[str, Any]) -> bool:
"""Best-effort match for propagation annotations created before seed keys."""
class_metadata = seed.get("class_metadata") or {}
previous_class = mask_data.get("class") or {}
previous_class_id = previous_class.get("id") or previous_class.get("name")
class_id = class_metadata.get("id") or class_metadata.get("name")
return (
mask_data.get("label") == seed.get("label")
and mask_data.get("color") == seed.get("color")
and previous_class_id == class_id
)
def _source_model_matches(mask_data: dict[str, Any], model_id: str) -> bool:
return str(mask_data.get("source") or "") == f"{model_id}_propagation"
def _seed_identity_matches(mask_data: dict[str, Any], seed_key: str, seed: dict[str, Any]) -> bool:
previous_seed_key = mask_data.get("propagation_seed_key")
if previous_seed_key == seed_key:
return True
source_instance_id = seed.get("source_instance_id")
if source_instance_id and (
mask_data.get("source_instance_id") == source_instance_id
or mask_data.get("instance_id") == source_instance_id
):
return True
source_annotation_id = seed.get("source_annotation_id")
if source_annotation_id is not None and str(mask_data.get("source_annotation_id") or "") == str(source_annotation_id):
return True
source_mask_id = seed.get("source_mask_id")
if source_mask_id and mask_data.get("source_mask_id") == source_mask_id:
return True
has_persisted_seed_identity = bool(source_instance_id) or source_annotation_id is not None or bool(source_mask_id)
has_previous_identity = (
bool(previous_seed_key)
or mask_data.get("source_instance_id") is not None
or mask_data.get("instance_id") is not None
or mask_data.get("source_annotation_id") is not None
or bool(mask_data.get("source_mask_id"))
)
if has_persisted_seed_identity or has_previous_identity:
return False
return _legacy_seed_matches(mask_data, seed)
def _seed_identity_markers(seed: dict[str, Any]) -> set[str]:
markers = {f"seed:{_seed_key(seed)}"}
source_instance_id = seed.get("source_instance_id")
if source_instance_id:
markers.add(f"instance:{source_instance_id}")
source_annotation_id = seed.get("source_annotation_id")
if source_annotation_id is not None:
markers.add(f"annotation:{source_annotation_id}")
source_mask_id = seed.get("source_mask_id")
if source_mask_id:
markers.add(f"mask:{source_mask_id}")
return markers
def _mask_identity_markers(mask_data: dict[str, Any]) -> set[str]:
markers: set[str] = set()
previous_seed_key = mask_data.get("propagation_seed_key")
if previous_seed_key:
markers.add(f"seed:{previous_seed_key}")
source_instance_id = mask_data.get("source_instance_id")
if source_instance_id:
markers.add(f"instance:{source_instance_id}")
instance_id = mask_data.get("instance_id")
if instance_id:
markers.add(f"instance:{instance_id}")
source_annotation_id = mask_data.get("source_annotation_id")
if source_annotation_id is not None:
markers.add(f"annotation:{source_annotation_id}")
source_mask_id = mask_data.get("source_mask_id")
if source_mask_id:
markers.add(f"mask:{source_mask_id}")
return markers
def _payload_seed_identity_markers(payload: dict[str, Any]) -> set[str]:
markers: set[str] = set()
for step in payload.get("steps") or []:
seed = step.get("seed") or {}
markers.update(_seed_identity_markers(seed))
return markers
def _is_propagation_annotation(annotation: Annotation, seed_key: str, seed: dict[str, Any]) -> bool:
mask_data = annotation.mask_data or {}
source = str(mask_data.get("source") or "")
if not source.endswith("_propagation"):
return False
return _seed_identity_matches(mask_data, seed_key, seed)
def _direction_matches(mask_data: dict[str, Any], direction: str) -> bool:
previous_direction = mask_data.get("propagation_direction")
return previous_direction in {None, direction}
def _annotation_spatially_matches(annotation: Annotation, polygon: list[list[float]]) -> bool:
"""Use target-frame overlap as a final guard before replacing same-object propagation."""
candidate_bbox = _polygon_bbox(polygon)
for previous_polygon in (annotation.mask_data or {}).get("polygons") or []:
if len(previous_polygon) < 3:
continue
if _bbox_overlap_ratio(_polygon_bbox(previous_polygon), candidate_bbox) >= 0.15:
return True
return False
def _delete_replaced_frame_annotations(
db: Session,
*,
payload: dict[str, Any],
frame_id: int,
seed_key: str,
seed: dict[str, Any],
polygon: list[list[float]],
) -> int:
"""Delete old propagated masks for the same object immediately before writing a new result."""
previous_annotations = (
db.query(Annotation)
.filter(Annotation.project_id == int(payload["project_id"]))
.filter(Annotation.frame_id == frame_id)
.all()
)
deleted_count = 0
current_seed_markers = _seed_identity_markers(seed)
task_seed_markers = _payload_seed_identity_markers(payload)
for annotation in previous_annotations:
mask_data = annotation.mask_data or {}
source = str(mask_data.get("source") or "")
if not source.endswith("_propagation"):
continue
source_instance_id = seed.get("source_instance_id")
mask_instance_ids = {
str(value)
for value in (mask_data.get("source_instance_id"), mask_data.get("instance_id"))
if value
}
if source_instance_id and mask_instance_ids and str(source_instance_id) not in mask_instance_ids:
continue
mask_markers = _mask_identity_markers(mask_data)
# Keep sibling seeds in the same propagation task from deleting each other.
if mask_markers and mask_markers.isdisjoint(current_seed_markers) and not mask_markers.isdisjoint(task_seed_markers):
continue
same_lineage = _seed_identity_matches(mask_data, seed_key, seed)
same_manual_replacement = (
_semantic_seed_matches(mask_data, seed)
and _annotation_spatially_matches(annotation, polygon)
)
if same_lineage or same_manual_replacement:
db.delete(annotation)
deleted_count += 1
if deleted_count:
db.commit()
return deleted_count
def _prepare_seed_propagation(
db: Session,
*,
payload: dict[str, Any],
model_id: str,
seed: dict[str, Any],
direction: str,
target_frame_ids: set[int],
) -> dict[str, Any]:
seed_key = _seed_key(seed)
seed_signature = _seed_signature(seed)
if not target_frame_ids:
return {
"skip": True,
"seed_key": seed_key,
"seed_signature": seed_signature,
"deleted_annotation_count": 0,
}
previous_annotations = (
db.query(Annotation)
.filter(Annotation.project_id == int(payload["project_id"]))
.filter(Annotation.frame_id.in_(target_frame_ids))
.all()
)
matching = [
annotation for annotation in previous_annotations
if _is_propagation_annotation(annotation, seed_key, seed)
and _direction_matches(annotation.mask_data or {}, direction)
]
covered_frame_ids = {int(annotation.frame_id) for annotation in matching}
if matching and all(
(annotation.mask_data or {}).get("propagation_seed_signature") == seed_signature
and _source_model_matches(annotation.mask_data or {}, model_id)
for annotation in matching
) and target_frame_ids.issubset(covered_frame_ids):
return {
"skip": True,
"seed_key": seed_key,
"seed_signature": seed_signature,
"deleted_annotation_count": 0,
}
deleted_count = 0
if matching:
for annotation in matching:
db.delete(annotation)
deleted_count += 1
db.commit()
return {
"skip": False,
"seed_key": seed_key,
"seed_signature": seed_signature,
"deleted_annotation_count": deleted_count,
}
def _frame_window(
frames: list[Frame],
source_position: int,
direction: str,
max_frames: int,
) -> tuple[list[Frame], int]:
count = max(1, min(max_frames, len(frames)))
if direction == "backward":
start = max(0, source_position - count + 1)
return frames[start:source_position + 1], source_position - start
end = min(len(frames), source_position + count)
return frames[source_position:end], 0
def _write_frame_sequence(frames: list[Frame], directory: Path) -> list[str]:
paths = []
for index, frame in enumerate(frames):
data = download_file(frame.image_url)
# SAM2VideoPredictor sorts frames by converting the filename stem to int.
path = directory / f"{index:06d}.jpg"
path.write_bytes(data)
paths.append(str(path))
return paths
def _save_propagated_annotations(
db: Session,
*,
payload: dict[str, Any],
selected_frames: list[Frame],
source_frame: Frame,
propagated: list[dict[str, Any]],
seed: dict[str, Any],
) -> tuple[list[Annotation], int]:
created: list[Annotation] = []
if payload.get("save_annotations", True) is False:
return created, 0
class_metadata = seed.get("class_metadata")
template_id = seed.get("template_id")
label = seed.get("label") or "Propagated Mask"
color = seed.get("color") or "#06b6d4"
model_id = sam_registry.normalize_model_id(payload.get("model"))
include_source = bool(payload.get("include_source", False))
seed_key = _seed_key(seed)
seed_signature = _seed_signature(seed)
source_annotation_id = seed.get("source_annotation_id")
source_mask_id = seed.get("source_mask_id")
source_instance_id = seed.get("source_instance_id") or seed_key
smoothing = _normalize_smoothing_options(seed.get("smoothing"))
direction = str(payload.get("current_direction") or "")
deleted_count = 0
cleaned_frame_ids: set[int] = set()
for frame_result in propagated:
relative_index = int(frame_result.get("frame_index", -1))
if relative_index < 0 or relative_index >= len(selected_frames):
continue
frame = selected_frames[relative_index]
if not include_source and frame.id == source_frame.id:
continue
result_polygons = frame_result.get("polygons") or []
result_holes = frame_result.get("holes") or []
scores = frame_result.get("scores") or []
prepared_polygons = [
(polygon_index, _smooth_polygon(polygon, smoothing))
for polygon_index, polygon in enumerate(result_polygons)
if len(polygon) >= 3
]
cleanup_polygon = next((polygon for _polygon_index, polygon in prepared_polygons if len(polygon) >= 3), None)
if cleanup_polygon is not None and frame.id not in cleaned_frame_ids:
deleted_count += _delete_replaced_frame_annotations(
db,
payload=payload,
frame_id=int(frame.id),
seed_key=seed_key,
seed=seed,
polygon=cleanup_polygon,
)
cleaned_frame_ids.add(int(frame.id))
polygons_to_save: list[list[list[float]]] = []
holes_to_save: list[list[list[list[float]]]] = []
score_values: list[float] = []
for polygon_index, polygon in prepared_polygons:
if len(polygon) < 3:
continue
polygons_to_save.append(polygon)
hole_group = result_holes[polygon_index] if polygon_index < len(result_holes) and isinstance(result_holes[polygon_index], list) else []
holes_to_save.append(hole_group if isinstance(hole_group, list) else [])
if polygon_index < len(scores):
try:
score_values.append(float(scores[polygon_index]))
except (TypeError, ValueError):
pass
if not polygons_to_save:
continue
annotation = Annotation(
project_id=int(payload["project_id"]),
frame_id=frame.id,
template_id=template_id,
mask_data={
"polygons": polygons_to_save,
**({"holes": holes_to_save, "hasHoles": True} if any(holes_to_save) else {}),
"label": label,
"color": color,
"source": f"{model_id}_propagation",
"propagated_from_frame_id": source_frame.id,
"propagated_from_frame_index": source_frame.frame_index,
"propagation_seed_key": seed_key,
"propagation_seed_signature": seed_signature,
"propagation_direction": direction,
"instance_id": source_instance_id,
"source_instance_id": source_instance_id,
"source_annotation_id": source_annotation_id,
"source_mask_id": source_mask_id,
"score": max(score_values) if score_values else None,
**({"scores": score_values} if len(score_values) > 1 else {}),
**({"geometry_smoothing": smoothing} if smoothing else {}),
**({"class": class_metadata} if class_metadata else {}),
},
points=None,
bbox=_polygons_bbox(polygons_to_save),
)
db.add(annotation)
created.append(annotation)
db.commit()
for annotation in created:
db.refresh(annotation)
return created, deleted_count
def _run_one_step(
db: Session,
*,
payload: dict[str, Any],
frames: list[Frame],
source_frame: Frame,
source_position: int,
step: dict[str, Any],
) -> dict[str, Any]:
direction = str(step.get("direction") or "forward").lower()
if direction not in {"forward", "backward"}:
raise ValueError("direction must be forward or backward")
max_frames = max(1, min(int(step.get("max_frames") or payload.get("max_frames") or 30), 500))
seed = step.get("seed") or {}
if not (seed.get("polygons") or seed.get("bbox") or seed.get("points")):
raise ValueError("Propagation requires seed polygons, bbox, or points")
model_id = sam_registry.normalize_model_id(payload.get("model"))
selected_frames, source_relative_index = _frame_window(frames, source_position, direction, max_frames)
include_source = bool(payload.get("include_source", False))
target_frame_ids = {
int(frame.id)
for frame in selected_frames
if include_source or frame.id != source_frame.id
}
seed_state = _prepare_seed_propagation(
db,
payload=payload,
model_id=model_id,
seed=seed,
direction=direction,
target_frame_ids=target_frame_ids,
)
if seed_state["skip"]:
return {
"model": model_id,
"direction": direction,
"processed_frame_count": 0,
"created_annotation_count": 0,
"deleted_annotation_count": 0,
"skipped_seed_count": 1,
"seed_label": seed.get("label"),
"seed_key": seed_state["seed_key"],
}
with tempfile.TemporaryDirectory(prefix=f"seg_propagate_{payload['project_id']}_") as tmpdir:
frame_paths = _write_frame_sequence(selected_frames, Path(tmpdir))
propagated = sam_registry.propagate_video(
model_id,
frame_paths,
source_relative_index,
seed,
direction,
len(selected_frames),
)
save_payload = {**payload, "current_direction": direction}
created, write_cleanup_count = _save_propagated_annotations(
db,
payload=save_payload,
selected_frames=selected_frames,
source_frame=source_frame,
propagated=propagated,
seed=seed,
)
return {
"model": model_id,
"direction": direction,
"processed_frame_count": len(selected_frames),
"created_annotation_count": len(created),
"deleted_annotation_count": int(seed_state["deleted_annotation_count"]) + write_cleanup_count,
"skipped_seed_count": 0,
"seed_label": seed.get("label"),
"seed_key": seed_state["seed_key"],
}
def run_propagate_project_task(db: Session, task_id: int) -> dict[str, Any]:
"""Run one queued SAM propagation task and update persisted progress."""
task = db.query(ProcessingTask).filter(ProcessingTask.id == task_id).first()
if not task:
raise ValueError(f"Task not found: {task_id}")
if task.status == TASK_STATUS_CANCELLED:
return {"task_id": task.id, "status": TASK_STATUS_CANCELLED, "message": task.message or "任务已取消"}
payload = task.payload or {}
project_id = int(payload.get("project_id") or task.project_id or 0)
source_frame_id = int(payload.get("frame_id") or 0)
try:
model_id = sam_registry.normalize_model_id(payload.get("model"))
except ValueError as exc:
_set_task_state(db, task, status=TASK_STATUS_FAILED, progress=100, message="自动传播失败", error=str(exc), finished=True)
raise
project = db.query(Project).filter(Project.id == project_id).first()
if not project:
_set_task_state(db, task, status=TASK_STATUS_FAILED, progress=100, message="项目不存在", error="Project not found", finished=True)
raise ValueError(f"Project not found: {project_id}")
source_frame = db.query(Frame).filter(Frame.id == source_frame_id, Frame.project_id == project_id).first()
if not source_frame:
_set_task_state(db, task, status=TASK_STATUS_FAILED, progress=100, message="参考帧不存在", error="Frame not found", finished=True)
raise ValueError(f"Frame not found: {source_frame_id}")
frames = db.query(Frame).filter(Frame.project_id == project_id).order_by(Frame.frame_index).all()
source_position = next((index for index, frame in enumerate(frames) if frame.id == source_frame.id), None)
if source_position is None:
_set_task_state(db, task, status=TASK_STATUS_FAILED, progress=100, message="参考帧不在项目帧序列中", error="Source frame is not in project frame sequence", finished=True)
raise ValueError("Source frame is not in project frame sequence")
steps = payload.get("steps") or []
if not steps:
_set_task_state(db, task, status=TASK_STATUS_FAILED, progress=100, message="传播任务缺少步骤", error="Propagation task has no steps", finished=True)
raise ValueError("Propagation task has no steps")
_ensure_not_cancelled(db, task)
_set_task_state(db, task, status=TASK_STATUS_RUNNING, progress=5, message="自动传播任务已启动", started=True)
step_results: list[dict[str, Any]] = []
created_count = 0
processed_count = 0
deleted_count = 0
skipped_count = 0
total_steps = len(steps)
try:
for index, step in enumerate(steps, start=1):
_ensure_not_cancelled(db, task)
seed_label = (step.get("seed") or {}).get("label") or "mask"
direction_label = "向前传播" if step.get("direction") == "backward" else "向后传播"
progress_before = 5 + int(((index - 1) / total_steps) * 90)
_set_task_state(
db,
task,
progress=progress_before,
message=f"{direction_label} {seed_label} ({index}/{total_steps})",
result={
"project_id": project_id,
"source_frame_id": source_frame_id,
"model": model_id,
"total_steps": total_steps,
"completed_steps": index - 1,
"processed_frame_count": processed_count,
"created_annotation_count": created_count,
"deleted_annotation_count": deleted_count,
"skipped_seed_count": skipped_count,
"steps": step_results,
},
)
result = _run_one_step(
db,
payload=payload,
frames=frames,
source_frame=source_frame,
source_position=source_position,
step=step,
)
step_results.append(result)
created_count += int(result["created_annotation_count"])
processed_count += int(result["processed_frame_count"])
deleted_count += int(result.get("deleted_annotation_count") or 0)
skipped_count += int(result.get("skipped_seed_count") or 0)
_set_task_state(
db,
task,
progress=5 + int((index / total_steps) * 90),
message=f"{direction_label} {seed_label} 完成 ({index}/{total_steps})",
result={
"project_id": project_id,
"source_frame_id": source_frame_id,
"model": model_id,
"total_steps": total_steps,
"completed_steps": index,
"processed_frame_count": processed_count,
"created_annotation_count": created_count,
"deleted_annotation_count": deleted_count,
"skipped_seed_count": skipped_count,
"steps": step_results,
},
)
result = {
"project_id": project_id,
"source_frame_id": source_frame_id,
"model": model_id,
"total_steps": total_steps,
"completed_steps": total_steps,
"processed_frame_count": processed_count,
"created_annotation_count": created_count,
"deleted_annotation_count": deleted_count,
"skipped_seed_count": skipped_count,
"steps": step_results,
}
_set_task_state(
db,
task,
status=TASK_STATUS_SUCCESS,
progress=100,
message="自动传播完成" if created_count > 0 else (
"自动传播完成,未改变的 mask 已跳过" if skipped_count > 0 else "自动传播完成,但没有生成新的 mask"
),
result=result,
finished=True,
)
return result
except PropagationTaskCancelled:
task.status = TASK_STATUS_CANCELLED
task.progress = 100
task.message = task.message or "任务已取消"
task.error = task.error or "Cancelled by user"
task.finished_at = task.finished_at or _now()
db.commit()
db.refresh(task)
publish_task_progress_event(task)
return {"task_id": task.id, "project_id": project_id, "status": TASK_STATUS_CANCELLED, "message": task.message}
except (ModelUnavailableError, NotImplementedError, ValueError) as exc:
_set_task_state(db, task, status=TASK_STATUS_FAILED, progress=100, message="自动传播失败", error=str(exc), finished=True)
raise
except Exception as exc: # noqa: BLE001
logger.exception("Propagation task failed: task_id=%s", task.id)
_set_task_state(db, task, status=TASK_STATUS_FAILED, progress=100, message="自动传播失败", error=str(exc), finished=True)
raise

View File

@@ -0,0 +1,690 @@
"""SAM 2 engine wrapper with lazy loading and explicit runtime status."""
import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import numpy as np
from config import settings
logger = logging.getLogger(__name__)
DEFAULT_SAM2_MODEL_ID = "sam2.1_hiera_tiny"
@dataclass(frozen=True)
class SAM2Variant:
"""One selectable SAM 2.1 runtime variant."""
id: str
label: str
short_label: str
config: str
legacy_config: str
checkpoint_filename: str
legacy_checkpoint_filename: str
SAM2_VARIANTS: dict[str, SAM2Variant] = {
"sam2.1_hiera_tiny": SAM2Variant(
id="sam2.1_hiera_tiny",
label="SAM 2.1 Tiny",
short_label="tiny",
config="configs/sam2.1/sam2.1_hiera_t.yaml",
legacy_config="configs/sam2/sam2_hiera_t.yaml",
checkpoint_filename="sam2.1_hiera_tiny.pt",
legacy_checkpoint_filename="sam2_hiera_tiny.pt",
),
"sam2.1_hiera_small": SAM2Variant(
id="sam2.1_hiera_small",
label="SAM 2.1 Small",
short_label="small",
config="configs/sam2.1/sam2.1_hiera_s.yaml",
legacy_config="configs/sam2/sam2_hiera_s.yaml",
checkpoint_filename="sam2.1_hiera_small.pt",
legacy_checkpoint_filename="sam2_hiera_small.pt",
),
"sam2.1_hiera_base_plus": SAM2Variant(
id="sam2.1_hiera_base_plus",
label="SAM 2.1 Base+",
short_label="base+",
config="configs/sam2.1/sam2.1_hiera_b+.yaml",
legacy_config="configs/sam2/sam2_hiera_b+.yaml",
checkpoint_filename="sam2.1_hiera_base_plus.pt",
legacy_checkpoint_filename="sam2_hiera_base_plus.pt",
),
"sam2.1_hiera_large": SAM2Variant(
id="sam2.1_hiera_large",
label="SAM 2.1 Large",
short_label="large",
config="configs/sam2.1/sam2.1_hiera_l.yaml",
legacy_config="configs/sam2/sam2_hiera_l.yaml",
checkpoint_filename="sam2.1_hiera_large.pt",
legacy_checkpoint_filename="sam2_hiera_large.pt",
),
}
SAM2_MODEL_ALIASES = {
"sam2": DEFAULT_SAM2_MODEL_ID,
"sam2.1": DEFAULT_SAM2_MODEL_ID,
"sam2_tiny": DEFAULT_SAM2_MODEL_ID,
}
# ---------------------------------------------------------------------------
# Attempt to import PyTorch and SAM 2; fall back to stubs if unavailable.
# ---------------------------------------------------------------------------
try:
import torch
TORCH_AVAILABLE = True
except Exception as exc: # noqa: BLE001
TORCH_AVAILABLE = False
torch = None # type: ignore[assignment]
logger.warning("PyTorch import failed (%s). SAM2 will be unavailable.", exc)
try:
from sam2.build_sam import build_sam2
from sam2.build_sam import build_sam2_video_predictor
from sam2.sam2_image_predictor import SAM2ImagePredictor
SAM2_AVAILABLE = True
logger.info("SAM2 library imported successfully.")
except Exception as exc: # noqa: BLE001
SAM2_AVAILABLE = False
logger.warning("SAM2 import failed (%s). Using stub engine.", exc)
class SAM2Engine:
"""Lazy-loaded SAM 2 inference engine."""
def __init__(self) -> None:
self._predictors: dict[str, Optional[SAM2ImagePredictor]] = {}
self._video_predictors: dict[str, object | None] = {}
self._model_loaded: dict[str, bool] = {}
self._video_model_loaded: dict[str, bool] = {}
self._loaded_device: dict[str, str] = {}
self._last_error: dict[str, str | None] = {}
self._video_last_error: dict[str, str | None] = {}
# -----------------------------------------------------------------------
# Internal helpers
# -----------------------------------------------------------------------
def variant_ids(self) -> list[str]:
return list(SAM2_VARIANTS.keys())
def normalize_model_id(self, model_id: str | None) -> str:
selected = (model_id or settings.sam_default_model or DEFAULT_SAM2_MODEL_ID).lower()
selected = SAM2_MODEL_ALIASES.get(selected, selected)
if selected not in SAM2_VARIANTS:
raise ValueError(f"Unsupported SAM2 model: {model_id}")
return selected
def is_sam2_model(self, model_id: str | None) -> bool:
try:
self.normalize_model_id(model_id)
return True
except ValueError:
return False
def _models_dir(self) -> Path:
configured_path = Path(settings.sam_model_path)
return configured_path.parent if configured_path.parent else Path("models")
def _variant(self, model_id: str | None) -> SAM2Variant:
return SAM2_VARIANTS[self.normalize_model_id(model_id)]
def _checkpoint_config(self, model_id: str | None) -> tuple[str, str]:
variant_id = self.normalize_model_id(model_id)
variant = SAM2_VARIANTS[variant_id]
models_dir = self._models_dir()
candidates: list[tuple[str, str]] = []
configured_path = Path(settings.sam_model_path)
if variant_id == DEFAULT_SAM2_MODEL_ID and configured_path.is_file():
candidates.append((settings.sam_model_config, str(configured_path)))
candidates.extend([
(variant.config, str(models_dir / variant.checkpoint_filename)),
(variant.legacy_config, str(models_dir / variant.legacy_checkpoint_filename)),
])
for config, checkpoint_path in candidates:
if os.path.isfile(checkpoint_path):
return config, checkpoint_path
return candidates[0]
def _load_model(self, model_id: str | None = None) -> None:
"""Load the SAM 2 model and predictor on first use."""
variant_id = self.normalize_model_id(model_id)
if self._model_loaded.get(variant_id):
return
if not TORCH_AVAILABLE:
self._last_error[variant_id] = "PyTorch is not installed."
logger.warning("PyTorch not available; skipping SAM2 model load.")
self._model_loaded[variant_id] = True
return
if not SAM2_AVAILABLE:
self._last_error[variant_id] = "sam2 package is not installed."
logger.warning("SAM2 not available; skipping model load.")
self._model_loaded[variant_id] = True
return
config, checkpoint_path = self._checkpoint_config(variant_id)
if not os.path.isfile(checkpoint_path):
self._last_error[variant_id] = f"SAM2 checkpoint not found: {checkpoint_path}"
logger.error("SAM checkpoint not found at %s", checkpoint_path)
self._model_loaded[variant_id] = True
return
try:
device = self._best_device()
model = build_sam2(
config,
checkpoint_path,
device=device,
)
self._predictors[variant_id] = SAM2ImagePredictor(model)
self._model_loaded[variant_id] = True
self._loaded_device[variant_id] = device
self._last_error[variant_id] = None
logger.info("SAM 2 model %s loaded from %s on %s", variant_id, checkpoint_path, device)
except Exception as exc: # noqa: BLE001
self._last_error[variant_id] = str(exc)
logger.error("Failed to load SAM 2 model %s: %s", variant_id, exc)
self._model_loaded[variant_id] = True # Prevent repeated load attempts
def _load_video_model(self, model_id: str | None = None) -> None:
"""Load the SAM 2 video predictor on first propagation use."""
variant_id = self.normalize_model_id(model_id)
if self._video_model_loaded.get(variant_id):
return
if not TORCH_AVAILABLE:
self._video_last_error[variant_id] = "PyTorch is not installed."
self._video_model_loaded[variant_id] = True
return
if not SAM2_AVAILABLE:
self._video_last_error[variant_id] = "sam2 package is not installed."
self._video_model_loaded[variant_id] = True
return
config, checkpoint_path = self._checkpoint_config(variant_id)
if not os.path.isfile(checkpoint_path):
self._video_last_error[variant_id] = f"SAM2 checkpoint not found: {checkpoint_path}"
self._video_model_loaded[variant_id] = True
return
try:
device = self._best_device()
self._video_predictors[variant_id] = build_sam2_video_predictor(
config,
checkpoint_path,
device=device,
)
self._video_model_loaded[variant_id] = True
self._loaded_device[variant_id] = device
self._video_last_error[variant_id] = None
logger.info("SAM 2 video predictor %s loaded from %s on %s", variant_id, checkpoint_path, device)
except Exception as exc: # noqa: BLE001
self._video_last_error[variant_id] = str(exc)
self._video_model_loaded[variant_id] = True
logger.error("Failed to load SAM 2 video predictor %s: %s", variant_id, exc)
def _best_device(self) -> str:
if TORCH_AVAILABLE and torch is not None and torch.cuda.is_available():
return "cuda"
return "cpu"
def _ensure_ready(self, model_id: str | None = None) -> bool:
"""Ensure the model is loaded; return whether it is usable."""
variant_id = self.normalize_model_id(model_id)
self._load_model(variant_id)
return SAM2_AVAILABLE and self._predictors.get(variant_id) is not None
def _ensure_video_ready(self, model_id: str | None = None) -> bool:
"""Ensure the video predictor is loaded; return whether it is usable."""
variant_id = self.normalize_model_id(model_id)
self._load_video_model(variant_id)
return SAM2_AVAILABLE and self._video_predictors.get(variant_id) is not None
def status(self, model_id: str | None = None) -> dict:
"""Return lightweight, real runtime status without forcing model load."""
variant_id = self.normalize_model_id(model_id)
variant = SAM2_VARIANTS[variant_id]
_, checkpoint_path = self._checkpoint_config(variant_id)
checkpoint_exists = os.path.isfile(checkpoint_path)
using_legacy_checkpoint = Path(checkpoint_path).name == variant.legacy_checkpoint_filename
predictor = self._predictors.get(variant_id)
device = self._loaded_device.get(variant_id) or self._best_device()
available = bool(TORCH_AVAILABLE and SAM2_AVAILABLE and checkpoint_exists)
if predictor is not None:
message = f"{variant.label} model loaded and ready."
elif available:
message = f"{variant.label} dependencies and checkpoint are present; model will load on first inference."
if using_legacy_checkpoint:
message += " Using legacy SAM 2 checkpoint fallback."
else:
missing = []
if not TORCH_AVAILABLE:
missing.append("PyTorch")
if not SAM2_AVAILABLE:
missing.append("sam2 package")
if not checkpoint_exists:
missing.append("checkpoint")
message = f"{variant.label} unavailable: missing {', '.join(missing)}."
last_error = self._last_error.get(variant_id)
if last_error and not predictor:
message = last_error
return {
"id": variant.id,
"label": variant.label,
"available": available,
"loaded": predictor is not None,
"device": device,
"supports": ["point", "box", "interactive", "auto", "propagate"],
"message": message,
"package_available": SAM2_AVAILABLE,
"checkpoint_exists": checkpoint_exists,
"checkpoint_path": checkpoint_path,
"python_ok": True,
"torch_ok": TORCH_AVAILABLE,
"cuda_required": False,
}
# -----------------------------------------------------------------------
# Public API
# -----------------------------------------------------------------------
def predict_points(
self,
model_id: str | None,
image: np.ndarray,
points: list[list[float]],
labels: list[int],
) -> tuple[list[list[list[float]]], list[float]]:
"""Run point-prompt segmentation.
Args:
image: HWC numpy array (uint8).
points: List of [x, y] normalized coordinates (0-1).
labels: 1 for foreground, 0 for background.
Returns:
Tuple of (polygons, scores).
"""
variant_id = self.normalize_model_id(model_id)
if not self._ensure_ready(variant_id):
logger.warning("SAM2 not ready; returning dummy masks.")
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
try:
predictor = self._predictors[variant_id]
h, w = image.shape[:2]
pts = np.array([[p[0] * w, p[1] * h] for p in points], dtype=np.float32)
lbls = np.array(labels, dtype=np.int32)
with torch.inference_mode(): # type: ignore[name-defined]
predictor.set_image(image)
masks, scores, _ = predictor.predict(
point_coords=pts,
point_labels=lbls,
multimask_output=False,
)
polygons = []
for m in masks:
poly = self._mask_to_polygon(m)
if poly:
polygons.append(poly)
return polygons, scores.tolist()
except Exception as exc: # noqa: BLE001
logger.error("SAM2 point prediction failed: %s", exc)
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
def predict_box(
self,
model_id: str | None,
image: np.ndarray,
box: list[float],
) -> tuple[list[list[list[float]]], list[float]]:
"""Run box-prompt segmentation.
Args:
image: HWC numpy array (uint8).
box: [x1, y1, x2, y2] normalized coordinates.
Returns:
Tuple of (polygons, scores).
"""
variant_id = self.normalize_model_id(model_id)
if not self._ensure_ready(variant_id):
logger.warning("SAM2 not ready; returning dummy masks.")
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
try:
predictor = self._predictors[variant_id]
h, w = image.shape[:2]
bbox = np.array(
[box[0] * w, box[1] * h, box[2] * w, box[3] * h],
dtype=np.float32,
)
with torch.inference_mode(): # type: ignore[name-defined]
predictor.set_image(image)
masks, scores, _ = predictor.predict(
box=bbox[None, :],
multimask_output=False,
)
polygons = []
for m in masks:
poly = self._mask_to_polygon(m)
if poly:
polygons.append(poly)
return polygons, scores.tolist()
except Exception as exc: # noqa: BLE001
logger.error("SAM2 box prediction failed: %s", exc)
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
def predict_interactive(
self,
model_id: str | None,
image: np.ndarray,
box: list[float] | None,
points: list[list[float]],
labels: list[int],
) -> tuple[list[list[list[float]]], list[float]]:
"""Run combined box and point prompt segmentation for refinement."""
variant_id = self.normalize_model_id(model_id)
if not self._ensure_ready(variant_id):
logger.warning("SAM2 not ready; returning dummy masks.")
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
try:
predictor = self._predictors[variant_id]
h, w = image.shape[:2]
bbox = None
if box:
bbox = np.array(
[box[0] * w, box[1] * h, box[2] * w, box[3] * h],
dtype=np.float32,
)
pts = None
lbls = None
if points:
pts = np.array([[p[0] * w, p[1] * h] for p in points], dtype=np.float32)
lbls = np.array(labels, dtype=np.int32)
with torch.inference_mode(): # type: ignore[name-defined]
predictor.set_image(image)
masks, scores, _ = predictor.predict(
point_coords=pts,
point_labels=lbls,
box=bbox,
multimask_output=False,
)
polygons = []
for m in masks:
poly = self._mask_to_polygon(m)
if poly:
polygons.append(poly)
return polygons, scores.tolist()
except Exception as exc: # noqa: BLE001
logger.error("SAM2 interactive prediction failed: %s", exc)
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
def predict_auto(self, model_id: str | None, image: np.ndarray) -> tuple[list[list[list[float]]], list[float]]:
"""Run automatic mask generation (grid of points).
Args:
image: HWC numpy array (uint8).
Returns:
Tuple of (polygons, scores).
"""
variant_id = self.normalize_model_id(model_id)
if not self._ensure_ready(variant_id):
logger.warning("SAM2 not ready; returning dummy masks.")
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
try:
predictor = self._predictors[variant_id]
with torch.inference_mode(): # type: ignore[name-defined]
predictor.set_image(image)
# Generate a uniform 16x16 grid of point prompts
h, w = image.shape[:2]
grid = np.mgrid[0:1:17j, 0:1:17j].reshape(2, -1).T
pts = grid * np.array([w, h])
lbls = np.ones(pts.shape[0], dtype=np.int32)
masks, scores, _ = predictor.predict(
point_coords=pts,
point_labels=lbls,
multimask_output=False,
)
polygons = []
for m in masks[:1]:
poly = self._mask_to_polygon(m)
if poly:
polygons.append(poly)
return polygons, scores[:1].tolist()
except Exception as exc: # noqa: BLE001
logger.error("SAM2 auto prediction failed: %s", exc)
return self._dummy_polygons(image.shape[1], image.shape[0]), [0.5]
def propagate_video(
self,
model_id: str | None,
frame_paths: list[str],
source_frame_index: int,
seed: dict,
direction: str = "forward",
max_frames: int | None = None,
) -> list[dict]:
"""Propagate one seed mask across a prepared frame directory with SAM 2 video."""
variant_id = self.normalize_model_id(model_id)
if not self._ensure_video_ready(variant_id):
raise RuntimeError(self._video_last_error.get(variant_id) or self.status(variant_id)["message"])
video_predictor = self._video_predictors[variant_id]
if not frame_paths:
return []
if source_frame_index < 0 or source_frame_index >= len(frame_paths):
raise ValueError("source_frame_index is outside the frame sequence.")
import cv2
source_image = cv2.imread(frame_paths[source_frame_index])
if source_image is None:
raise RuntimeError("Failed to decode source frame for SAM 2 propagation.")
height, width = source_image.shape[:2]
seed_mask = self._polygons_to_mask(seed.get("polygons") or [], width, height, seed.get("holes") or [])
if not seed_mask.any():
bbox = seed.get("bbox")
if isinstance(bbox, list) and len(bbox) == 4:
seed_mask = self._bbox_to_mask(bbox, width, height)
if not seed_mask.any():
raise ValueError("SAM 2 propagation requires a non-empty seed polygon or bbox.")
inference_state = video_predictor.init_state(
video_path=os.path.dirname(frame_paths[0]),
offload_video_to_cpu=True,
offload_state_to_cpu=True,
)
video_predictor.add_new_mask(
inference_state,
frame_idx=source_frame_index,
obj_id=1,
mask=seed_mask,
)
results: dict[int, dict] = {}
def collect(reverse: bool) -> None:
for out_frame_idx, out_obj_ids, out_mask_logits in video_predictor.propagate_in_video(
inference_state,
start_frame_idx=source_frame_index,
max_frame_num_to_track=max_frames,
reverse=reverse,
):
masks = out_mask_logits
if hasattr(masks, "detach"):
masks = masks.detach().cpu().numpy()
masks = np.asarray(masks)
if masks.ndim == 4:
masks = masks[:, 0]
polygons = []
holes = []
scores = []
for mask in masks:
mask_polygons, mask_holes = self._mask_to_polygon_data(mask > 0)
for polygon_index, polygon in enumerate(mask_polygons):
polygons.append(polygon)
holes.append(mask_holes[polygon_index] if polygon_index < len(mask_holes) else [])
scores.append(1.0)
results[int(out_frame_idx)] = {
"frame_index": int(out_frame_idx),
"polygons": polygons,
"holes": holes,
"scores": scores,
"object_ids": [int(obj_id) for obj_id in list(out_obj_ids)],
}
normalized_direction = direction.lower()
if normalized_direction in {"forward", "both"}:
collect(reverse=False)
if normalized_direction in {"backward", "both"}:
collect(reverse=True)
try:
video_predictor.reset_state(inference_state)
except Exception: # noqa: BLE001
pass
return [results[index] for index in sorted(results)]
# -----------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------
@staticmethod
def _mask_to_polygon(mask: np.ndarray) -> list[list[float]]:
"""Convert a binary mask to a normalized polygon."""
polygons, _holes = SAM2Engine._mask_to_polygon_data(mask)
return polygons[0] if polygons else []
@staticmethod
def _mask_to_polygon_data(mask: np.ndarray) -> tuple[list[list[list[float]]], list[list[list[list[float]]]]]:
"""Convert a binary mask to normalized outer polygons and aligned hole rings."""
import cv2
if mask.dtype != np.uint8:
mask = (mask > 0).astype(np.uint8)
contours, hierarchy = cv2.findContours(mask, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
h, w = mask.shape[:2]
if hierarchy is None:
return [], []
def contour_to_polygon(contour: np.ndarray) -> list[list[float]]:
if len(contour) < 3:
return []
return [[float(pt[0][0]) / w, float(pt[0][1]) / h] for pt in contour]
hierarchy_rows = hierarchy[0]
outer_indices = [
index for index, row in enumerate(hierarchy_rows)
if int(row[3]) < 0 and len(contours[index]) >= 3
]
outer_indices.sort(key=lambda index: cv2.contourArea(contours[index]), reverse=True)
polygons: list[list[list[float]]] = []
holes: list[list[list[list[float]]]] = []
for outer_index in outer_indices:
outer = contour_to_polygon(contours[outer_index])
if not outer:
continue
child_index = int(hierarchy_rows[outer_index][2])
hole_group: list[list[list[float]]] = []
while child_index >= 0:
hole = contour_to_polygon(contours[child_index])
if hole:
hole_group.append(hole)
child_index = int(hierarchy_rows[child_index][0])
polygons.append(outer)
holes.append(hole_group)
return polygons, holes
@staticmethod
def _dummy_polygons(w: int, h: int) -> list[list[list[float]]]:
"""Return a dummy rectangle polygon for fallback mode."""
return [
[
[0.25, 0.25],
[0.75, 0.25],
[0.75, 0.75],
[0.25, 0.75],
]
]
@staticmethod
def _polygons_to_mask(
polygons: list[list[list[float]]],
width: int,
height: int,
holes_by_polygon: list[list[list[list[float]]]] | None = None,
) -> np.ndarray:
import cv2
mask = np.zeros((height, width), dtype=np.uint8)
for polygon_index, polygon in enumerate(polygons):
if len(polygon) < 3:
continue
pts = np.array(
[
[
int(round(min(max(float(x), 0.0), 1.0) * max(width - 1, 1))),
int(round(min(max(float(y), 0.0), 1.0) * max(height - 1, 1))),
]
for x, y in polygon
],
dtype=np.int32,
)
cv2.fillPoly(mask, [pts], 1)
holes = holes_by_polygon[polygon_index] if holes_by_polygon and polygon_index < len(holes_by_polygon) else []
for hole in holes:
if len(hole) < 3:
continue
hole_pts = np.array(
[
[
int(round(min(max(float(x), 0.0), 1.0) * max(width - 1, 1))),
int(round(min(max(float(y), 0.0), 1.0) * max(height - 1, 1))),
]
for x, y in hole
],
dtype=np.int32,
)
cv2.fillPoly(mask, [hole_pts], 0)
return mask.astype(bool)
@staticmethod
def _bbox_to_mask(bbox: list[float], width: int, height: int) -> np.ndarray:
x, y, w, h = [min(max(float(value), 0.0), 1.0) for value in bbox]
left = int(round(x * max(width - 1, 1)))
top = int(round(y * max(height - 1, 1)))
right = int(round(min(x + w, 1.0) * max(width - 1, 1)))
bottom = int(round(min(y + h, 1.0) * max(height - 1, 1)))
mask = np.zeros((height, width), dtype=bool)
mask[top:max(bottom + 1, top + 1), left:max(right + 1, left + 1)] = True
return mask
# Singleton instance
sam_engine = SAM2Engine()

View File

@@ -0,0 +1,447 @@
"""SAM 3 engine adapter and runtime status.
The official facebookresearch/sam3 package currently targets Python 3.12+
and CUDA-capable PyTorch. This adapter reports those requirements honestly and
only performs inference when the local runtime can actually import and execute
the package.
"""
from __future__ import annotations
import importlib.util
import json
import logging
import os
import subprocess
import sys
import tempfile
import time
from pathlib import Path
from typing import Any
import numpy as np
from PIL import Image
from config import settings
from services.sam2_engine import SAM2Engine
logger = logging.getLogger(__name__)
try:
import torch
TORCH_AVAILABLE = True
except Exception as exc: # noqa: BLE001
TORCH_AVAILABLE = False
torch = None # type: ignore[assignment]
logger.warning("PyTorch import failed (%s). SAM3 will be unavailable.", exc)
SAM3_PACKAGE_AVAILABLE = importlib.util.find_spec("sam3") is not None
class SAM3Engine:
"""Lazy SAM 3 image inference adapter."""
def __init__(self) -> None:
self._model: Any | None = None
self._processor: Any | None = None
self._model_loaded = False
self._last_error: str | None = None
self._external_status_cache: dict[str, Any] | None = None
self._external_status_checked_at = 0.0
def _python_ok(self) -> bool:
return sys.version_info >= (3, 12)
def _gpu_ok(self) -> bool:
return bool(TORCH_AVAILABLE and torch is not None and torch.cuda.is_available())
def _checkpoint_path(self) -> str | None:
path = settings.sam3_checkpoint_path.strip()
return path if path else None
def _checkpoint_exists(self) -> bool:
path = self._checkpoint_path()
return bool(path and os.path.isfile(path))
def _can_load(self) -> bool:
return bool(
SAM3_PACKAGE_AVAILABLE
and TORCH_AVAILABLE
and self._python_ok()
and self._gpu_ok()
and self._checkpoint_exists()
)
def _worker_path(self) -> Path:
return Path(__file__).with_name("sam3_external_worker.py")
def _external_python_exists(self) -> bool:
return bool(settings.sam3_external_enabled and os.path.isfile(settings.sam3_external_python))
def _external_status(self, force: bool = False) -> dict[str, Any]:
now = time.monotonic()
if (
not force
and self._external_status_cache is not None
and now - self._external_status_checked_at < settings.sam3_status_cache_seconds
):
return self._external_status_cache
if not settings.sam3_external_enabled:
status = {
"available": False,
"package_available": False,
"python_ok": False,
"torch_ok": False,
"cuda_available": False,
"device": "unavailable",
"message": "SAM 3 external runtime is disabled.",
}
elif not self._external_python_exists():
status = {
"available": False,
"package_available": False,
"python_ok": False,
"torch_ok": False,
"cuda_available": False,
"device": "unavailable",
"message": f"SAM 3 external Python not found: {settings.sam3_external_python}",
}
else:
try:
env = os.environ.copy()
env["SAM3_MODEL_VERSION"] = settings.sam3_model_version
if self._checkpoint_path():
env["SAM3_CHECKPOINT_PATH"] = self._checkpoint_path() or ""
completed = subprocess.run(
[settings.sam3_external_python, str(self._worker_path()), "--status"],
capture_output=True,
text=True,
timeout=min(settings.sam3_timeout_seconds, 30),
check=False,
env=env,
)
if completed.returncode != 0:
detail = completed.stderr.strip() or completed.stdout.strip()
status = {
"available": False,
"package_available": False,
"python_ok": False,
"torch_ok": False,
"cuda_available": False,
"device": "unavailable",
"message": f"SAM 3 external status failed: {detail}",
}
else:
status = json.loads(completed.stdout)
except Exception as exc: # noqa: BLE001
status = {
"available": False,
"package_available": False,
"python_ok": False,
"torch_ok": False,
"cuda_available": False,
"device": "unavailable",
"message": f"SAM 3 external status failed: {exc}",
}
self._external_status_cache = status
self._external_status_checked_at = now
return status
def _load_model(self) -> None:
if self._model_loaded:
return
if not self._can_load():
self._last_error = self._status_message()
self._model_loaded = True
return
try:
from sam3.model.sam3_image_processor import Sam3Processor
from sam3.model_builder import build_sam3_image_model
self._model = build_sam3_image_model(
checkpoint_path=self._checkpoint_path(),
load_from_HF=False,
)
self._processor = Sam3Processor(self._model)
self._model_loaded = True
self._last_error = None
logger.info("SAM 3 image model loaded with version setting %s", settings.sam3_model_version)
except Exception as exc: # noqa: BLE001
self._last_error = str(exc)
self._model_loaded = True
logger.error("Failed to load SAM 3 model: %s", exc)
def _ensure_ready(self) -> bool:
self._load_model()
return self._processor is not None
def _status_message(self) -> str:
missing = []
if not SAM3_PACKAGE_AVAILABLE:
missing.append("sam3 package")
if not self._python_ok():
missing.append("Python 3.12+ runtime")
if not TORCH_AVAILABLE:
missing.append("PyTorch")
if not self._gpu_ok():
missing.append("CUDA GPU")
if not self._checkpoint_exists():
missing.append(f"local checkpoint ({settings.sam3_checkpoint_path})")
if missing:
return f"SAM 3 unavailable: missing {', '.join(missing)}."
return "SAM 3 dependencies are present; model will load on first inference."
def status(self) -> dict:
external_status = self._external_status()
available = bool(self._can_load() or external_status.get("available"))
external_ready = bool(external_status.get("available"))
message = self._last_error or self._status_message()
if self._processor is not None:
message = "SAM 3 model loaded and ready."
elif external_ready:
message = "SAM 3 external runtime is ready; local checkpoint will load in the helper process on inference."
elif external_status.get("message") and not self._can_load():
message = str(external_status["message"])
return {
"id": "sam3",
"label": "SAM 3",
"available": available,
"loaded": self._processor is not None,
"device": "cuda" if self._gpu_ok() else str(external_status.get("device", "unavailable")),
"supports": ["semantic", "box", "video_track"],
"message": message,
"package_available": bool(SAM3_PACKAGE_AVAILABLE or external_status.get("package_available")),
"checkpoint_exists": bool(self._checkpoint_exists() or external_status.get("checkpoint_access")),
"checkpoint_path": self._checkpoint_path() or f"official/HuggingFace ({settings.sam3_model_version})",
"python_ok": bool(self._python_ok() or external_status.get("python_ok")),
"torch_ok": bool(TORCH_AVAILABLE or external_status.get("torch_ok")),
"cuda_required": True,
"external_available": external_ready,
"external_python": settings.sam3_external_python if settings.sam3_external_enabled else None,
}
def _xyxy_to_cxcywh(self, box: list[float]) -> list[float]:
if len(box) != 4:
raise ValueError("SAM 3 box prompt requires [x1, y1, x2, y2].")
x1, y1, x2, y2 = [min(max(float(value), 0.0), 1.0) for value in box]
left, right = sorted([x1, x2])
top, bottom = sorted([y1, y2])
width = max(right - left, 1e-6)
height = max(bottom - top, 1e-6)
return [left + width / 2, top + height / 2, width, height]
def _prediction_to_polygons(self, output: Any) -> tuple[list[list[list[float]]], list[float]]:
masks = output.get("masks", [])
scores = output.get("scores", [])
polygons = []
for mask in masks:
if hasattr(mask, "detach"):
mask = mask.detach().cpu().numpy()
if mask.ndim == 3:
mask = mask[0]
poly = SAM2Engine._mask_to_polygon(mask)
if poly:
polygons.append(poly)
if hasattr(scores, "detach"):
scores = scores.detach().cpu().tolist()
elif hasattr(scores, "tolist"):
scores = scores.tolist()
return polygons, list(scores)
def _predict_external(
self,
image: np.ndarray,
prompt_type: str,
*,
text: str = "",
box: list[float] | None = None,
confidence_threshold: float | None = None,
) -> tuple[list[list[list[float]]], list[float]]:
status = self._external_status(force=True)
if not status.get("available"):
raise RuntimeError(status.get("message") or "SAM 3 external runtime is unavailable.")
with tempfile.TemporaryDirectory(prefix="sam3_") as tmpdir:
tmp_path = Path(tmpdir)
image_path = tmp_path / "image.png"
request_path = tmp_path / "request.json"
Image.fromarray(image).save(image_path)
request_path.write_text(
json.dumps(
{
"image_path": str(image_path),
"prompt_type": prompt_type,
"text": text.strip(),
"box": box,
"model_version": settings.sam3_model_version,
"checkpoint_path": self._checkpoint_path(),
"confidence_threshold": (
confidence_threshold
if confidence_threshold is not None
else settings.sam3_confidence_threshold
),
},
ensure_ascii=False,
),
encoding="utf-8",
)
env = os.environ.copy()
env["SAM3_MODEL_VERSION"] = settings.sam3_model_version
if self._checkpoint_path():
env["SAM3_CHECKPOINT_PATH"] = self._checkpoint_path() or ""
completed = subprocess.run(
[settings.sam3_external_python, str(self._worker_path()), "--request", str(request_path)],
capture_output=True,
text=True,
timeout=settings.sam3_timeout_seconds,
check=False,
env=env,
)
if completed.returncode != 0:
detail = completed.stderr.strip() or completed.stdout.strip()
try:
parsed = json.loads(detail)
detail = parsed.get("error", detail)
except Exception: # noqa: BLE001
pass
raise RuntimeError(f"SAM 3 external inference failed: {detail}")
payload = json.loads(completed.stdout)
if payload.get("error"):
raise RuntimeError(str(payload["error"]))
return payload.get("polygons", []), payload.get("scores", [])
def _predict_semantic_external(
self,
image: np.ndarray,
text: str,
confidence_threshold: float | None = None,
) -> tuple[list[list[list[float]]], list[float]]:
return self._predict_external(
image,
"semantic",
text=text,
confidence_threshold=confidence_threshold,
)
def _predict_box_external(self, image: np.ndarray, box: list[float]) -> tuple[list[list[list[float]]], list[float]]:
return self._predict_external(image, "box", box=box)
def _propagate_video_external(
self,
frame_paths: list[str],
source_frame_index: int,
seed: dict[str, Any],
direction: str,
max_frames: int | None,
) -> list[dict[str, Any]]:
status = self._external_status(force=True)
if not status.get("available"):
raise RuntimeError(status.get("message") or "SAM 3 external runtime is unavailable.")
if not frame_paths:
return []
with tempfile.TemporaryDirectory(prefix="sam3_video_") as tmpdir:
request_path = Path(tmpdir) / "request.json"
request_path.write_text(
json.dumps(
{
"prompt_type": "video_track",
"frame_dir": str(Path(frame_paths[0]).parent),
"source_frame_index": source_frame_index,
"seed": seed,
"direction": direction,
"max_frames": max_frames,
"model_version": settings.sam3_model_version,
"checkpoint_path": self._checkpoint_path(),
"confidence_threshold": settings.sam3_confidence_threshold,
},
ensure_ascii=False,
),
encoding="utf-8",
)
env = os.environ.copy()
env["SAM3_MODEL_VERSION"] = settings.sam3_model_version
if self._checkpoint_path():
env["SAM3_CHECKPOINT_PATH"] = self._checkpoint_path() or ""
completed = subprocess.run(
[settings.sam3_external_python, str(self._worker_path()), "--request", str(request_path)],
capture_output=True,
text=True,
timeout=settings.sam3_timeout_seconds,
check=False,
env=env,
)
if completed.returncode != 0:
detail = completed.stderr.strip() or completed.stdout.strip()
try:
parsed = json.loads(detail)
detail = parsed.get("error", detail)
except Exception: # noqa: BLE001
pass
raise RuntimeError(f"SAM 3 external video tracking failed: {detail}")
payload = json.loads(completed.stdout)
if payload.get("error"):
raise RuntimeError(str(payload["error"]))
return payload.get("frames", [])
def predict_semantic(
self,
image: np.ndarray,
text: str,
confidence_threshold: float | None = None,
) -> tuple[list[list[list[float]]], list[float]]:
if not text.strip():
raise ValueError("SAM 3 semantic prompt requires non-empty text.")
if not self._can_load() and self._external_status().get("available"):
return self._predict_semantic_external(image, text, confidence_threshold=confidence_threshold)
if not self._ensure_ready():
raise RuntimeError(self.status()["message"])
pil_image = Image.fromarray(image)
with torch.inference_mode(): # type: ignore[union-attr]
state = self._processor.set_image(pil_image)
output = self._processor.set_text_prompt(state=state, prompt=text.strip())
return self._prediction_to_polygons(output)
def predict_points(self, *_args: Any, **_kwargs: Any) -> tuple[list[list[list[float]]], list[float]]:
raise NotImplementedError("This backend currently exposes SAM 3 semantic text inference; use SAM 2 for point prompts.")
def predict_box(self, image: np.ndarray, box: list[float]) -> tuple[list[list[list[float]]], list[float]]:
if not self._can_load() and self._external_status().get("available"):
return self._predict_box_external(image, box)
if not self._ensure_ready():
raise RuntimeError(self.status()["message"])
pil_image = Image.fromarray(image)
with torch.inference_mode(): # type: ignore[union-attr]
state = self._processor.set_image(pil_image)
output = self._processor.add_geometric_prompt(
state=state,
box=self._xyxy_to_cxcywh(box),
label=True,
)
return self._prediction_to_polygons(output)
def propagate_video(
self,
frame_paths: list[str],
source_frame_index: int,
seed: dict[str, Any],
direction: str = "forward",
max_frames: int | None = None,
) -> list[dict[str, Any]]:
return self._propagate_video_external(frame_paths, source_frame_index, seed, direction, max_frames)
sam3_engine = SAM3Engine()

View File

@@ -0,0 +1,343 @@
"""Standalone SAM 3 helper for the dedicated Python 3.12 runtime.
The main FastAPI backend can keep running in the existing Python 3.11/SAM 2
environment while this helper is executed with a separate conda env that meets
SAM 3's stricter runtime requirements.
"""
from __future__ import annotations
import argparse
import importlib.util
import json
import os
import sys
from pathlib import Path
from typing import Any
import numpy as np
from PIL import Image
def _torch_status() -> tuple[bool, str | None, str | None, str | None]:
try:
import torch
cuda_available = bool(torch.cuda.is_available())
return (
cuda_available,
getattr(torch, "__version__", None),
getattr(torch.version, "cuda", None),
torch.cuda.get_device_name(0) if cuda_available else None,
)
except Exception: # noqa: BLE001
return False, None, None, None
def _compact_error(exc: Exception) -> str:
lines = [line.strip() for line in str(exc).splitlines() if line.strip()]
for line in lines:
if "Access to model" in line or "Cannot access gated repo" in line:
return line
return lines[0] if lines else exc.__class__.__name__
def _checkpoint_access(model_version: str) -> tuple[bool, str | None]:
checkpoint_path = os.environ.get("SAM3_CHECKPOINT_PATH", "").strip()
if checkpoint_path:
path = Path(checkpoint_path)
if path.is_file():
return True, None
return False, f"local checkpoint not found: {checkpoint_path}"
try:
from huggingface_hub import hf_hub_download
repo_id = "facebook/sam3.1" if model_version == "sam3.1" else "facebook/sam3"
hf_hub_download(repo_id=repo_id, filename="config.json")
return True, None
except Exception as exc: # noqa: BLE001
return False, _compact_error(exc)
def runtime_status() -> dict[str, Any]:
model_version = os.environ.get("SAM3_MODEL_VERSION", "sam3")
checkpoint_path = os.environ.get("SAM3_CHECKPOINT_PATH", "").strip() or None
package_error = None
package_available = importlib.util.find_spec("sam3") is not None
if package_available:
try:
import sam3 # noqa: F401
except Exception as exc: # noqa: BLE001
package_available = False
package_error = str(exc)
cuda_available, torch_version, cuda_version, device_name = _torch_status()
python_ok = sys.version_info >= (3, 12)
checkpoint_access = False
checkpoint_error = None
if package_available:
checkpoint_access, checkpoint_error = _checkpoint_access(model_version)
available = bool(package_available and python_ok and cuda_available and checkpoint_access)
missing = []
if not python_ok:
missing.append("Python 3.12+ runtime")
if not package_available:
missing.append(f"sam3 package ({package_error})" if package_error else "sam3 package")
if torch_version is None:
missing.append("PyTorch")
if not cuda_available:
missing.append("CUDA GPU")
if package_available and not checkpoint_access:
missing.append(f"Hugging Face checkpoint access ({checkpoint_error})")
return {
"available": available,
"package_available": package_available,
"checkpoint_access": checkpoint_access,
"checkpoint_path": checkpoint_path or f"official/HuggingFace ({model_version})",
"python_ok": python_ok,
"torch_ok": torch_version is not None,
"torch_version": torch_version,
"cuda_version": cuda_version,
"cuda_available": cuda_available,
"device": "cuda" if cuda_available else "unavailable",
"device_name": device_name,
"message": (
"SAM 3 external runtime is ready."
if available
else f"SAM 3 external runtime unavailable: missing {', '.join(missing)}."
),
}
def _mask_to_polygon(mask: np.ndarray) -> list[list[float]]:
import cv2
if mask.dtype != np.uint8:
mask = (mask > 0).astype(np.uint8)
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
height, width = mask.shape[:2]
largest = []
for contour in contours:
if len(contour) > len(largest):
largest = contour
if len(largest) < 3:
return []
return [[float(point[0][0]) / width, float(point[0][1]) / height] for point in largest]
def _to_numpy(value: Any) -> np.ndarray:
if hasattr(value, "detach"):
value = value.detach()
if hasattr(value, "is_floating_point") and value.is_floating_point():
value = value.float()
value = value.cpu().numpy()
elif hasattr(value, "cpu"):
value = value.cpu()
if hasattr(value, "is_floating_point") and value.is_floating_point():
value = value.float()
value = value.numpy()
return np.asarray(value)
def _xyxy_to_cxcywh(box: list[float]) -> list[float]:
if len(box) != 4:
raise ValueError("SAM 3 box prompt requires [x1, y1, x2, y2].")
x1, y1, x2, y2 = [min(max(float(value), 0.0), 1.0) for value in box]
left, right = sorted([x1, x2])
top, bottom = sorted([y1, y2])
width = max(right - left, 1e-6)
height = max(bottom - top, 1e-6)
return [left + width / 2, top + height / 2, width, height]
def _bbox_from_seed(seed: dict[str, Any]) -> list[float]:
bbox = seed.get("bbox")
if isinstance(bbox, list) and len(bbox) == 4:
return [min(max(float(value), 0.0), 1.0) for value in bbox]
polygons = seed.get("polygons") or []
points = [point for polygon in polygons for point in polygon if len(point) >= 2]
if not points:
raise ValueError("SAM 3 video tracking requires seed bbox or polygons.")
xs = [min(max(float(point[0]), 0.0), 1.0) for point in points]
ys = [min(max(float(point[1]), 0.0), 1.0) for point in points]
left, right = min(xs), max(xs)
top, bottom = min(ys), max(ys)
return [left, top, max(right - left, 1e-6), max(bottom - top, 1e-6)]
def _video_outputs_to_response(outputs: dict[str, Any]) -> dict[str, Any]:
masks = _to_numpy(outputs.get("out_binary_masks", []))
scores = _to_numpy(outputs.get("out_probs", []))
obj_ids = _to_numpy(outputs.get("out_obj_ids", []))
if masks.ndim == 4:
masks = masks[:, 0]
elif masks.ndim == 2:
masks = masks[None, ...]
polygons = []
out_scores = []
out_ids = []
for index, mask in enumerate(masks):
polygon = _mask_to_polygon(mask)
if polygon:
polygons.append(polygon)
out_scores.append(float(scores[index]) if scores.size > index else 1.0)
out_ids.append(int(obj_ids[index]) if obj_ids.size > index else index + 1)
return {"polygons": polygons, "scores": out_scores, "object_ids": out_ids}
def _prediction_to_response(output: dict[str, Any]) -> dict[str, Any]:
masks = _to_numpy(output.get("masks", []))
scores = _to_numpy(output.get("scores", []))
if masks.ndim == 2:
masks = masks[None, :, :]
elif masks.ndim == 4:
masks = masks[:, 0]
elif masks.ndim == 3 and masks.shape[0] == 1:
masks = masks[None, 0]
polygons = []
for mask in masks:
polygon = _mask_to_polygon(mask)
if polygon:
polygons.append(polygon)
return {
"polygons": polygons,
"scores": scores.astype(float).tolist() if scores.size else [],
}
def predict_video(request_path: Path) -> dict[str, Any]:
import torch
from sam3.model_builder import build_sam3_video_predictor
payload = json.loads(request_path.read_text(encoding="utf-8"))
frame_dir = Path(payload["frame_dir"])
source_frame_index = int(payload.get("source_frame_index", 0))
seed = payload.get("seed") or {}
direction = str(payload.get("direction") or "forward").lower()
max_frames = payload.get("max_frames")
max_frames = int(max_frames) if max_frames else None
checkpoint_path = str(payload.get("checkpoint_path") or os.environ.get("SAM3_CHECKPOINT_PATH", "")).strip()
threshold = float(payload.get("confidence_threshold", 0.5))
if direction not in {"forward", "backward", "both"}:
raise ValueError(f"Unsupported propagation direction: {direction}")
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
predictor = build_sam3_video_predictor(
checkpoint_path=checkpoint_path or None,
async_loading_frames=False,
)
session_id = None
try:
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
session = predictor.handle_request(
{
"type": "start_session",
"resource_path": str(frame_dir),
"offload_video_to_cpu": True,
"offload_state_to_cpu": True,
}
)
session_id = session["session_id"]
predictor.handle_request(
{
"type": "add_prompt",
"session_id": session_id,
"frame_index": source_frame_index,
"bounding_boxes": [_bbox_from_seed(seed)],
"bounding_box_labels": [1],
"output_prob_thresh": threshold,
"rel_coordinates": True,
}
)
frames = []
for item in predictor.handle_stream_request(
{
"type": "propagate_in_video",
"session_id": session_id,
"propagation_direction": direction,
"start_frame_index": source_frame_index,
"max_frame_num_to_track": max_frames,
"output_prob_thresh": threshold,
}
):
frame_response = _video_outputs_to_response(item.get("outputs") or {})
frame_response["frame_index"] = int(item["frame_index"])
frames.append(frame_response)
finally:
if session_id:
predictor.handle_request({"type": "close_session", "session_id": session_id})
return {"frames": frames}
def predict(request_path: Path) -> dict[str, Any]:
import torch
from sam3.model.sam3_image_processor import Sam3Processor
from sam3.model_builder import build_sam3_image_model
payload = json.loads(request_path.read_text(encoding="utf-8"))
if str(payload.get("prompt_type") or "").strip().lower() == "video_track":
return predict_video(request_path)
image_path = Path(payload["image_path"])
prompt_type = str(payload.get("prompt_type") or "semantic").strip().lower()
text = str(payload.get("text") or "").strip()
threshold = float(payload.get("confidence_threshold", 0.5))
checkpoint_path = str(payload.get("checkpoint_path") or os.environ.get("SAM3_CHECKPOINT_PATH", "")).strip()
if prompt_type == "semantic" and not text:
raise ValueError("SAM 3 semantic prompt requires non-empty text.")
if prompt_type not in {"semantic", "box"}:
raise ValueError(f"Unsupported SAM 3 prompt type: {prompt_type}")
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
image = Image.open(image_path).convert("RGB")
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
model = build_sam3_image_model(
checkpoint_path=checkpoint_path or None,
load_from_HF=not bool(checkpoint_path),
)
processor = Sam3Processor(model, confidence_threshold=threshold)
state = processor.set_image(image)
if prompt_type == "box":
output = processor.add_geometric_prompt(
state=state,
box=_xyxy_to_cxcywh(payload.get("box") or []),
label=True,
)
else:
output = processor.set_text_prompt(state=state, prompt=text)
return _prediction_to_response(output)
def main() -> int:
parser = argparse.ArgumentParser(description="SAM 3 external runtime helper")
parser.add_argument("--status", action="store_true")
parser.add_argument("--request", type=Path)
args = parser.parse_args()
try:
if args.status:
print(json.dumps(runtime_status(), ensure_ascii=False))
return 0
if args.request:
print(json.dumps(predict(args.request), ensure_ascii=False))
return 0
parser.error("Use --status or --request")
except Exception as exc: # noqa: BLE001
print(json.dumps({"error": str(exc)}, ensure_ascii=False), file=sys.stderr)
return 1
return 2
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,130 @@
"""Model registry for SAM runtimes and GPU status."""
from __future__ import annotations
from typing import Any
from config import settings
from services.sam2_engine import DEFAULT_SAM2_MODEL_ID, TORCH_AVAILABLE, sam_engine as sam2_engine
# SAM 3 integration is intentionally disabled for the current product flow.
# The source files are kept in the repository so the integration can be
# restored later, but the active registry only exposes SAM 2.
# from services.sam3_engine import sam3_engine
try:
import torch
except Exception: # noqa: BLE001
torch = None # type: ignore[assignment]
class ModelUnavailableError(RuntimeError):
"""Raised when a selected model cannot run in this environment."""
class SAMRegistry:
"""Dispatch predictions to the selected SAM backend."""
def __init__(self) -> None:
self._engines = {
"sam2": sam2_engine,
# "sam3": sam3_engine,
}
def normalize_model_id(self, model_id: str | None) -> str:
selected = (model_id or settings.sam_default_model or DEFAULT_SAM2_MODEL_ID).lower()
if self._engines["sam2"].is_sam2_model(selected):
return self._engines["sam2"].normalize_model_id(selected)
if selected not in self._engines:
raise ValueError(f"Unsupported model: {model_id}")
return selected
def runtime_status(self, selected_model: str | None = None) -> dict[str, Any]:
selected = self.normalize_model_id(selected_model)
return {
"selected_model": selected,
"gpu": self.gpu_status(),
"models": [sam2_engine.status(model_id) for model_id in sam2_engine.variant_ids()],
}
def gpu_status(self) -> dict[str, Any]:
cuda_available = bool(TORCH_AVAILABLE and torch is not None and torch.cuda.is_available())
return {
"available": cuda_available,
"device": "cuda" if cuda_available else "cpu",
"name": torch.cuda.get_device_name(0) if cuda_available else None,
"torch_available": bool(TORCH_AVAILABLE),
"torch_version": getattr(torch, "__version__", None) if torch is not None else None,
"cuda_version": getattr(torch.version, "cuda", None) if torch is not None else None,
}
def _engine(self, model_id: str | None) -> Any:
normalized = self.normalize_model_id(model_id)
if self._engines["sam2"].is_sam2_model(normalized):
return self._engines["sam2"]
return self._engines[normalized]
def _ensure_available(self, model_id: str | None) -> Any:
normalized = self.normalize_model_id(model_id)
engine = self._engine(model_id)
status = engine.status(normalized) if engine is sam2_engine else engine.status()
if not status["available"]:
raise ModelUnavailableError(status["message"])
return engine
def predict_points(self, model_id: str | None, image: Any, points: list[list[float]], labels: list[int]):
model = self.normalize_model_id(model_id)
return self._ensure_available(model).predict_points(model, image, points, labels)
def predict_box(self, model_id: str | None, image: Any, box: list[float]):
model = self.normalize_model_id(model_id)
return self._ensure_available(model).predict_box(model, image, box)
def predict_interactive(
self,
model_id: str | None,
image: Any,
box: list[float] | None,
points: list[list[float]],
labels: list[int],
):
model = self.normalize_model_id(model_id)
if not sam2_engine.is_sam2_model(model):
raise NotImplementedError("Interactive box + point refinement is currently supported by SAM 2.")
return self._ensure_available(model).predict_interactive(model, image, box, points, labels)
def predict_auto(self, model_id: str | None, image: Any):
model = self.normalize_model_id(model_id)
return self._ensure_available(model).predict_auto(model, image)
def predict_semantic(
self,
model_id: str | None,
image: Any,
text: str,
confidence_threshold: float | None = None,
):
self.normalize_model_id(model_id)
raise NotImplementedError("Semantic text prompting is disabled; use SAM 2 point or box prompts.")
def propagate_video(
self,
model_id: str | None,
frame_paths: list[str],
source_frame_index: int,
seed: dict[str, Any],
direction: str,
max_frames: int | None,
):
model = self.normalize_model_id(model_id)
return self._ensure_available(model).propagate_video(
model,
frame_paths,
source_frame_index,
seed,
direction=direction,
max_frames=max_frames,
)
sam_registry = SAMRegistry()