- 接入 SAM2 视频传播能力:新增 /api/ai/propagate,支持用当前帧 mask/polygon/bbox 作为 seed,通过 SAM2 video predictor 向前、向后或双向传播,并可保存为真实 annotation。
- 接入 SAM3 video tracker:通过独立 Python 3.12 external worker 调用 SAM3 video predictor/tracker,使用本地 checkpoint 与 bbox seed 执行视频级跟踪,并在模型状态中标记 video_track 能力。
- 完善 SAM 模型分发:sam_registry 按 model_id 明确区分 sam2 propagation 与 sam3 video_track,避免两个模型链路混用。
- 打通前端“传播片段”:VideoWorkspace 使用当前选中 mask 和当前 AI 模型调用后端传播接口,传播结果回写并刷新工作区已保存标注。
- 增强 SAM3 本地 checkpoint 配置:新增 sam3_checkpoint_path 配置和 .env.example 示例,状态检查改为基于本地 checkpoint/独立环境/模型包可用性。
- 完善视频拆帧参数:/api/media/parse 支持 parse_fps、max_frames、target_width,后端任务保存帧时间戳、源帧号和 frame_sequence 元数据。
- 增加运行时 schema 兼容处理:启动时为旧 frames 表补充 timestamp_ms 和 source_frame_number 列,避免旧库升级后缺字段。
- 强化 Canvas 标注编辑:补齐多边形闭合、点工具、顶点拖拽、边中点插入、Delete/Backspace 删除、区域合并和重叠去除等交互。
- 增强语义分类联动:选中 mask 后可通过右侧语义分类树更新标签、颜色和 class metadata,并同步到保存/导出链路。
- 增加关键帧时间轴体验:FrameTimeline 显示具体时间信息,并支持键盘左右方向键切换关键帧。
- 完善 AI 交互分割参数:前端保留正向点、反向点、框选和 interactive prompt 的调用状态,支持 SAM2 细化候选区域与 SAM3 bbox 入口。
- 扩展后端/前端 API 类型:新增 propagateMasks、传播请求/响应 schema,并补齐 annotation、导出、模型状态和任务接口的测试覆盖。
- 更新项目文档:同步 README、AGENTS、接口契约、需求冻结、设计冻结、前端元素审计、实施计划和测试计划,标明真实功能边界与剩余风险。
- 增加测试覆盖:补充 SAM2/SAM3 传播、SAM3 状态、媒体拆帧参数、Canvas 编辑、语义标签切换、时间轴、工作区传播和 API 合约测试。
- 加强仓库安全边界:将 sam3权重/ 加入 .gitignore,避免本地模型权重被误提交。

验证:npm run test:run;pytest backend/tests;npm run lint;npm run build;python -m py_compile;git diff --check。
228 lines
7.5 KiB
Python
228 lines
7.5 KiB
Python
"""Video/DICOM frame parsing and MinIO upload utilities."""
|
|
|
|
import logging
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import List, Optional, Tuple
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from pydicom import dcmread
|
|
|
|
from minio_client import upload_file, BUCKET_NAME
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def get_video_fps(video_path: str) -> float:
    """Return the frame rate OpenCV reports for a video file.

    Args:
        video_path: Path to the video file to inspect.

    Returns:
        The value of ``cv2.CAP_PROP_FPS`` when it is greater than zero.
        Falls back to ``30.0`` when the file cannot be opened or the
        reported rate is not positive.
    """
    default_fps = 30.0
    capture = cv2.VideoCapture(video_path)
    if capture.isOpened():
        reported = capture.get(cv2.CAP_PROP_FPS)
        capture.release()
        # A comparison with NaN is False, so NaN also falls through to the default.
        if reported > 0:
            return reported
    return default_fps
|
|
|
|
|
|
def extract_thumbnail(video_path: str, output_path: str, width: int = 640) -> str:
    """Extract the first frame of a video as a thumbnail JPEG.

    The frame is only ever downscaled: when it is wider than ``width`` it is
    resized to ``width`` pixels wide with the height scaled proportionally;
    otherwise it is written at its original size.

    Args:
        video_path: Path to the input video file.
        output_path: Destination path of the thumbnail image.
        width: Maximum thumbnail width in pixels. Non-positive values
            disable resizing.

    Returns:
        ``output_path``, once the image has been written.

    Raises:
        RuntimeError: If the video cannot be opened, the first frame cannot
            be read, or the thumbnail cannot be written.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Cannot open video for thumbnail: {video_path}")
        ret, frame = cap.read()
    finally:
        # Release the capture on every path, including when read() raises.
        cap.release()

    if not ret or frame is None:
        raise RuntimeError(f"Cannot read first frame from: {video_path}")

    h, w = frame.shape[:2]
    if 0 < width < w:
        scale = width / w
        # Clamp to 1 px so extreme aspect ratios never yield a zero-sized
        # target, which cv2.resize rejects.
        new_w = max(1, int(width))
        new_h = max(1, int(h * scale))
        frame = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # cv2.imwrite reports failure via its return value; do not hand back a
    # path to a file that was never created.
    if not cv2.imwrite(output_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 85]):
        raise RuntimeError(f"Cannot write thumbnail to: {output_path}")
    return output_path
|
|
|
|
|
|
def parse_video(
    video_path: str,
    output_dir: str,
    fps: int = 30,
    max_frames: Optional[int] = None,
    target_width: int = 640,
) -> Tuple[List[str], float]:
    """Extract frames from a video file using FFmpeg or OpenCV fallback.

    FFmpeg is used when an ``ffmpeg`` executable is found on ``PATH``. If it
    is missing, exits with a non-zero status, or raises, the frames are
    extracted with OpenCV instead. Frames are written to ``output_dir`` as
    ``frame_000000.jpg``, ``frame_000001.jpg``, ...

    Args:
        video_path: Path to the input video file.
        output_dir: Directory to save extracted frames (created if missing).
        fps: Target frame extraction rate; values below 1 are clamped to 1.
        max_frames: Optional maximum number of frames to extract. ``None``,
            ``0`` and negative values mean "no limit".
        target_width: Output frame width for model-friendly frame sequences;
            values below 1 are clamped to 1.

    Returns:
        Tuple of (frame_paths, original_fps).

    Raises:
        RuntimeError: If the OpenCV fallback cannot open the video.
    """
    os.makedirs(output_dir, exist_ok=True)
    original_fps = get_video_fps(video_path)
    safe_fps = max(int(fps), 1)
    safe_width = max(int(target_width), 1)
    # Normalise the limit once. A negative value would otherwise slice from
    # the tail of the list or stop the OpenCV loop after a single frame.
    frame_limit = int(max_frames) if max_frames and int(max_frames) > 0 else None

    # Try FFmpeg first
    if shutil.which("ffmpeg"):
        frame_paths = _extract_frames_ffmpeg(
            video_path, output_dir, safe_fps, safe_width, frame_limit
        )
        if frame_paths is not None:
            logger.info("Extracted %d frames via FFmpeg", len(frame_paths))
            return frame_paths, original_fps

    # OpenCV fallback
    logger.info("Falling back to OpenCV frame extraction")
    frame_paths = _extract_frames_opencv(
        video_path, output_dir, safe_fps, safe_width, frame_limit
    )
    logger.info("Extracted %d frames via OpenCV", len(frame_paths))
    return frame_paths, original_fps


def _extract_frames_ffmpeg(
    video_path: str,
    output_dir: str,
    fps: int,
    width: int,
    frame_limit: Optional[int],
) -> Optional[List[str]]:
    """Run FFmpeg to dump frames; return their paths, or None on failure."""
    try:
        pattern = os.path.join(output_dir, "frame_%06d.jpg")
        cmd = [
            "ffmpeg",
            "-i", video_path,
            "-vf", f"fps={fps},scale={width}:-1",
            "-start_number", "0",
            "-q:v", "5",
        ]
        if frame_limit is not None:
            # Stop encoding at the limit instead of decoding the whole video
            # and discarding the surplus frames afterwards.
            cmd += ["-frames:v", str(frame_limit)]
        cmd += ["-y", pattern]
        logger.info("Running FFmpeg: %s", " ".join(cmd))
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=False,
            # Keep FFmpeg from reading (and consuming) the parent's stdin.
            stdin=subprocess.DEVNULL,
        )
        if result.returncode != 0:
            logger.warning("FFmpeg failed: %s", result.stderr)
            return None

        # Only pick up files matching the frame pattern, so unrelated JPEGs
        # in the same directory are not reported as frames.
        frame_paths = sorted(
            os.path.join(output_dir, name)
            for name in os.listdir(output_dir)
            if name.startswith("frame_") and name.endswith(".jpg")
        )
        if frame_limit is not None:
            frame_paths = frame_paths[:frame_limit]
        return frame_paths
    except Exception as exc:  # noqa: BLE001 - any failure just triggers the fallback
        logger.warning("FFmpeg exception: %s", exc)
        return None


def _extract_frames_opencv(
    video_path: str,
    output_dir: str,
    fps: int,
    width: int,
    frame_limit: Optional[int],
) -> List[str]:
    """Sample every N-th decoded frame with OpenCV and save it as a JPEG."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Cannot open video: {video_path}")

    frame_paths: List[str] = []
    try:
        video_fps = cap.get(cv2.CAP_PROP_FPS)
        # Covers 0, negative and NaN reports; NaN would crash round() below.
        if not video_fps > 0:
            video_fps = 30
        interval = max(1, int(round(video_fps / fps)))
        count = 0

        while frame_limit is None or len(frame_paths) < frame_limit:
            ret, frame = cap.read()
            if not ret:
                break
            if count % interval == 0:
                path = os.path.join(output_dir, f"frame_{len(frame_paths):06d}.jpg")
                h, w = frame.shape[:2]
                if w != width:
                    scale = width / max(w, 1)
                    frame = cv2.resize(
                        frame,
                        (width, max(1, int(round(h * scale)))),
                        interpolation=cv2.INTER_AREA,
                    )
                if cv2.imwrite(path, frame, [cv2.IMWRITE_JPEG_QUALITY, 80]):
                    frame_paths.append(path)
                else:
                    # Never report a path that was not actually written.
                    logger.warning("Failed to write frame %s", path)
            count += 1
    finally:
        cap.release()
    return frame_paths
|
|
|
|
|
|
def parse_dicom(
    dicom_dir: str,
    output_dir: str,
    max_frames: Optional[int] = None,
) -> List[str]:
    """Extract frames from DICOM files in a directory.

    Files ending in ``.dcm`` (case-insensitive) are processed in sorted
    filename order. Pixel data that is not already 8-bit is min-max
    normalised to ``uint8``. Single-frame files are written as
    ``frame_<file index>.jpg`` and multi-frame files as
    ``frame_<file index>_<frame index>.jpg``. Files that cannot be read are
    logged and skipped.

    Args:
        dicom_dir: Directory containing .dcm files.
        output_dir: Directory to save extracted frames (created if missing).
        max_frames: Optional maximum number of frames to extract, counted
            across all files. ``None``, ``0`` and negative values mean
            "no limit".

    Returns:
        List of paths to extracted frame images.
    """
    os.makedirs(output_dir, exist_ok=True)
    dcm_files = sorted(
        f for f in os.listdir(dicom_dir) if f.lower().endswith(".dcm")
    )
    frame_limit = int(max_frames) if max_frames and int(max_frames) > 0 else None

    frame_paths: List[str] = []
    for idx, fname in enumerate(dcm_files):
        # Cap the number of frames, not files, so multi-frame files cannot
        # push the result past the documented limit.
        if frame_limit is not None and len(frame_paths) >= frame_limit:
            break
        path = os.path.join(dicom_dir, fname)
        try:
            ds = dcmread(path)
            pixel_array = ds.pixel_array

            # Normalize to 8-bit
            if pixel_array.dtype != np.uint8:
                pixel_array = pixel_array.astype(np.float32)
                low = pixel_array.min()
                high = pixel_array.max()
                # The epsilon keeps a constant image from dividing by zero.
                pixel_array = (pixel_array - low) / (high - low + 1e-8) * 255
                pixel_array = pixel_array.astype(np.uint8)

            # A 3-D array is a frame stack only for single-sample data; for
            # colour data it is one (rows, cols, samples) image. Splitting
            # that along axis 0 would write one JPEG per image row.
            samples = int(getattr(ds, "SamplesPerPixel", 1) or 1)
            is_color = samples > 1
            is_multi_frame = pixel_array.ndim == (4 if is_color else 3)
            frames = pixel_array if is_multi_frame else pixel_array[np.newaxis, ...]

            for f, image in enumerate(frames):
                if frame_limit is not None and len(frame_paths) >= frame_limit:
                    break
                if is_color and image.ndim == 3 and image.shape[-1] == 3:
                    # OpenCV writes channels as BGR. This assumes the decoded
                    # pixels are RGB - TODO confirm for YBR photometric data.
                    image = np.ascontiguousarray(image[..., ::-1])
                if is_multi_frame:
                    out_name = f"frame_{idx:06d}_{f:03d}.jpg"
                else:
                    out_name = f"frame_{idx:06d}.jpg"
                out_path = os.path.join(output_dir, out_name)
                if cv2.imwrite(out_path, image, [cv2.IMWRITE_JPEG_QUALITY, 85]):
                    frame_paths.append(out_path)
                else:
                    # Never report a path that was not actually written.
                    logger.error("Failed to write DICOM frame %s", out_path)
        except Exception as exc:  # noqa: BLE001 - best effort: skip unreadable files
            logger.error("Failed to read DICOM %s: %s", path, exc)

    logger.info("Extracted %d frames from DICOM", len(frame_paths))
    return frame_paths
|
|
|
|
|
|
def upload_frames_to_minio(
    frames: List[str],
    project_id: int,
    object_prefix: Optional[str] = None,
) -> List[str]:
    """Upload local frame images to MinIO, skipping any that fail.

    Each file is read fully into memory and passed to ``upload_file`` under
    the object name ``<prefix>/<file basename>`` with content type
    ``image/jpeg``. A failure for one frame is logged and does not stop the
    remaining uploads.

    Args:
        frames: List of local file paths.
        project_id: Project ID used to build the default prefix
            ``projects/<project_id>/frames``.
        object_prefix: Optional prefix override; used instead of the default
            when it is a non-empty string.

    Returns:
        Object names of the frames that were uploaded successfully, in input
        order. Failed frames are omitted, so the result may be shorter than
        ``frames``.
    """
    base_prefix = object_prefix if object_prefix else f"projects/{project_id}/frames"
    uploaded: List[str] = []

    for local_path in frames:
        target = f"{base_prefix}/{os.path.basename(local_path)}"
        try:
            with open(local_path, "rb") as handle:
                payload = handle.read()
            upload_file(
                target,
                payload,
                content_type="image/jpeg",
                length=len(payload),
            )
        except Exception as exc:  # noqa: BLE001 - best effort: keep uploading the rest
            logger.error("Failed to upload %s: %s", local_path, exc)
            continue
        uploaded.append(target)

    logger.info("Uploaded %d/%d frames to MinIO", len(uploaded), len(frames))
    return uploaded
|