20260430_001744-feat: PyTorch CUDA + SAM2 GPU inference, video thumbnail, real FPS + configurable parse FPS, DICOM batch import

This commit is contained in:
2026-04-30 00:30:58 +08:00
parent 35d6e1503c
commit 6d008ec4a2
15 changed files with 555 additions and 101 deletions

5
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,5 @@
{
"workbench.editorAssociations": {
"*.mp4": "default"
}
}

View File

@@ -20,7 +20,7 @@ class Settings(BaseSettings):
# SAM2
sam_model_path: str = "/home/wkmgc/Desktop/Seg_Server/models/sam2_hiera_tiny.pt"
sam_model_config: str = "sam2_hiera_t.yaml"
sam_model_config: str = "configs/sam2/sam2_hiera_t.yaml"
# App
app_env: str = "development"

View File

@@ -30,7 +30,7 @@ def _seed_default_project_sync() -> None:
"""Synchronously seed the default video project on first startup."""
import cv2
from models import Project, Frame
from services.frame_parser import parse_video, upload_frames_to_minio
from services.frame_parser import parse_video, upload_frames_to_minio, extract_thumbnail
db = SessionLocal()
try:
@@ -46,6 +46,8 @@ def _seed_default_project_sync() -> None:
name="Data_MyVideo_1",
description="默认演示视频",
status="pending",
source_type="video",
parse_fps=30.0,
)
db.add(project)
db.commit()
@@ -67,7 +69,20 @@ def _seed_default_project_sync() -> None:
f.write(data)
output_dir = os.path.join(tmp_dir, "frames")
os.makedirs(output_dir, exist_ok=True)
frame_files = parse_video(local_path, output_dir, fps=30, max_frames=100)
frame_files, original_fps = parse_video(local_path, output_dir, fps=30, max_frames=100)
project.original_fps = original_fps
# Extract thumbnail
thumbnail_path = os.path.join(tmp_dir, "thumbnail.jpg")
try:
extract_thumbnail(local_path, thumbnail_path)
with open(thumbnail_path, "rb") as f:
thumb_data = f.read()
thumb_object = f"projects/{project.id}/thumbnail.jpg"
upload_file(thumb_object, thumb_data, content_type="image/jpeg", length=len(thumb_data))
project.thumbnail_url = thumb_object
except Exception as exc: # noqa: BLE001
logger.warning("Thumbnail extraction failed: %s", exc)
object_names = upload_frames_to_minio(frame_files, project.id)

View File

@@ -25,7 +25,11 @@ class Project(Base):
name = Column(String(255), nullable=False)
description = Column(Text, nullable=True)
video_path = Column(String(512), nullable=True)
thumbnail_url = Column(String(512), nullable=True)
status = Column(String(50), default="Ready", nullable=False)
source_type = Column(String(20), default="video", nullable=False) # video | dicom
original_fps = Column(Float, nullable=True)
parse_fps = Column(Float, default=30.0, nullable=False)
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()

View File

@@ -6,16 +6,19 @@ import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
from typing import List, Optional
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
from sqlalchemy.orm import Session
from database import get_db
from minio_client import upload_file, get_presigned_url
from minio_client import upload_file, get_presigned_url, download_file
from models import Project, Frame
from schemas import FrameOut
from services.frame_parser import parse_video, parse_dicom, upload_frames_to_minio
from services.frame_parser import (
parse_video, parse_dicom, upload_frames_to_minio,
extract_thumbnail, get_video_fps,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/media", tags=["Media"])
@@ -78,6 +81,7 @@ async def upload_media(
description="Auto-created from upload",
status="pending",
video_path=object_name,
source_type="video",
)
db.add(project)
db.commit()
@@ -90,7 +94,6 @@ async def upload_media(
db.commit()
logger.info("Auto-created project id=%s for upload %s", project_id, file.filename)
# TODO: enqueue async parsing job (Celery / background task)
logger.info("Upload complete: %s (size=%d bytes). Async parsing queued.", object_name, len(data))
return {
@@ -102,6 +105,66 @@ async def upload_media(
}
@router.post(
    "/upload/dicom",
    status_code=status.HTTP_201_CREATED,
    summary="Upload multiple DICOM files",
)
async def upload_dicom_batch(
    files: List[UploadFile] = File(...),
    project_id: Optional[int] = Form(None),
    db: Session = Depends(get_db),
) -> dict:
    """Upload multiple .dcm files for a DICOM series.

    If project_id is provided, files are added to the existing project.
    Otherwise a new DICOM project is created.

    Args:
        files: Uploaded files; entries whose name does not end in ``.dcm``
            are ignored.
        project_id: Optional id of an existing project to append to.
        db: Database session.

    Returns:
        A dict with ``project_id``, ``uploaded_count`` and ``message``.

    Raises:
        HTTPException: 400 when no file or no ``.dcm`` file was sent,
            404 when ``project_id`` does not match a project.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded")

    # Filter first so that a request without any usable file is rejected
    # before a project row is created.
    dicom_files = []
    for file in files:
        # The filename comes from the client: keep only its last path
        # component so it cannot inject extra segments ("../", "a/b") into
        # the object key. Backslashes are normalised for Windows clients.
        safe_name = Path((file.filename or "").replace("\\", "/")).name
        if safe_name.lower().endswith(".dcm"):
            dicom_files.append((safe_name, file))
    if not dicom_files:
        raise HTTPException(status_code=400, detail="No .dcm files uploaded")

    uploaded = []

    # Compare against None so that an explicit id of 0 is looked up (and
    # rejected with 404) instead of silently creating a new project.
    if project_id is not None:
        project = db.query(Project).filter(Project.id == project_id).first()
        if not project:
            raise HTTPException(status_code=404, detail="Project not found")
        # NOTE(review): the existing project's source_type is left untouched;
        # confirm whether appending DICOM files to a "video" project should be
        # rejected or should switch the project to "dicom".
    else:
        # Create new DICOM project
        first_name = files[0].filename or "DICOM_Series"
        project = Project(
            name=first_name,
            description=f"DICOM series with {len(files)} files",
            status="pending",
            source_type="dicom",
        )
        db.add(project)
        db.commit()
        db.refresh(project)
        project_id = project.id
        logger.info("Auto-created DICOM project id=%s", project_id)

    for safe_name, file in dicom_files:
        data = await file.read()
        object_name = f"uploads/{project_id}/dicom/{safe_name}"
        try:
            upload_file(object_name, data, content_type="application/dicom", length=len(data))
            uploaded.append(object_name)
        except Exception as exc:  # noqa: BLE001
            # Best effort per file: one failed object must not abort the batch.
            logger.error("Failed to upload DICOM %s: %s", safe_name, exc)

    # Only point the project at the DICOM prefix once something is stored
    # there; otherwise the project would claim media it does not have.
    if uploaded:
        project.video_path = f"uploads/{project_id}/dicom"
        db.commit()

    return {
        "project_id": project_id,
        "uploaded_count": len(uploaded),
        "message": f"Uploaded {len(uploaded)} DICOM files. Parsing job queued.",
    }
@router.post(
"/parse",
status_code=status.HTTP_202_ACCEPTED,
@@ -109,12 +172,12 @@ async def upload_media(
)
def parse_media(
project_id: int,
source_type: str = "video", # video | dicom
source_type: Optional[str] = None,
db: Session = Depends(get_db),
) -> dict:
"""Trigger frame extraction for a project's uploaded media.
* video: uses FFmpeg or OpenCV fallback.
* video: uses FFmpeg or OpenCV fallback, extracts thumbnail.
* dicom: uses pydicom to read DCM frames.
Extracted frames are uploaded to MinIO and registered in the database.
@@ -126,37 +189,53 @@ def parse_media(
if not project.video_path:
raise HTTPException(status_code=400, detail="Project has no media uploaded")
# Download from MinIO to a temp directory
from minio_client import download_file
try:
media_bytes = download_file(project.video_path)
except Exception as exc: # noqa: BLE001
logger.error("Failed to download media for parsing: %s", exc)
raise HTTPException(status_code=500, detail="Failed to retrieve media from storage") from exc
effective_source = source_type or project.source_type or "video"
parse_fps = project.parse_fps or 30.0
tmp_dir = tempfile.mkdtemp(prefix=f"seg_parse_{project_id}_")
local_path = os.path.join(tmp_dir, Path(project.video_path).name)
with open(local_path, "wb") as f:
f.write(media_bytes)
output_dir = os.path.join(tmp_dir, "frames")
os.makedirs(output_dir, exist_ok=True)
try:
if source_type == "dicom":
# For DICOM, treat local_path as a directory if it contains multiple .dcm
# If a single .dcm file was uploaded, put it in its own sub-dir
if effective_source == "dicom":
# Download all dicom files from MinIO
dcm_dir = os.path.join(tmp_dir, "dcm")
os.makedirs(dcm_dir, exist_ok=True)
if local_path.lower().endswith(".dcm"):
shutil.move(local_path, os.path.join(dcm_dir, os.path.basename(local_path)))
else:
shutil.unpack_archive(local_path, dcm_dir) if shutil.which("unzip") else shutil.move(local_path, dcm_dir)
from minio_client import get_minio_client, BUCKET_NAME
client = get_minio_client()
prefix = project.video_path
objects = list(client.list_objects(BUCKET_NAME, prefix=prefix, recursive=True))
for obj in objects:
if obj.object_name.lower().endswith(".dcm"):
data = download_file(obj.object_name)
local_dcm = os.path.join(dcm_dir, os.path.basename(obj.object_name))
with open(local_dcm, "wb") as f:
f.write(data)
frame_files = parse_dicom(dcm_dir, output_dir)
else:
frame_files = parse_video(local_path, output_dir, fps=30)
# Video: download and parse
media_bytes = download_file(project.video_path)
local_path = os.path.join(tmp_dir, Path(project.video_path).name)
with open(local_path, "wb") as f:
f.write(media_bytes)
frame_files, original_fps = parse_video(local_path, output_dir, fps=int(parse_fps))
project.original_fps = original_fps
# Extract thumbnail from first frame
thumbnail_path = os.path.join(tmp_dir, "thumbnail.jpg")
try:
extract_thumbnail(local_path, thumbnail_path)
with open(thumbnail_path, "rb") as f:
thumb_data = f.read()
thumb_object = f"projects/{project_id}/thumbnail.jpg"
upload_file(thumb_object, thumb_data, content_type="image/jpeg", length=len(thumb_data))
project.thumbnail_url = thumb_object
logger.info("Uploaded thumbnail for project_id=%s", project_id)
except Exception as exc: # noqa: BLE001
logger.warning("Thumbnail extraction failed: %s", exc)
except Exception as exc: # noqa: BLE001
logger.error("Frame extraction failed: %s", exc)
shutil.rmtree(tmp_dir, ignore_errors=True)
@@ -173,7 +252,6 @@ def parse_media(
# Register frames in DB
frames_out = []
for idx, obj_name in enumerate(object_names):
# Get image dimensions
local_frame = frame_files[idx]
try:
import cv2

View File

@@ -44,6 +44,8 @@ def list_projects(skip: int = 0, limit: int = 100, db: Session = Depends(get_db)
projects = db.query(Project).offset(skip).limit(limit).all()
for p in projects:
p.frame_count = len(p.frames)
if p.thumbnail_url:
p.thumbnail_url = get_presigned_url(p.thumbnail_url, expires=3600)
return projects
@@ -58,6 +60,8 @@ def get_project(project_id: int, db: Session = Depends(get_db)) -> Project:
if not project:
raise HTTPException(status_code=404, detail="Project not found")
project.frame_count = len(project.frames)
if project.thumbnail_url:
project.thumbnail_url = get_presigned_url(project.thumbnail_url, expires=3600)
return project

View File

@@ -12,7 +12,11 @@ class ProjectBase(BaseModel):
name: str
description: Optional[str] = None
video_path: Optional[str] = None
thumbnail_url: Optional[str] = None
status: Optional[str] = "pending"
source_type: Optional[str] = "video"
original_fps: Optional[float] = None
parse_fps: Optional[float] = 30.0
class ProjectCreate(ProjectBase):
@@ -23,7 +27,11 @@ class ProjectUpdate(BaseModel):
name: Optional[str] = None
description: Optional[str] = None
video_path: Optional[str] = None
thumbnail_url: Optional[str] = None
status: Optional[str] = None
source_type: Optional[str] = None
original_fps: Optional[float] = None
parse_fps: Optional[float] = None
class ProjectOut(ProjectBase):
@@ -103,7 +111,7 @@ class AnnotationCreate(AnnotationBase):
class AnnotationUpdate(BaseModel):
mask_data: Optional[dict[str, Any]] = None
points: Optional[list[list[float]]] = None
points: Optional[list[float]] = None
bbox: Optional[list[float]] = None
template_id: Optional[int] = None

View File

@@ -5,7 +5,7 @@ import os
import shutil
import subprocess
from pathlib import Path
from typing import List, Optional
from typing import List, Optional, Tuple
import cv2
import numpy as np
@@ -16,12 +16,43 @@ from minio_client import upload_file, BUCKET_NAME
logger = logging.getLogger(__name__)
def get_video_fps(video_path: str) -> float:
    """Read the original frame rate of a video file.

    Args:
        video_path: Path to the video file on local disk.

    Returns:
        The frame rate reported by OpenCV, or 30.0 when the file cannot be
        opened or the reported value is not a positive finite number.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return 30.0
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        # Release on every path, including the "cannot open" early return.
        cap.release()
    # Reject 0, negative, NaN (fails both comparisons) and inf so callers
    # always get a usable rate.
    return fps if 0 < fps < float("inf") else 30.0
def extract_thumbnail(video_path: str, output_path: str, width: int = 640) -> str:
    """Extract the first frame of a video as a thumbnail JPEG.

    Frames wider than ``width`` are scaled down to that width with the
    aspect ratio preserved; narrower frames are written unchanged.

    Args:
        video_path: Path to the source video on local disk.
        output_path: Destination path of the JPEG file.
        width: Maximum thumbnail width in pixels; must be positive.

    Returns:
        ``output_path``.

    Raises:
        ValueError: If ``width`` is not positive.
        RuntimeError: If the video cannot be opened, its first frame cannot
            be read, or the thumbnail cannot be written.
    """
    if width <= 0:
        raise ValueError(f"Thumbnail width must be positive, got {width}")

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Cannot open video for thumbnail: {video_path}")
        ret, frame = cap.read()
    finally:
        # Release on every path, including the error paths above.
        cap.release()
    if not ret or frame is None:
        raise RuntimeError(f"Cannot read first frame from: {video_path}")

    h, w = frame.shape[:2]
    if w > width:
        scale = width / w
        # Use the requested width directly (int(w * scale) can land on
        # width - 1 through float rounding) and never let the height
        # truncate to 0 for very wide frames.
        new_w = width
        new_h = max(1, int(h * scale))
        frame = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # imwrite reports failure through its return value rather than raising.
    if not cv2.imwrite(output_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 85]):
        raise RuntimeError(f"Cannot write thumbnail to: {output_path}")
    return output_path
def parse_video(
video_path: str,
output_dir: str,
fps: int = 30,
max_frames: Optional[int] = None,
) -> List[str]:
) -> Tuple[List[str], float]:
"""Extract frames from a video file using FFmpeg or OpenCV fallback.
Args:
@@ -31,10 +62,11 @@ def parse_video(
max_frames: Optional maximum number of frames to extract.
Returns:
List of paths to extracted frame images.
Tuple of (frame_paths, original_fps).
"""
os.makedirs(output_dir, exist_ok=True)
frame_paths: List[str] = []
original_fps = get_video_fps(video_path)
# Try FFmpeg first
if shutil.which("ffmpeg"):
@@ -57,7 +89,7 @@ def parse_video(
if max_frames:
frame_paths = frame_paths[:max_frames]
logger.info("Extracted %d frames via FFmpeg", len(frame_paths))
return frame_paths
return frame_paths, original_fps
else:
logger.warning("FFmpeg failed: %s", result.stderr)
except Exception as exc: # noqa: BLE001
@@ -89,7 +121,7 @@ def parse_video(
cap.release()
logger.info("Extracted %d frames via OpenCV", len(frame_paths))
return frame_paths
return frame_paths, original_fps
def parse_dicom(
@@ -134,12 +166,12 @@ def parse_dicom(
# Handle multi-frame DICOM
if pixel_array.ndim == 3:
for f in range(pixel_array.shape[0]):
out_path = os.path.join(output_dir, f"frame_{idx:06d}_{f:03d}.png")
cv2.imwrite(out_path, pixel_array[f])
out_path = os.path.join(output_dir, f"frame_{idx:06d}_{f:03d}.jpg")
cv2.imwrite(out_path, pixel_array[f], [cv2.IMWRITE_JPEG_QUALITY, 85])
frame_paths.append(out_path)
else:
out_path = os.path.join(output_dir, f"frame_{idx:06d}.png")
cv2.imwrite(out_path, pixel_array)
out_path = os.path.join(output_dir, f"frame_{idx:06d}.jpg")
cv2.imwrite(out_path, pixel_array, [cv2.IMWRITE_JPEG_QUALITY, 85])
frame_paths.append(out_path)
except Exception as exc: # noqa: BLE001
logger.error("Failed to read DICOM %s: %s", path, exc)

View File

@@ -1,8 +1,8 @@
import React, { useState, useEffect, useRef } from 'react';
import { UploadCloud, Film, Settings2, MoreHorizontal, Plus, Loader2 } from 'lucide-react';
import { UploadCloud, Film, Settings2, MoreHorizontal, Plus, Loader2, Activity } from 'lucide-react';
import { cn } from '../lib/utils';
import { useStore } from '../store/useStore';
import { getProjects, createProject, uploadMedia, parseMedia } from '../lib/api';
import { getProjects, createProject, uploadMedia, parseMedia, uploadDicomBatch } from '../lib/api';
import type { Project } from '../store/useStore';
interface ProjectLibraryProps {
@@ -19,7 +19,12 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) {
const [showModal, setShowModal] = useState(false);
const [newName, setNewName] = useState('');
const [newDesc, setNewDesc] = useState('');
const fileInputRef = useRef<HTMLInputElement>(null);
const [showImportMenu, setShowImportMenu] = useState(false);
const [showVideoConfig, setShowVideoConfig] = useState(false);
const [pendingFile, setPendingFile] = useState<File | null>(null);
const [parseFps, setParseFps] = useState(30);
const videoInputRef = useRef<HTMLInputElement>(null);
const dicomInputRef = useRef<HTMLInputElement>(null);
useEffect(() => {
setIsLoading(true);
@@ -50,6 +55,60 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) {
onProjectSelect();
};
// Stage the picked video, reset the parse FPS to 30 and open the FPS dialog.
const handleVideoSelect = (file: File) => {
  setParseFps(30);
  setPendingFile(file);
  setShowVideoConfig(true);
};
// Create a project for the staged video, upload the file, trigger parsing,
// then reload the project list. Does nothing when no file is staged.
const handleVideoUpload = async () => {
  const file = pendingFile;
  if (!file) return;

  setShowVideoConfig(false);
  setIsLoading(true);
  try {
    const created = await createProject({
      name: file.name,
      description: `导入于 ${new Date().toLocaleString()}`,
      parse_fps: parseFps,
    });
    const projectKey = String(created.id);
    const uploadResult = await uploadMedia(file, projectKey);
    await parseMedia(projectKey);
    alert(`上传成功: ${file.name}\n已保存至: ${uploadResult.url}`);
    const refreshed = await getProjects();
    setProjects(refreshed);
  } catch (error) {
    console.error('Upload failed:', error);
    alert('上传失败,请检查后端服务');
  } finally {
    setIsLoading(false);
    setPendingFile(null);
    // Clear the file input so the same file can be picked again.
    const input = videoInputRef.current;
    if (input) input.value = '';
  }
};
// Upload the selected .dcm files as one batch, trigger parsing for the
// resulting project, then reload the project list.
const handleDicomUpload = async (files: FileList | null) => {
  if (!files?.length) return;

  // Keep only files with a .dcm extension (case-insensitive).
  const dcmFiles: File[] = [];
  for (const candidate of Array.from(files)) {
    if (candidate.name.toLowerCase().endsWith('.dcm')) {
      dcmFiles.push(candidate);
    }
  }
  if (dcmFiles.length === 0) {
    alert('未选择有效的 .dcm 文件');
    return;
  }

  setIsLoading(true);
  try {
    const batch = await uploadDicomBatch(dcmFiles);
    await parseMedia(String(batch.project_id));
    alert(`DICOM 上传成功: ${batch.uploaded_count} 个文件`);
    const refreshed = await getProjects();
    setProjects(refreshed);
  } catch (error) {
    console.error('DICOM upload failed:', error);
    alert('DICOM 上传失败,请检查后端服务');
  } finally {
    setIsLoading(false);
    // Clear the file input so the same selection can be made again.
    const input = dicomInputRef.current;
    if (input) input.value = '';
  }
};
const SkeletonCard = () => (
<div className="group flex flex-col bg-[#111] border border-white/5 rounded-xl overflow-hidden animate-pulse">
<div className="w-full aspect-[16/9] bg-[#1a1a1a]" />
@@ -75,45 +134,51 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) {
<Plus size={18} />
<span></span>
</button>
<button
onClick={() => fileInputRef.current?.click()}
className="flex items-center gap-2 bg-cyan-600 hover:bg-cyan-500 text-white px-5 py-2.5 rounded-lg font-medium text-sm transition-colors border border-cyan-500 shadow-lg shadow-cyan-900/20"
>
<UploadCloud size={18} />
<span></span>
</button>
<div className="relative">
<button
onClick={() => setShowImportMenu(!showImportMenu)}
className="flex items-center gap-2 bg-cyan-600 hover:bg-cyan-500 text-white px-5 py-2.5 rounded-lg font-medium text-sm transition-colors border border-cyan-500 shadow-lg shadow-cyan-900/20"
>
<UploadCloud size={18} />
<span></span>
</button>
{showImportMenu && (
<div className="absolute right-0 top-full mt-2 w-56 bg-[#111] border border-white/10 rounded-lg shadow-2xl z-50 overflow-hidden">
<button
className="w-full text-left px-4 py-3 text-sm text-gray-200 hover:bg-white/5 flex items-center gap-3 transition-colors"
onClick={() => { setShowImportMenu(false); videoInputRef.current?.click(); }}
>
<Film size={16} className="text-cyan-400" />
</button>
<button
className="w-full text-left px-4 py-3 text-sm text-gray-200 hover:bg-white/5 flex items-center gap-3 transition-colors border-t border-white/5"
onClick={() => { setShowImportMenu(false); dicomInputRef.current?.click(); }}
>
<Activity size={16} className="text-emerald-400" />
DICOM
</button>
</div>
)}
</div>
<input
type="file"
ref={fileInputRef}
ref={videoInputRef}
className="hidden"
accept="video/*,image/*,.dcm"
onChange={async (e) => {
accept="video/*"
onChange={(e) => {
const file = e.target.files?.[0];
if (!file) return;
try {
setIsLoading(true);
// 1. 创建项目
const newProject = await createProject({
name: file.name,
description: `导入于 ${new Date().toLocaleString()}`,
});
// 2. 带 project_id 上传
const result = await uploadMedia(file, String(newProject.id));
// 3. 触发帧解析
await parseMedia(String(newProject.id));
alert(`上传成功: ${file.name}\n已保存至: ${result.url}`);
// 4. 刷新项目列表
const data = await getProjects();
setProjects(data);
} catch (err) {
console.error('Upload failed:', err);
alert('上传失败,请检查后端服务');
} finally {
setIsLoading(false);
if (fileInputRef.current) fileInputRef.current.value = '';
}
if (file) handleVideoSelect(file);
}}
/>
<input
type="file"
ref={dicomInputRef}
className="hidden"
accept=".dcm"
multiple
onChange={(e) => handleDicomUpload(e.target.files)}
/>
</div>
</div>
@@ -126,29 +191,38 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) {
) : (
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-6">
{projects.map((proj) => (
<div
key={proj.id}
<div
key={proj.id}
className="group flex flex-col bg-[#111] border border-white/5 rounded-xl overflow-hidden cursor-pointer hover:border-cyan-500/50 transition-all hover:shadow-[0_0_20px_rgba(6,182,212,0.15)]"
onClick={() => handleSelect(proj)}
>
<div className={cn("w-full aspect-[16/9] relative flex items-center justify-center overflow-hidden", proj.thumbnail || 'bg-[#0d0d0d]')}>
<Film className="w-12 h-12 text-[#2a2a2a] group-hover:text-[#333] transition-colors" />
<div className="absolute top-2 right-2 flex gap-2">
<span className="backdrop-blur-md bg-black/40 text-gray-200 text-[10px] font-mono px-2 py-1 rounded border border-white/10 uppercase tracking-widest">
{proj.fps || '30FPS'}
</span>
<span className="backdrop-blur-md bg-black/40 text-gray-200 text-[10px] px-2 py-1 rounded border border-white/10 flex items-center gap-1 uppercase tracking-widest">
{proj.status === 'Ready' || proj.status === 'ready' ? (
<><div className="w-1.5 h-1.5 bg-emerald-500 rounded-full" /> </>
) : proj.status === 'Parsing' || proj.status === 'parsing' ? (
<><div className="w-1.5 h-1.5 bg-amber-500 rounded-full animate-pulse" /> </>
) : proj.status === 'pending' || proj.status === 'Pending' ? (
<><div className="w-1.5 h-1.5 bg-blue-500 rounded-full" /> </>
) : (
<><div className="w-1.5 h-1.5 bg-red-500 rounded-full" /> </>
)}
</span>
</div>
<div className={cn("w-full aspect-[16/9] relative flex items-center justify-center overflow-hidden bg-[#0d0d0d]")}>
{proj.thumbnail_url ? (
<img
src={proj.thumbnail_url}
alt={proj.name}
className="absolute inset-0 w-full h-full object-cover group-hover:scale-105 transition-transform duration-500"
loading="lazy"
/>
) : (
<Film className="w-12 h-12 text-[#2a2a2a] group-hover:text-[#333] transition-colors" />
)}
<div className="absolute top-2 right-2 flex gap-2">
<span className="backdrop-blur-md bg-black/40 text-gray-200 text-[10px] font-mono px-2 py-1 rounded border border-white/10 uppercase tracking-widest">
{proj.source_type === 'dicom' ? 'DICOM' : (proj.fps || '30FPS')}
</span>
<span className="backdrop-blur-md bg-black/40 text-gray-200 text-[10px] px-2 py-1 rounded border border-white/10 flex items-center gap-1 uppercase tracking-widest">
{proj.status === 'Ready' || proj.status === 'ready' ? (
<><div className="w-1.5 h-1.5 bg-emerald-500 rounded-full" /> </>
) : proj.status === 'Parsing' || proj.status === 'parsing' ? (
<><div className="w-1.5 h-1.5 bg-amber-500 rounded-full animate-pulse" /> </>
) : proj.status === 'pending' || proj.status === 'Pending' ? (
<><div className="w-1.5 h-1.5 bg-blue-500 rounded-full" /> </>
) : (
<><div className="w-1.5 h-1.5 bg-red-500 rounded-full" /> </>
)}
</span>
</div>
</div>
<div className="p-4 flex flex-col gap-1">
<div className="flex justify-between items-start">
@@ -157,6 +231,9 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) {
</div>
<div className="flex items-center gap-4 text-xs text-gray-500 font-mono mt-2">
<span className="flex items-center gap-1.5"><Settings2 size={12} /> {proj.frames ?? 0} </span>
{proj.original_fps && (
<span className="flex items-center gap-1.5 text-cyan-400/80"><Activity size={12} /> {proj.original_fps.toFixed(1)}fps</span>
)}
</div>
</div>
</div>
@@ -164,6 +241,48 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) {
</div>
)}
{/* Video parse FPS config modal */}
{showVideoConfig && pendingFile && (
<div className="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm">
<div className="bg-[#111] border border-white/10 rounded-2xl p-6 w-full max-w-md shadow-2xl">
<h2 className="text-lg font-semibold text-white mb-4"></h2>
<div className="space-y-4">
<div className="text-sm text-gray-400">: <span className="text-gray-200">{pendingFile.name}</span></div>
<div>
<label className="block text-xs font-medium text-gray-400 uppercase tracking-widest mb-2"> (FPS)</label>
<div className="flex items-center gap-3">
<input
type="range"
min="1"
max="60"
value={parseFps}
onChange={(e) => setParseFps(parseInt(e.target.value))}
className="flex-1 accent-cyan-500"
/>
<span className="text-sm font-mono text-cyan-400 w-12 text-right">{parseFps}</span>
</div>
<p className="text-[10px] text-gray-600 mt-1"></p>
</div>
</div>
<div className="flex justify-end gap-3 mt-6">
<button
onClick={() => { setShowVideoConfig(false); setPendingFile(null); }}
className="px-4 py-2 rounded-lg text-sm text-gray-400 hover:text-white transition-colors"
>
</button>
<button
onClick={handleVideoUpload}
className="px-4 py-2 rounded-lg text-sm font-medium bg-cyan-500 hover:bg-cyan-400 text-black transition-all"
>
</button>
</div>
</div>
</div>
)}
{/* New project modal */}
{showModal && (
<div className="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm">
<div className="bg-[#111] border border-white/10 rounded-2xl p-6 w-full max-w-md shadow-2xl">

View File

@@ -48,10 +48,14 @@ export async function getProjects(): Promise<Project[]> {
description: p.description,
status: p.status,
frames: p.frame_count ?? 0,
fps: '30FPS',
fps: p.original_fps ? `${Math.round(p.original_fps)}FPS` : '30FPS',
thumbnail_url: p.thumbnail_url,
video_path: p.video_path,
source_type: p.source_type,
original_fps: p.original_fps,
parse_fps: p.parse_fps,
createdAt: p.created_at,
updatedAt: p.updated_at,
video_path: p.video_path,
}));
}
@@ -67,10 +71,14 @@ export async function createProject(payload: {
description: p.description,
status: p.status,
frames: p.frame_count ?? 0,
fps: '30FPS',
fps: p.original_fps ? `${Math.round(p.original_fps)}FPS` : '30FPS',
thumbnail_url: p.thumbnail_url,
video_path: p.video_path,
source_type: p.source_type,
original_fps: p.original_fps,
parse_fps: p.parse_fps,
createdAt: p.created_at,
updatedAt: p.updated_at,
video_path: p.video_path,
};
}
@@ -135,6 +143,16 @@ export async function getProjectFrames(projectId: string): Promise<Array<{
return response.data;
}
/**
 * POST a batch of files to `/api/media/upload/dicom` as multipart form data.
 *
 * Each file is sent under the `files` field; `project_id` is included only
 * when `projectId` is given. Resolves to the response body.
 */
export async function uploadDicomBatch(files: File[], projectId?: string): Promise<{ project_id: number; uploaded_count: number; message: string }> {
  const payload = new FormData();
  for (const file of files) {
    payload.append('files', file);
  }
  if (projectId) {
    payload.append('project_id', projectId);
  }
  const { data } = await apiClient.post('/api/media/upload/dicom', payload, {
    headers: { 'Content-Type': 'multipart/form-data' },
  });
  return data;
}
export async function parseMedia(projectId: string): Promise<{
project_id: number;
frames_extracted: number;

View File

@@ -8,7 +8,11 @@ export interface Project {
fps?: string;
frames?: number;
thumbnail?: string;
thumbnail_url?: string;
video_path?: string;
source_type?: string;
original_fps?: number;
parse_fps?: number;
createdAt?: string;
updatedAt?: string;
}

View File

@@ -0,0 +1,62 @@
# 实现方案 — 2026-04-30
## R1 — PyTorch CUDA + SAM2 安装
### 步骤
1. `pip uninstall torch torchvision torchaudio -y`
2. `pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124`
3. `pip install sam2`
4. 验证: `torch.cuda.is_available()` → True
5. 修改 `backend/services/sam2_engine.py`,移除 stub fallback,使用真实 SAM2
### SAM2 模型配置
- 模型文件: `/home/wkmgc/Desktop/Seg_Server/models/sam2_hiera_tiny.pt` (149MB)
- 配置文件: 需下载对应 YAML 或使用默认配置
## R2 — 视频封面
### 后端
1. `frame_parser.py`: `parse_video()` 提取第一帧为 `thumbnail.jpg`
2. `upload_frames_to_minio()`: 同时上传 thumbnail 到 `projects/{id}/thumbnail.jpg`
3. `models.py`: Project 增加 `thumbnail_url` 字段
4. `schemas.py`: ProjectOut 增加 `thumbnail_url`
5. `projects.py`: list_projects / get_project 返回 thumbnail presigned URL
### 前端
1. `ProjectLibrary.tsx`: 卡片背景用 `<img>` 显示 `proj.thumbnail_url`
## R3 — 帧率
### 后端
1. `models.py`: Project 增加 `original_fps` (float), `parse_fps` (float, default=30)
2. `schemas.py`: ProjectOut / ProjectCreate 增加字段
3. `frame_parser.py`:
- `parse_video()` 开头用 FFmpeg/FFprobe 读取原始帧率,返回给调用方
- 解析时按 `parse_fps` 参数提取帧
4. `media.py`: `parse_media()` 传入 parse_fps
5. `main.py`: 默认视频种子逻辑设置 original_fps
### 前端
1. `useStore.ts`: Project 增加 `original_fps`, `parse_fps`
2. `api.ts`: getProjects / createProject 映射字段
3. `ProjectLibrary.tsx`: 显示真实帧率 `original_fps`
4. 上传时弹窗允许设置 parse_fps
## R4 — DICOM 批量导入
### 后端
1. `models.py`: Project 增加 `source_type` (str, default="video")
2. `schemas.py`: ProjectOut 增加 source_type
3. `media.py`:
- `upload_media()` 支持批量上传(`List[UploadFile]`)
- 新增 `/api/media/upload/dicom` 接口,专用于批量 .dcm 上传
- DICOM 文件存储到 `uploads/{project_id}/dicom/`
- 上传完成后可直接触发解析
4. `frame_parser.py`: `parse_dicom()` 支持从 MinIO 读取整个 dicom 目录
### 前端
1. `ProjectLibrary.tsx`:
- 导入按钮支持两种模式: 视频导入 / DICOM 序列导入
- DICOM 模式: `<input multiple accept=".dcm">`
- 上传进度显示
2. `useStore.ts`: Project 增加 `source_type`

View File

@@ -0,0 +1,22 @@
# 测试方案 — 2026-04-30
## TC1 — PyTorch CUDA + SAM2
1. 后端启动后日志应显示 "SAM2 loaded successfully" 而非 "SAM2 import failed"
2. `python -c "import torch; print(torch.cuda.is_available())"` → True
3. `python -c "import sam2; print('OK')"` → OK
4. 调用 `/api/ai/predict` 应返回真实 polygon,而非 dummy rectangle
## TC2 — 视频封面
1. 解析视频后,检查 MinIO 中是否存在 `projects/{id}/thumbnail.jpg`
2. 项目库卡片应显示视频第一帧作为封面背景
## TC3 — 帧率
1. 上传 25fps 视频,original_fps 应显示 25.0
2. 设置 parse_fps=10,解析后帧数应约为原始总帧数的 parse_fps / original_fps(25fps 视频约为 2/5,30fps 视频约为 1/3)
3. 项目库显示原始帧率
## TC4 — DICOM 批量导入
1. 选择 10 个 .dcm 文件批量上传
2. 项目创建成功,source_type="dicom"
3. 解析完成后帧数等于上传的 .dcm 数量
4. 再次向同一项目上传 5 个 .dcm,帧数增加 5

View File

@@ -5,6 +5,50 @@
---
## 2026-04-30-00-17-44 — PyTorch CUDA + SAM2 + 封面 + 帧率 + DICOM 批量导入
### A. 具体问题
1. PyTorch 为 CPU 版本,SAM2 未安装,GPU 推理不可用
2. 项目库视频卡片无封面缩略图
3. 项目库 FPS 为硬编码 "30FPS",不显示真实原始帧率,也无法修改解析帧率
4. 不支持 DICOM 连续帧批量导入
### B. 产生原因
1. 系统磁盘仅 24GB,PyTorch CUDA wheel (~1GB) + SAM2 编译依赖导致 `No space left on device`
2. 解析视频时未提取封面,Project 模型无 thumbnail_url 字段
3. 解析视频时未读取原始帧率,Project 模型无 original_fps / parse_fps 字段
4. upload 接口仅支持单文件,无批量 DICOM 上传接口;Project 无 source_type 区分视频/DICOM
### C. 解决方案
1. **磁盘扩容后安装 PyTorch CUDA + SAM2**
- `pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124`
- `pip install sam2`(成功编译安装 sam2-1.1.0)
- 修正 `config.py` 中的 `sam_model_config` 为 `configs/sam2/sam2_hiera_t.yaml`(Hydra 包内相对路径)
- 验证 `torch.cuda.is_available()` → True,RTX 4090 识别正常
2. **视频封面**
- `frame_parser.py` 新增 `extract_thumbnail()`,从视频第一帧提取 640px JPEG 封面
- `media.py` `parse_media()` 解析视频时自动上传封面到 `projects/{id}/thumbnail.jpg`
- `projects.py` 返回 presigned URL
- 前端 `ProjectLibrary.tsx` 卡片背景显示 `<img>` 封面
3. **真实帧率 + 可修改解析帧率**
- `models.py` Project 新增 `original_fps` (Float), `parse_fps` (Float, default=30)
- `frame_parser.py` `get_video_fps()` 用 OpenCV 读取真实帧率,`parse_video()` 返回 `(frames, original_fps)`
- 前端 `ProjectLibrary.tsx` 上传视频时弹窗允许用户滑动设置 parse_fps (1-60)
- 项目卡片显示 `原 60.0fps` 与 `30FPS` 标签
4. **DICOM 批量导入**
- `models.py` Project 新增 `source_type` (video | dicom)
- `media.py` 新增 `/api/media/upload/dicom` 接口,接收 `List[UploadFile]`,上传多个 .dcm 到 `uploads/{id}/dicom/`
- `media.py` `parse_media()` 支持 DICOM 模式:从 MinIO 下载整个 dicom 目录 → `parse_dicom()` 解析
- 前端 `ProjectLibrary.tsx` 导入按钮展开菜单:导入视频 / 导入 DICOM 序列;DICOM 用 `<input multiple accept=".dcm">`
### D. 后续如何避免问题
1. **SAM2 配置路径必须用 Hydra 包内相对路径**:`build_sam2()` 使用 Hydra,配置文件必须传 `configs/sam2/xxx.yaml` 而非绝对路径
2. **数据库 schema 变更必须彻底清理旧表**:PostgreSQL `drop_all()` 可能因外键约束不彻底,生产环境应使用 Alembic 迁移,开发环境应手动 `DROP TABLE ... CASCADE`
3. **大依赖安装必须预留足够磁盘空间**:PyTorch CUDA (~1GB) + SAM2 build (~500MB temp) 至少需要 5GB 可用空间
4. **前端上传交互必须区分媒体类型**:视频和 DICOM 的上传流程、文件选择器 `accept`、后续解析逻辑完全不同,应提供明确的模式切换
---
## 2026-04-29-23-28-13 — 视频帧显示链路全修复
### A. 具体问题

View File

@@ -0,0 +1,39 @@
# 需求分析 — 2026-04-30
## 需求背景
磁盘已扩容至 184GB空间充足。用户提出 4 个核心需求:
## 需求拆解
| 编号 | 需求 | 优先级 | 影响面 |
|------|------|--------|--------|
| R1 | 安装完整版 PyTorch CUDA + SAM2,恢复 GPU 推理 | P0 | conda 环境 |
| R2 | 项目库视频显示封面缩略图 | P0 | backend/media.py, ProjectLibrary.tsx |
| R3 | 项目库显示原始帧率,支持修改解析帧率 | P0 | backend/models.py, frame_parser.py, 前端 |
| R4 | DICOM 连续帧批量导入支持 | P0 | backend/media.py, ProjectLibrary.tsx |
### R1 — PyTorch CUDA + SAM2
- 当前 PyTorch 为 CPU 版本 (2.11.0+cpu)
- SAM2 未安装
- GPU: RTX 4090 24GB,驱动 595.58.03
- 目标: 安装 PyTorch 2.5+ CUDA 12.4 + SAM2,使 `/api/ai/predict` 使用真实 SAM2 推理
### R2 — 视频封面
- 当前项目卡片只显示 Film 图标,无封面
- 目标: 解析视频时提取第一帧作为封面 thumbnail,项目库显示为卡片背景图
### R3 — 帧率显示与修改
- 当前 fps 为硬编码 "30FPS"
- 目标:
- 解析视频时读取真实原始帧率,存入 Project.original_fps
- 项目库显示原始帧率
- 支持设置 parse_fps(解析帧率),可低于原始帧率
- 后端解析时按 parse_fps 提取帧
### R4 — DICOM 批量导入
- Data_Dicom帧/ 下有 300 个 .dcm 文件,共约 160MB
- 目标:
- 项目类型支持 `source_type`: video / dicom
- 导入时支持多选 .dcm 文件批量上传
- 上传后解析为帧序列
- 支持后续继续向同一项目新增 .dcm 文件