diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d43840a --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "workbench.editorAssociations": { + "*.mp4": "default" + } +} \ No newline at end of file diff --git a/backend/config.py b/backend/config.py index 1e543f6..712e3b0 100644 --- a/backend/config.py +++ b/backend/config.py @@ -20,7 +20,7 @@ class Settings(BaseSettings): # SAM2 sam_model_path: str = "/home/wkmgc/Desktop/Seg_Server/models/sam2_hiera_tiny.pt" - sam_model_config: str = "sam2_hiera_t.yaml" + sam_model_config: str = "configs/sam2/sam2_hiera_t.yaml" # App app_env: str = "development" diff --git a/backend/main.py b/backend/main.py index 4a4dc4c..62ca759 100644 --- a/backend/main.py +++ b/backend/main.py @@ -30,7 +30,7 @@ def _seed_default_project_sync() -> None: """Synchronously seed the default video project on first startup.""" import cv2 from models import Project, Frame - from services.frame_parser import parse_video, upload_frames_to_minio + from services.frame_parser import parse_video, upload_frames_to_minio, extract_thumbnail db = SessionLocal() try: @@ -46,6 +46,8 @@ def _seed_default_project_sync() -> None: name="Data_MyVideo_1", description="默认演示视频", status="pending", + source_type="video", + parse_fps=30.0, ) db.add(project) db.commit() @@ -67,7 +69,20 @@ def _seed_default_project_sync() -> None: f.write(data) output_dir = os.path.join(tmp_dir, "frames") os.makedirs(output_dir, exist_ok=True) - frame_files = parse_video(local_path, output_dir, fps=30, max_frames=100) + frame_files, original_fps = parse_video(local_path, output_dir, fps=30, max_frames=100) + project.original_fps = original_fps + + # Extract thumbnail + thumbnail_path = os.path.join(tmp_dir, "thumbnail.jpg") + try: + extract_thumbnail(local_path, thumbnail_path) + with open(thumbnail_path, "rb") as f: + thumb_data = f.read() + thumb_object = f"projects/{project.id}/thumbnail.jpg" + upload_file(thumb_object, thumb_data, 
content_type="image/jpeg", length=len(thumb_data)) + project.thumbnail_url = thumb_object + except Exception as exc: # noqa: BLE001 + logger.warning("Thumbnail extraction failed: %s", exc) object_names = upload_frames_to_minio(frame_files, project.id) diff --git a/backend/models.py b/backend/models.py index 44a04be..28e293f 100644 --- a/backend/models.py +++ b/backend/models.py @@ -25,7 +25,11 @@ class Project(Base): name = Column(String(255), nullable=False) description = Column(Text, nullable=True) video_path = Column(String(512), nullable=True) + thumbnail_url = Column(String(512), nullable=True) status = Column(String(50), default="Ready", nullable=False) + source_type = Column(String(20), default="video", nullable=False) # video | dicom + original_fps = Column(Float, nullable=True) + parse_fps = Column(Float, default=30.0, nullable=False) created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column( DateTime(timezone=True), server_default=func.now(), onupdate=func.now() diff --git a/backend/routers/media.py b/backend/routers/media.py index 0fc7f73..c18bb87 100644 --- a/backend/routers/media.py +++ b/backend/routers/media.py @@ -6,16 +6,19 @@ import shutil import subprocess import tempfile from pathlib import Path -from typing import Optional +from typing import List, Optional from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status from sqlalchemy.orm import Session from database import get_db -from minio_client import upload_file, get_presigned_url +from minio_client import upload_file, get_presigned_url, download_file from models import Project, Frame from schemas import FrameOut -from services.frame_parser import parse_video, parse_dicom, upload_frames_to_minio +from services.frame_parser import ( + parse_video, parse_dicom, upload_frames_to_minio, + extract_thumbnail, get_video_fps, +) logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/media", tags=["Media"]) @@ -78,6 +81,7 @@ 
async def upload_media( description="Auto-created from upload", status="pending", video_path=object_name, + source_type="video", ) db.add(project) db.commit() @@ -90,7 +94,6 @@ async def upload_media( db.commit() logger.info("Auto-created project id=%s for upload %s", project_id, file.filename) - # TODO: enqueue async parsing job (Celery / background task) logger.info("Upload complete: %s (size=%d bytes). Async parsing queued.", object_name, len(data)) return { @@ -102,6 +105,66 @@ async def upload_media( } +@router.post( + "/upload/dicom", + status_code=status.HTTP_201_CREATED, + summary="Upload multiple DICOM files", +) +async def upload_dicom_batch( + files: List[UploadFile] = File(...), + project_id: Optional[int] = Form(None), + db: Session = Depends(get_db), +) -> dict: + """Upload multiple .dcm files for a DICOM series. + + If project_id is provided, files are added to the existing project. + Otherwise a new DICOM project is created. + """ + if not files: + raise HTTPException(status_code=400, detail="No files uploaded") + + uploaded = [] + + if project_id: + project = db.query(Project).filter(Project.id == project_id).first() + if not project: + raise HTTPException(status_code=404, detail="Project not found") + else: + # Create new DICOM project + first_name = files[0].filename or "DICOM_Series" + project = Project( + name=first_name, + description=f"DICOM series with {len(files)} files", + status="pending", + source_type="dicom", + ) + db.add(project) + db.commit() + db.refresh(project) + project_id = project.id + logger.info("Auto-created DICOM project id=%s", project_id) + + for file in files: + if not file.filename or not file.filename.lower().endswith(".dcm"): + continue + data = await file.read() + object_name = f"uploads/{project_id}/dicom/{file.filename}" + try: + upload_file(object_name, data, content_type="application/dicom", length=len(data)) + uploaded.append(object_name) + except Exception as exc: # noqa: BLE001 + logger.error("Failed to upload 
DICOM %s: %s", file.filename, exc) + + project.video_path = f"uploads/{project_id}/dicom" + db.commit() + + return { + "project_id": project_id, + "uploaded_count": len(uploaded), + "message": f"Uploaded {len(uploaded)} DICOM files. Parsing job queued.", + } + + @router.post( "/parse", status_code=status.HTTP_202_ACCEPTED, @@ -109,12 +172,12 @@ async def upload_media( ) def parse_media( project_id: int, - source_type: str = "video", # video | dicom + source_type: Optional[str] = None, db: Session = Depends(get_db), ) -> dict: """Trigger frame extraction for a project's uploaded media. - * video: uses FFmpeg or OpenCV fallback. + * video: uses FFmpeg or OpenCV fallback, extracts thumbnail. * dicom: uses pydicom to read DCM frames. Extracted frames are uploaded to MinIO and registered in the database. @@ -126,37 +189,53 @@ def parse_media( if not project.video_path: raise HTTPException(status_code=400, detail="Project has no media uploaded") - # Download from MinIO to a temp directory - from minio_client import download_file - - try: - media_bytes = download_file(project.video_path) - except Exception as exc: # noqa: BLE001 - logger.error("Failed to download media for parsing: %s", exc) - raise HTTPException(status_code=500, detail="Failed to retrieve media from storage") from exc + effective_source = source_type or project.source_type or "video" + parse_fps = project.parse_fps or 30.0 tmp_dir = tempfile.mkdtemp(prefix=f"seg_parse_{project_id}_") - local_path = os.path.join(tmp_dir, Path(project.video_path).name) - - with open(local_path, "wb") as f: - f.write(media_bytes) - output_dir = os.path.join(tmp_dir, "frames") os.makedirs(output_dir, exist_ok=True) try: - if source_type == "dicom": - # For DICOM, treat local_path as a directory if it contains multiple .dcm - # If a single .dcm file was uploaded, put it in its own sub-dir + if effective_source == "dicom": + # Download all dicom files from MinIO dcm_dir = os.path.join(tmp_dir, "dcm") os.makedirs(dcm_dir, 
exist_ok=True) - if local_path.lower().endswith(".dcm"): - shutil.move(local_path, os.path.join(dcm_dir, os.path.basename(local_path))) - else: - shutil.unpack_archive(local_path, dcm_dir) if shutil.which("unzip") else shutil.move(local_path, dcm_dir) + + from minio_client import get_minio_client, BUCKET_NAME + client = get_minio_client() + prefix = project.video_path + objects = list(client.list_objects(BUCKET_NAME, prefix=prefix, recursive=True)) + for obj in objects: + if obj.object_name.lower().endswith(".dcm"): + data = download_file(obj.object_name) + local_dcm = os.path.join(dcm_dir, os.path.basename(obj.object_name)) + with open(local_dcm, "wb") as f: + f.write(data) + frame_files = parse_dicom(dcm_dir, output_dir) else: - frame_files = parse_video(local_path, output_dir, fps=30) + # Video: download and parse + media_bytes = download_file(project.video_path) + local_path = os.path.join(tmp_dir, Path(project.video_path).name) + with open(local_path, "wb") as f: + f.write(media_bytes) + + frame_files, original_fps = parse_video(local_path, output_dir, fps=int(parse_fps)) + project.original_fps = original_fps + + # Extract thumbnail from first frame + thumbnail_path = os.path.join(tmp_dir, "thumbnail.jpg") + try: + extract_thumbnail(local_path, thumbnail_path) + with open(thumbnail_path, "rb") as f: + thumb_data = f.read() + thumb_object = f"projects/{project_id}/thumbnail.jpg" + upload_file(thumb_object, thumb_data, content_type="image/jpeg", length=len(thumb_data)) + project.thumbnail_url = thumb_object + logger.info("Uploaded thumbnail for project_id=%s", project_id) + except Exception as exc: # noqa: BLE001 + logger.warning("Thumbnail extraction failed: %s", exc) except Exception as exc: # noqa: BLE001 logger.error("Frame extraction failed: %s", exc) shutil.rmtree(tmp_dir, ignore_errors=True) @@ -173,7 +252,6 @@ def parse_media( # Register frames in DB frames_out = [] for idx, obj_name in enumerate(object_names): - # Get image dimensions local_frame = 
frame_files[idx] try: import cv2 diff --git a/backend/routers/projects.py b/backend/routers/projects.py index 256b312..7fafe3e 100644 --- a/backend/routers/projects.py +++ b/backend/routers/projects.py @@ -44,6 +44,8 @@ def list_projects(skip: int = 0, limit: int = 100, db: Session = Depends(get_db) projects = db.query(Project).offset(skip).limit(limit).all() for p in projects: p.frame_count = len(p.frames) + if p.thumbnail_url: + p.thumbnail_url = get_presigned_url(p.thumbnail_url, expires=3600) return projects @@ -58,6 +60,8 @@ def get_project(project_id: int, db: Session = Depends(get_db)) -> Project: if not project: raise HTTPException(status_code=404, detail="Project not found") project.frame_count = len(project.frames) + if project.thumbnail_url: + project.thumbnail_url = get_presigned_url(project.thumbnail_url, expires=3600) return project diff --git a/backend/schemas.py b/backend/schemas.py index 84ee914..5e13bf9 100644 --- a/backend/schemas.py +++ b/backend/schemas.py @@ -12,7 +12,11 @@ class ProjectBase(BaseModel): name: str description: Optional[str] = None video_path: Optional[str] = None + thumbnail_url: Optional[str] = None status: Optional[str] = "pending" + source_type: Optional[str] = "video" + original_fps: Optional[float] = None + parse_fps: Optional[float] = 30.0 class ProjectCreate(ProjectBase): @@ -23,7 +27,11 @@ class ProjectUpdate(BaseModel): name: Optional[str] = None description: Optional[str] = None video_path: Optional[str] = None + thumbnail_url: Optional[str] = None status: Optional[str] = None + source_type: Optional[str] = None + original_fps: Optional[float] = None + parse_fps: Optional[float] = None class ProjectOut(ProjectBase): @@ -103,7 +111,7 @@ class AnnotationCreate(AnnotationBase): class AnnotationUpdate(BaseModel): mask_data: Optional[dict[str, Any]] = None - points: Optional[list[list[float]]] = None + points: Optional[list[float]] = None bbox: Optional[list[float]] = None template_id: Optional[int] = None diff --git 
a/backend/services/frame_parser.py b/backend/services/frame_parser.py index 8c2e4e7..349a0d4 100644 --- a/backend/services/frame_parser.py +++ b/backend/services/frame_parser.py @@ -5,7 +5,7 @@ import os import shutil import subprocess from pathlib import Path -from typing import List, Optional +from typing import List, Optional, Tuple import cv2 import numpy as np @@ -16,12 +16,43 @@ from minio_client import upload_file, BUCKET_NAME logger = logging.getLogger(__name__) +def get_video_fps(video_path: str) -> float: + """Read the original frame rate of a video file.""" + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + return 30.0 + fps = cap.get(cv2.CAP_PROP_FPS) + cap.release() + return fps if fps > 0 else 30.0 + + +def extract_thumbnail(video_path: str, output_path: str, width: int = 640) -> str: + """Extract the first frame of a video as a thumbnail JPEG.""" + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + raise RuntimeError(f"Cannot open video for thumbnail: {video_path}") + ret, frame = cap.read() + cap.release() + if not ret or frame is None: + raise RuntimeError(f"Cannot read first frame from: {video_path}") + + h, w = frame.shape[:2] + if w > width: + scale = width / w + new_w = int(w * scale) + new_h = int(h * scale) + frame = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA) + + cv2.imwrite(output_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 85]) + return output_path + + def parse_video( video_path: str, output_dir: str, fps: int = 30, max_frames: Optional[int] = None, -) -> List[str]: +) -> Tuple[List[str], float]: """Extract frames from a video file using FFmpeg or OpenCV fallback. Args: @@ -31,10 +62,11 @@ def parse_video( max_frames: Optional maximum number of frames to extract. Returns: - List of paths to extracted frame images. + Tuple of (frame_paths, original_fps). 
""" os.makedirs(output_dir, exist_ok=True) frame_paths: List[str] = [] + original_fps = get_video_fps(video_path) # Try FFmpeg first if shutil.which("ffmpeg"): @@ -57,7 +89,7 @@ def parse_video( if max_frames: frame_paths = frame_paths[:max_frames] logger.info("Extracted %d frames via FFmpeg", len(frame_paths)) - return frame_paths + return frame_paths, original_fps else: logger.warning("FFmpeg failed: %s", result.stderr) except Exception as exc: # noqa: BLE001 @@ -89,7 +121,7 @@ def parse_video( cap.release() logger.info("Extracted %d frames via OpenCV", len(frame_paths)) - return frame_paths + return frame_paths, original_fps def parse_dicom( @@ -134,12 +166,12 @@ def parse_dicom( # Handle multi-frame DICOM if pixel_array.ndim == 3: for f in range(pixel_array.shape[0]): - out_path = os.path.join(output_dir, f"frame_{idx:06d}_{f:03d}.png") - cv2.imwrite(out_path, pixel_array[f]) + out_path = os.path.join(output_dir, f"frame_{idx:06d}_{f:03d}.jpg") + cv2.imwrite(out_path, pixel_array[f], [cv2.IMWRITE_JPEG_QUALITY, 85]) frame_paths.append(out_path) else: - out_path = os.path.join(output_dir, f"frame_{idx:06d}.png") - cv2.imwrite(out_path, pixel_array) + out_path = os.path.join(output_dir, f"frame_{idx:06d}.jpg") + cv2.imwrite(out_path, pixel_array, [cv2.IMWRITE_JPEG_QUALITY, 85]) frame_paths.append(out_path) except Exception as exc: # noqa: BLE001 logger.error("Failed to read DICOM %s: %s", path, exc) diff --git a/src/components/ProjectLibrary.tsx b/src/components/ProjectLibrary.tsx index f6d62ee..867b015 100644 --- a/src/components/ProjectLibrary.tsx +++ b/src/components/ProjectLibrary.tsx @@ -1,8 +1,8 @@ import React, { useState, useEffect, useRef } from 'react'; -import { UploadCloud, Film, Settings2, MoreHorizontal, Plus, Loader2 } from 'lucide-react'; +import { UploadCloud, Film, Settings2, MoreHorizontal, Plus, Loader2, Activity } from 'lucide-react'; import { cn } from '../lib/utils'; import { useStore } from '../store/useStore'; -import { getProjects, 
createProject, uploadMedia, parseMedia } from '../lib/api'; +import { getProjects, createProject, uploadMedia, parseMedia, uploadDicomBatch } from '../lib/api'; import type { Project } from '../store/useStore'; interface ProjectLibraryProps { @@ -19,7 +19,12 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) { const [showModal, setShowModal] = useState(false); const [newName, setNewName] = useState(''); const [newDesc, setNewDesc] = useState(''); - const fileInputRef = useRef(null); + const [showImportMenu, setShowImportMenu] = useState(false); + const [showVideoConfig, setShowVideoConfig] = useState(false); + const [pendingFile, setPendingFile] = useState(null); + const [parseFps, setParseFps] = useState(30); + const videoInputRef = useRef(null); + const dicomInputRef = useRef(null); useEffect(() => { setIsLoading(true); @@ -50,6 +55,60 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) { onProjectSelect(); }; + const handleVideoSelect = (file: File) => { + setPendingFile(file); + setParseFps(30); + setShowVideoConfig(true); + }; + + const handleVideoUpload = async () => { + if (!pendingFile) return; + setShowVideoConfig(false); + setIsLoading(true); + try { + const newProject = await createProject({ + name: pendingFile.name, + description: `导入于 ${new Date().toLocaleString()}`, + parse_fps: parseFps, + }); + const result = await uploadMedia(pendingFile, String(newProject.id)); + await parseMedia(String(newProject.id)); + alert(`上传成功: ${pendingFile.name}\n已保存至: ${result.url}`); + const data = await getProjects(); + setProjects(data); + } catch (err) { + console.error('Upload failed:', err); + alert('上传失败,请检查后端服务'); + } finally { + setIsLoading(false); + setPendingFile(null); + if (videoInputRef.current) videoInputRef.current.value = ''; + } + }; + + const handleDicomUpload = async (files: FileList | null) => { + if (!files || files.length === 0) return; + const dcmFiles = Array.from(files).filter((f) => 
f.name.toLowerCase().endsWith('.dcm')); + if (dcmFiles.length === 0) { + alert('未选择有效的 .dcm 文件'); + return; + } + setIsLoading(true); + try { + const result = await uploadDicomBatch(dcmFiles); + await parseMedia(String(result.project_id)); + alert(`DICOM 上传成功: ${result.uploaded_count} 个文件`); + const data = await getProjects(); + setProjects(data); + } catch (err) { + console.error('DICOM upload failed:', err); + alert('DICOM 上传失败,请检查后端服务'); + } finally { + setIsLoading(false); + if (dicomInputRef.current) dicomInputRef.current.value = ''; + } + }; + const SkeletonCard = () => (
@@ -75,45 +134,51 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) { 新建项目 - +
+ + {showImportMenu && ( +
+ + +
+ )} +
{ + accept="video/*" + onChange={(e) => { const file = e.target.files?.[0]; - if (!file) return; - try { - setIsLoading(true); - // 1. 创建项目 - const newProject = await createProject({ - name: file.name, - description: `导入于 ${new Date().toLocaleString()}`, - }); - // 2. 带 project_id 上传 - const result = await uploadMedia(file, String(newProject.id)); - // 3. 触发帧解析 - await parseMedia(String(newProject.id)); - alert(`上传成功: ${file.name}\n已保存至: ${result.url}`); - // 4. 刷新项目列表 - const data = await getProjects(); - setProjects(data); - } catch (err) { - console.error('Upload failed:', err); - alert('上传失败,请检查后端服务'); - } finally { - setIsLoading(false); - if (fileInputRef.current) fileInputRef.current.value = ''; - } + if (file) handleVideoSelect(file); }} /> + handleDicomUpload(e.target.files)} + />
@@ -126,29 +191,38 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) { ) : (
{projects.map((proj) => ( -
handleSelect(proj)} > -
- -
- - {proj.fps || '30FPS'} - - - {proj.status === 'Ready' || proj.status === 'ready' ? ( - <>
已就绪 - ) : proj.status === 'Parsing' || proj.status === 'parsing' ? ( - <>
解析拆帧中 - ) : proj.status === 'pending' || proj.status === 'Pending' ? ( - <>
待处理 - ) : ( - <>
异常 - )} - -
+
+ {proj.thumbnail_url ? ( + {proj.name} + ) : ( + + )} +
+ + {proj.source_type === 'dicom' ? 'DICOM' : (proj.fps || '30FPS')} + + + {proj.status === 'Ready' || proj.status === 'ready' ? ( + <>
已就绪 + ) : proj.status === 'Parsing' || proj.status === 'parsing' ? ( + <>
解析拆帧中 + ) : proj.status === 'pending' || proj.status === 'Pending' ? ( + <>
待处理 + ) : ( + <>
异常 + )} + +
@@ -157,6 +231,9 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) {
{proj.frames ?? 0} 帧节点 + {proj.original_fps && ( + 原 {proj.original_fps.toFixed(1)}fps + )}
@@ -164,6 +241,48 @@ export function ProjectLibrary({ onProjectSelect }: ProjectLibraryProps) {
)} + {/* Video parse FPS config modal */} + {showVideoConfig && pendingFile && ( +
+
+

导入视频配置

+
+
文件: {pendingFile.name}
+
+ +
+ setParseFps(parseInt(e.target.value))} + className="flex-1 accent-cyan-500" + /> + {parseFps} +
+

帧率越低,提取的帧越少,处理速度越快

+
+
+
+ + +
+
+
+ )} + + {/* New project modal */} {showModal && (
diff --git a/src/lib/api.ts b/src/lib/api.ts index 8658153..c38a4fb 100644 --- a/src/lib/api.ts +++ b/src/lib/api.ts @@ -48,10 +48,14 @@ export async function getProjects(): Promise { description: p.description, status: p.status, frames: p.frame_count ?? 0, - fps: '30FPS', + fps: p.original_fps ? `${Math.round(p.original_fps)}FPS` : '30FPS', + thumbnail_url: p.thumbnail_url, + video_path: p.video_path, + source_type: p.source_type, + original_fps: p.original_fps, + parse_fps: p.parse_fps, createdAt: p.created_at, updatedAt: p.updated_at, - video_path: p.video_path, })); } @@ -67,10 +71,14 @@ export async function createProject(payload: { description: p.description, status: p.status, frames: p.frame_count ?? 0, - fps: '30FPS', + fps: p.original_fps ? `${Math.round(p.original_fps)}FPS` : '30FPS', + thumbnail_url: p.thumbnail_url, + video_path: p.video_path, + source_type: p.source_type, + original_fps: p.original_fps, + parse_fps: p.parse_fps, createdAt: p.created_at, updatedAt: p.updated_at, - video_path: p.video_path, }; } @@ -135,6 +143,16 @@ export async function getProjectFrames(projectId: string): Promise { + const formData = new FormData(); + files.forEach((file) => formData.append('files', file)); + if (projectId) formData.append('project_id', projectId); + const response = await apiClient.post('/api/media/upload/dicom', formData, { + headers: { 'Content-Type': 'multipart/form-data' }, + }); + return response.data; +} + export async function parseMedia(projectId: string): Promise<{ project_id: number; frames_extracted: number; diff --git a/src/store/useStore.ts b/src/store/useStore.ts index 8ea984a..dc1b300 100644 --- a/src/store/useStore.ts +++ b/src/store/useStore.ts @@ -8,7 +8,11 @@ export interface Project { fps?: string; frames?: number; thumbnail?: string; + thumbnail_url?: string; video_path?: string; + source_type?: string; + original_fps?: number; + parse_fps?: number; createdAt?: string; updatedAt?: string; } diff --git 
a/工程分析/实现方案-20260430_001744.md b/工程分析/实现方案-20260430_001744.md new file mode 100644 index 0000000..cf0d4c4 --- /dev/null +++ b/工程分析/实现方案-20260430_001744.md @@ -0,0 +1,62 @@ +# 实现方案 — 2026-04-30 + +## R1 — PyTorch CUDA + SAM2 安装 + +### 步骤 +1. `pip uninstall torch torchvision torchaudio -y` +2. `pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124` +3. `pip install sam2` +4. 验证: `torch.cuda.is_available()` → True +5. 修改 `backend/services/sam2_engine.py`,移除 stub fallback,使用真实 SAM2 + +### SAM2 模型配置 +- 模型文件: `/home/wkmgc/Desktop/Seg_Server/models/sam2_hiera_tiny.pt` (149MB) +- 配置文件: 需下载对应 YAML 或使用默认配置 + +## R2 — 视频封面 + +### 后端 +1. `frame_parser.py`: `parse_video()` 提取第一帧为 `thumbnail.jpg` +2. `upload_frames_to_minio()`: 同时上传 thumbnail 到 `projects/{id}/thumbnail.jpg` +3. `models.py`: Project 增加 `thumbnail_url` 字段 +4. `schemas.py`: ProjectOut 增加 `thumbnail_url` +5. `projects.py`: list_projects / get_project 返回 thumbnail presigned URL + +### 前端 +1. `ProjectLibrary.tsx`: 卡片背景显示 `proj.thumbnail_url` 的 `<img>` + +## R3 — 帧率 + +### 后端 +1. `models.py`: Project 增加 `original_fps` (float), `parse_fps` (float, default=30) +2. `schemas.py`: ProjectOut / ProjectCreate 增加字段 +3. `frame_parser.py`: + - `parse_video()` 开头用 FFmpeg/FFprobe 读取原始帧率,返回给调用方 + - 解析时按 `parse_fps` 参数提取帧 +4. `media.py`: `parse_media()` 传入 parse_fps +5. `main.py`: 默认视频种子逻辑设置 original_fps + +### 前端 +1. `useStore.ts`: Project 增加 `original_fps`, `parse_fps` +2. `api.ts`: getProjects / createProject 映射字段 +3. `ProjectLibrary.tsx`: 显示真实帧率 `original_fps` +4. 上传时弹窗允许设置 parse_fps + +## R4 — DICOM 批量导入 + +### 后端 +1. `models.py`: Project 增加 `source_type` (str, default="video") +2. `schemas.py`: ProjectOut 增加 source_type +3. `media.py`: + - `upload_media()` 支持批量上传(`List[UploadFile]`) + - 新增 `/api/media/upload/dicom` 接口,专用于批量 .dcm 上传 + - DICOM 文件存储到 `uploads/{project_id}/dicom/` + - 上传完成后可直接触发解析 +4. `frame_parser.py`: `parse_dicom()` 支持从 MinIO 读取整个 dicom 目录 + +### 前端 +1. 
`ProjectLibrary.tsx`: + - 导入按钮支持两种模式: 视频导入 / DICOM 序列导入 + - DICOM 模式: `<input type="file" multiple accept=".dcm">` + - 上传进度显示 +2. `useStore.ts`: Project 增加 `source_type` diff --git a/工程分析/测试方案-20260430_001744.md b/工程分析/测试方案-20260430_001744.md new file mode 100644 index 0000000..220c6e2 --- /dev/null +++ b/工程分析/测试方案-20260430_001744.md @@ -0,0 +1,22 @@ +# 测试方案 — 2026-04-30 + +## TC1 — PyTorch CUDA + SAM2 +1. 后端启动后日志应显示 "SAM2 loaded successfully" 而非 "SAM2 import failed" +2. `python -c "import torch; print(torch.cuda.is_available())"` → True +3. `python -c "import sam2; print('OK')"` → OK +4. 调用 `/api/ai/predict` 应返回真实 polygon,而非 dummy rectangle + +## TC2 — 视频封面 +1. 解析视频后,检查 MinIO 中是否存在 `projects/{id}/thumbnail.jpg` +2. 项目库卡片应显示视频第一帧作为封面背景 + +## TC3 — 帧率 +1. 上传 25fps 视频,original_fps 应显示 25.0 +2. 设置 parse_fps=10,解析后帧数应约为按原始帧率提取时的 parse_fps/original_fps(例如 30fps 视频约为 1/3) +3. 项目库显示原始帧率 + +## TC4 — DICOM 批量导入 +1. 选择 10 个 .dcm 文件批量上传 +2. 项目创建成功,source_type="dicom" +3. 解析完成后帧数等于上传的 .dcm 数量 +4. 再次向同一项目上传 5 个 .dcm,帧数增加 5 diff --git a/工程分析/经验记录.md b/工程分析/经验记录.md index b1f042d..0b44c60 100644 --- a/工程分析/经验记录.md +++ b/工程分析/经验记录.md @@ -5,6 +5,50 @@ --- +## 2026-04-30-00-17-44 — PyTorch CUDA + SAM2 + 封面 + 帧率 + DICOM 批量导入 + +### A. 具体问题 +1. PyTorch 为 CPU 版本,SAM2 未安装,GPU 推理不可用 +2. 项目库视频卡片无封面缩略图 +3. 项目库 FPS 为硬编码 "30FPS",不显示真实原始帧率,也无法修改解析帧率 +4. 不支持 DICOM 连续帧批量导入 + +### B. 产生原因 +1. 系统磁盘仅 24GB,PyTorch CUDA wheel (~1GB) + SAM2 编译依赖导致 `No space left on device` +2. 解析视频时未提取封面,Project 模型无 thumbnail_url 字段 +3. 解析视频时未读取原始帧率,Project 模型无 original_fps / parse_fps 字段 +4. upload 接口仅支持单文件,无批量 DICOM 上传接口,Project 无 source_type 区分视频/DICOM + +### C. 解决方案 +1. **磁盘扩容后安装 PyTorch CUDA + SAM2**: + - `pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124` + - `pip install sam2`(成功编译安装 sam2-1.1.0) + - 修正 `config.py` 中 `sam_model_config` 为 `configs/sam2/sam2_hiera_t.yaml`(Hydra 包内相对路径) + - 验证 `torch.cuda.is_available()` → True,RTX 4090 识别正常 +2. 
**视频封面**: + - `frame_parser.py` 新增 `extract_thumbnail()`,从视频第一帧提取 640px JPEG 封面 + - `media.py` `parse_media()` 解析视频时自动上传封面到 `projects/{id}/thumbnail.jpg` + - `projects.py` 返回 presigned URL + - 前端 `ProjectLibrary.tsx` 卡片背景显示 `<img>` 封面 +3. **真实帧率 + 可修改解析帧率**: + - `models.py` Project 新增 `original_fps` (Float), `parse_fps` (Float, default=30) + - `frame_parser.py` `get_video_fps()` 用 OpenCV 读取真实帧率,`parse_video()` 返回 `(frames, original_fps)` + - 前端 `ProjectLibrary.tsx` 上传视频时弹窗允许用户滑动设置 parse_fps (1-60) + - 项目卡片显示 `原 60.0fps` 和 `30FPS` 标签 +4. **DICOM 批量导入**: + - `models.py` Project 新增 `source_type` (video | dicom) + - `media.py` 新增 `/api/media/upload/dicom` 接口,接收 `List[UploadFile]`,上传多个 .dcm 到 `uploads/{id}/dicom/` + - `media.py` `parse_media()` 支持 DICOM 模式:从 MinIO 下载整个 dicom 目录 → `parse_dicom()` 解析 + - 前端 `ProjectLibrary.tsx` 导入按钮展开菜单:导入视频 / 导入 DICOM 序列,DICOM 用 `<input type="file" multiple accept=".dcm">` + +### D. 后续如何避免问题 +1. **SAM2 配置路径必须用 Hydra 包内相对路径**:`build_sam2()` 使用 Hydra,配置文件必须传 `configs/sam2/xxx.yaml` 而非绝对路径 +2. **数据库 schema 变更必须彻底清理旧表**:PostgreSQL `drop_all()` 可能因外键约束不彻底,生产环境应使用 Alembic 迁移,开发环境应手动 `DROP TABLE ... CASCADE` +3. **大依赖安装必须预留足够磁盘空间**:PyTorch CUDA (~1GB) + SAM2 build (~500MB temp) 至少需要 5GB 可用空间 +4. **前端上传交互必须区分媒体类型**:视频和 DICOM 的上传流程、文件选择器 `accept`、后续解析逻辑完全不同,应提供明确的模式切换 + +--- + ## 2026-04-29-23-28-13 — 视频帧显示链路全修复 ### A. 
具体问题 diff --git a/工程分析/需求分析-20260430_001744.md b/工程分析/需求分析-20260430_001744.md new file mode 100644 index 0000000..0f82fed --- /dev/null +++ b/工程分析/需求分析-20260430_001744.md @@ -0,0 +1,39 @@ +# 需求分析 — 2026-04-30 + +## 需求背景 +磁盘已扩容至 184GB,空间充足。用户提出 4 个核心需求: + +## 需求拆解 + +| 编号 | 需求 | 优先级 | 影响面 | +|------|------|--------|--------| +| R1 | 安装完整版 PyTorch CUDA + SAM2,恢复 GPU 推理 | P0 | conda 环境 | +| R2 | 项目库视频显示封面缩略图 | P0 | backend/media.py, ProjectLibrary.tsx | +| R3 | 项目库显示原始帧率,支持修改解析帧率 | P0 | backend/models.py, frame_parser.py, 前端 | +| R4 | DICOM 连续帧批量导入支持 | P0 | backend/media.py, ProjectLibrary.tsx | + +### R1 — PyTorch CUDA + SAM2 +- 当前 PyTorch 为 CPU 版本 (2.11.0+cpu) +- SAM2 未安装 +- GPU: RTX 4090 24GB,驱动 595.58.03 +- 目标: 安装 PyTorch 2.5+ CUDA 12.4 + SAM2,使 `/api/ai/predict` 使用真实 SAM2 推理 + +### R2 — 视频封面 +- 当前项目卡片只显示 Film 图标,无封面 +- 目标: 解析视频时提取第一帧作为封面 thumbnail,项目库显示为卡片背景图 + +### R3 — 帧率显示与修改 +- 当前 fps 为硬编码 "30FPS" +- 目标: + - 解析视频时读取真实原始帧率,存入 Project.original_fps + - 项目库显示原始帧率 + - 支持设置 parse_fps(解析帧率,可低于原始帧率) + - 后端解析时按 parse_fps 提取帧 + +### R4 — DICOM 批量导入 +- Data_Dicom帧/ 下有 300 个 .dcm 文件,共约 160MB +- 目标: + - 项目类型支持 `source_type`: video / dicom + - 导入时支持多选 .dcm 文件批量上传 + - 上传后解析为帧序列 + - 支持后续继续向同一项目新增 .dcm 文件