# Pre_Seg_Server/backend/services/frame_parser.py

"""Video/DICOM frame parsing and MinIO upload utilities."""

import logging
import os
import re
import shutil
import subprocess
from pathlib import Path
from typing import List, Optional, Tuple

import cv2
import numpy as np
from pydicom import dcmread

from minio_client import upload_file, BUCKET_NAME

logger = logging.getLogger(__name__)


def natural_filename_key(filename: str) -> Tuple[object, ...]:
    """Build a sort key that orders file names by their visible numeric order.

    Only the final path component is used. It is split into alternating text
    and digit runs; digit runs compare as integers and text runs compare
    case-insensitively, so ``frame_2.dcm`` sorts before ``frame_10.dcm``.

    Args:
        filename: File name or path.

    Returns:
        Tuple alternating between casefolded text (even positions) and
        integers (odd positions).
    """
    parts = re.split(r"(\d+)", Path(filename).name)
    # re.split with a capturing group always yields text at even indices and
    # the captured digit runs at odd indices. Choosing by position instead of
    # str.isdigit() avoids a ValueError for characters such as "²", for which
    # isdigit() is True although \d does not match them and int() rejects
    # them. It also guarantees the same type at the same position in every key.
    return tuple(
        int(part) if index % 2 else part.casefold()
        for index, part in enumerate(parts)
    )


def get_video_fps(video_path: str) -> float:
    """Return the frame rate OpenCV reports for a video file.

    Falls back to 30.0 when the file cannot be opened or when the reported
    rate is not greater than zero.
    """
    default_fps = 30.0
    capture = cv2.VideoCapture(video_path)
    if capture.isOpened():
        reported = capture.get(cv2.CAP_PROP_FPS)
        capture.release()
        if reported > 0:
            return reported
    return default_fps


def extract_thumbnail(video_path: str, output_path: str, width: int = 640) -> str:
    """Extract the first frame of a video as a thumbnail JPEG.

    Frames wider than ``width`` are downscaled to exactly ``width`` pixels
    with the aspect ratio preserved; narrower frames are written unchanged.

    Args:
        video_path: Path to the input video file.
        output_path: Destination path of the thumbnail image.
        width: Maximum thumbnail width in pixels.

    Returns:
        ``output_path``.

    Raises:
        RuntimeError: If the video cannot be opened, its first frame cannot
            be read, or the thumbnail cannot be written.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Cannot open video for thumbnail: {video_path}")
        ret, frame = cap.read()
    finally:
        # Release the capture on every path, including when read() raises.
        cap.release()
    if not ret or frame is None:
        raise RuntimeError(f"Cannot read first frame from: {video_path}")

    h, w = frame.shape[:2]
    if w > width:
        # Use the requested width directly: int(w * (width / w)) can land one
        # pixel short because of floating-point error. Clamp both sides to at
        # least 1 so extreme aspect ratios never produce an empty size.
        new_w = max(1, int(width))
        new_h = max(1, int(round(h * new_w / w)))
        frame = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # cv2.imwrite reports failure through its return value; surface that
    # instead of returning a path to a file that was never written.
    if not cv2.imwrite(output_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 85]):
        raise RuntimeError(f"Cannot write thumbnail: {output_path}")
    return output_path


def parse_video(
    video_path: str,
    output_dir: str,
    fps: int = 30,
    max_frames: Optional[int] = None,
    target_width: int = 640,
) -> Tuple[List[str], float]:
    """Extract frames from a video file using FFmpeg or OpenCV fallback.

    FFmpeg is used when it is on ``PATH``; if it is missing, exits with a
    non-zero status, or raises, frames are extracted with OpenCV instead.
    Frames are written as ``frame_NNNNNN.jpg`` starting at index 0.

    Args:
        video_path: Path to the input video file.
        output_dir: Directory to save extracted frames (created if missing).
            Frame files left there by an earlier run are not distinguished
            from new ones, so a dedicated directory should be used.
        fps: Target frame extraction rate; values below 1 are clamped to 1.
        max_frames: Optional maximum number of frames to extract. ``None``
            or a non-positive value means no limit.
        target_width: Output frame width for model-friendly frame sequences;
            values below 1 are clamped to 1.

    Returns:
        Tuple of (frame_paths, original_fps).

    Raises:
        RuntimeError: If the OpenCV fallback cannot open the video.
    """
    os.makedirs(output_dir, exist_ok=True)
    frame_paths: List[str] = []
    original_fps = get_video_fps(video_path)
    safe_fps = max(int(fps), 1)
    safe_width = max(int(target_width), 1)
    limit = int(max_frames) if max_frames and max_frames > 0 else None

    # Try FFmpeg first
    if shutil.which("ffmpeg"):
        try:
            pattern = os.path.join(output_dir, "frame_%06d.jpg")
            cmd = [
                "ffmpeg",
                "-i", video_path,
                "-vf", f"fps={safe_fps},scale={safe_width}:-1",
                "-start_number", "0",
                "-q:v", "5",
            ]
            if limit is not None:
                # Stop encoding at the limit instead of decoding the whole
                # video and discarding the surplus frames afterwards.
                cmd += ["-frames:v", str(limit)]
            cmd += ["-y", pattern]
            logger.info("Running FFmpeg: %s", " ".join(cmd))
            result = subprocess.run(
                cmd,
                stdin=subprocess.DEVNULL,  # keep FFmpeg from reading our stdin
                capture_output=True,
                text=True,
                # FFmpeg echoes file metadata that may not be valid text; a
                # decode error must not discard an otherwise successful run.
                errors="replace",
                check=False,
            )
            if result.returncode == 0:
                # Collect only files matching the FFmpeg output pattern and
                # order them by frame number rather than lexicographically.
                frame_name = re.compile(r"frame_(\d{6,})\.jpg")
                numbered = []
                for name in os.listdir(output_dir):
                    match = frame_name.fullmatch(name)
                    if match:
                        numbered.append((int(match.group(1)), name))
                numbered.sort()
                frame_paths = [os.path.join(output_dir, name) for _, name in numbered]
                if limit is not None:
                    frame_paths = frame_paths[:limit]
                logger.info("Extracted %d frames via FFmpeg", len(frame_paths))
                return frame_paths, original_fps
            logger.warning("FFmpeg failed: %s", result.stderr)
        except Exception as exc:  # noqa: BLE001
            logger.warning("FFmpeg exception: %s", exc)

    # OpenCV fallback
    logger.info("Falling back to OpenCV frame extraction")
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Cannot open video: {video_path}")

        video_fps = cap.get(cv2.CAP_PROP_FPS) or 30
        # Keep every ``interval``-th decoded frame to approximate safe_fps.
        interval = max(1, int(round(video_fps / safe_fps)))
        count = 0
        saved = 0

        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                break
            if count % interval == 0:
                path = os.path.join(output_dir, f"frame_{saved:06d}.jpg")
                h, w = frame.shape[:2]
                if w != safe_width:
                    scale = safe_width / max(w, 1)
                    new_size = (safe_width, max(1, int(round(h * scale))))
                    frame = cv2.resize(frame, new_size, interpolation=cv2.INTER_AREA)
                # Only report frames that were actually written to disk.
                if cv2.imwrite(path, frame, [cv2.IMWRITE_JPEG_QUALITY, 80]):
                    frame_paths.append(path)
                    saved += 1
                else:
                    logger.warning("Failed to write frame: %s", path)
                if limit is not None and saved >= limit:
                    break
            count += 1
    finally:
        # Release the capture even when reading or writing raises.
        cap.release()

    logger.info("Extracted %d frames via OpenCV", len(frame_paths))
    return frame_paths, original_fps


def parse_dicom(
    dicom_dir: str,
    output_dir: str,
    max_frames: Optional[int] = None,
) -> List[str]:
    """Extract frames from DICOM files in a directory.

    Files are processed in natural filename order. A single-image file is
    written as ``frame_<file index>.jpg``; every frame of a multi-frame file
    is written as ``frame_<file index>_<frame index>.jpg``. Files that cannot
    be read or decoded are logged and skipped.

    Args:
        dicom_dir: Directory containing .dcm files.
        output_dir: Directory to save extracted frames (created if missing).
        max_frames: Optional maximum number of frames to extract, counted
            across all files (including frames of multi-frame files).

    Returns:
        List of paths to extracted frame images.
    """
    os.makedirs(output_dir, exist_ok=True)
    dcm_files = sorted(
        (f for f in os.listdir(dicom_dir) if f.lower().endswith(".dcm")),
        key=natural_filename_key,
    )

    frame_paths: List[str] = []
    for idx, fname in enumerate(dcm_files):
        # Count written frames rather than files so a multi-frame file
        # cannot push the result past max_frames.
        if max_frames and len(frame_paths) >= max_frames:
            break
        path = os.path.join(dicom_dir, fname)
        try:
            ds = dcmread(path)
            pixel_array = ds.pixel_array

            # Normalize to 8-bit (min/max taken over the whole array)
            if pixel_array.dtype != np.uint8:
                pixel_array = pixel_array.astype(np.float32)
                pixel_array = (
                    (pixel_array - pixel_array.min())
                    / (pixel_array.max() - pixel_array.min() + 1e-8)
                    * 255
                )
                pixel_array = pixel_array.astype(np.uint8)

            # pydicom appends a samples axis for colour data, so one colour
            # image is 3-D. Use SamplesPerPixel to tell a colour image
            # (rows, cols, samples) apart from a greyscale multi-frame stack
            # (frames, rows, cols).
            samples = int(getattr(ds, "SamplesPerPixel", 1) or 1)
            image_ndim = 3 if samples > 1 else 2
            if pixel_array.ndim == image_ndim:
                images = [(f"frame_{idx:06d}.jpg", pixel_array)]
            elif pixel_array.ndim == image_ndim + 1:
                images = [
                    (f"frame_{idx:06d}_{f:03d}.jpg", pixel_array[f])
                    for f in range(pixel_array.shape[0])
                ]
            else:
                raise ValueError(f"Unsupported pixel array shape: {pixel_array.shape}")

            if max_frames:
                images = images[: max(max_frames - len(frame_paths), 0)]

            for name, image in images:
                if samples == 3:
                    # cv2.imwrite expects BGR channel order.
                    # NOTE(review): assumes pixel_array is RGB here; confirm
                    # for YBR photometric interpretations.
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                out_path = os.path.join(output_dir, name)
                # Only report frames that were actually written to disk.
                if cv2.imwrite(out_path, image, [cv2.IMWRITE_JPEG_QUALITY, 85]):
                    frame_paths.append(out_path)
                else:
                    logger.error("Failed to write DICOM frame %s", out_path)
        except Exception as exc:  # noqa: BLE001
            logger.error("Failed to read DICOM %s: %s", path, exc)

    logger.info("Extracted %d frames from DICOM", len(frame_paths))
    return frame_paths


def upload_frames_to_minio(
    frames: List[str],
    project_id: int,
    object_prefix: Optional[str] = None,
) -> List[str]:
    """Upload local frame images through ``upload_file`` and collect their object names.

    Each file is read fully into memory and uploaded under
    ``<prefix>/<file name>`` with content type ``image/jpeg``. A frame whose
    read or upload raises is logged and left out of the result; the remaining
    frames are still processed.

    Args:
        frames: List of local file paths.
        project_id: Project ID used to build the default prefix
            ``projects/<project_id>/frames``.
        object_prefix: Optional prefix used instead of the default when it is
            non-empty.

    Returns:
        Object names of the frames that were uploaded without error, in
        input order.
    """
    base = object_prefix if object_prefix else f"projects/{project_id}/frames"
    uploaded: List[str] = []

    for local_path in frames:
        remote_name = f"{base}/{os.path.basename(local_path)}"
        try:
            with open(local_path, "rb") as handle:
                payload = handle.read()
            upload_file(
                remote_name,
                payload,
                content_type="image/jpeg",
                length=len(payload),
            )
        except Exception as exc:  # noqa: BLE001
            logger.error("Failed to upload %s: %s", local_path, exc)
        else:
            uploaded.append(remote_name)

    logger.info("Uploaded %d/%d frames to MinIO", len(uploaded), len(frames))
    return uploaded