"""Standalone SAM 3 helper for the dedicated Python 3.12 runtime. The main FastAPI backend can keep running in the existing Python 3.11/SAM 2 environment while this helper is executed with a separate conda env that meets SAM 3's stricter runtime requirements. """ from __future__ import annotations import argparse import importlib.util import json import os import sys from pathlib import Path from typing import Any import numpy as np from PIL import Image def _torch_status() -> tuple[bool, str | None, str | None, str | None]: try: import torch cuda_available = bool(torch.cuda.is_available()) return ( cuda_available, getattr(torch, "__version__", None), getattr(torch.version, "cuda", None), torch.cuda.get_device_name(0) if cuda_available else None, ) except Exception: # noqa: BLE001 return False, None, None, None def _compact_error(exc: Exception) -> str: lines = [line.strip() for line in str(exc).splitlines() if line.strip()] for line in lines: if "Access to model" in line or "Cannot access gated repo" in line: return line return lines[0] if lines else exc.__class__.__name__ def _checkpoint_access(model_version: str) -> tuple[bool, str | None]: try: from huggingface_hub import hf_hub_download repo_id = "facebook/sam3.1" if model_version == "sam3.1" else "facebook/sam3" hf_hub_download(repo_id=repo_id, filename="config.json") return True, None except Exception as exc: # noqa: BLE001 return False, _compact_error(exc) def runtime_status() -> dict[str, Any]: model_version = os.environ.get("SAM3_MODEL_VERSION", "sam3") package_error = None package_available = importlib.util.find_spec("sam3") is not None if package_available: try: import sam3 # noqa: F401 except Exception as exc: # noqa: BLE001 package_available = False package_error = str(exc) cuda_available, torch_version, cuda_version, device_name = _torch_status() python_ok = sys.version_info >= (3, 12) checkpoint_access = False checkpoint_error = None if package_available: checkpoint_access, checkpoint_error = _checkpoint_access(model_version) available = bool(package_available and python_ok and cuda_available and checkpoint_access) missing = [] if not python_ok: missing.append("Python 3.12+ runtime") if not package_available: missing.append(f"sam3 package ({package_error})" if package_error else "sam3 package") if torch_version is None: missing.append("PyTorch") if not cuda_available: missing.append("CUDA GPU") if package_available and not checkpoint_access: missing.append(f"Hugging Face checkpoint access ({checkpoint_error})") return { "available": available, "package_available": package_available, "checkpoint_access": checkpoint_access, "python_ok": python_ok, "torch_ok": torch_version is not None, "torch_version": torch_version, "cuda_version": cuda_version, "cuda_available": cuda_available, "device": "cuda" if cuda_available else "unavailable", "device_name": device_name, "message": ( "SAM 3 external runtime is ready." if available else f"SAM 3 external runtime unavailable: missing {', '.join(missing)}." ), } def _mask_to_polygon(mask: np.ndarray) -> list[list[float]]: import cv2 if mask.dtype != np.uint8: mask = (mask > 0).astype(np.uint8) contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) height, width = mask.shape[:2] largest = [] for contour in contours: if len(contour) > len(largest): largest = contour if len(largest) < 3: return [] return [[float(point[0][0]) / width, float(point[0][1]) / height] for point in largest] def _to_numpy(value: Any) -> np.ndarray: if hasattr(value, "detach"): value = value.detach().cpu().numpy() elif hasattr(value, "cpu"): value = value.cpu().numpy() return np.asarray(value) def predict(request_path: Path) -> dict[str, Any]: import torch from sam3.model.sam3_image_processor import Sam3Processor from sam3.model_builder import build_sam3_image_model payload = json.loads(request_path.read_text(encoding="utf-8")) image_path = Path(payload["image_path"]) text = str(payload["text"]).strip() threshold = float(payload.get("confidence_threshold", 0.5)) if not text: raise ValueError("SAM 3 semantic prompt requires non-empty text.") torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True image = Image.open(image_path).convert("RGB") with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16): model = build_sam3_image_model() processor = Sam3Processor(model, confidence_threshold=threshold) state = processor.set_image(image) output = processor.set_text_prompt(state=state, prompt=text) masks = _to_numpy(output.get("masks", [])) scores = _to_numpy(output.get("scores", [])) if masks.ndim == 4: masks = masks[:, 0] elif masks.ndim == 3 and masks.shape[0] == 1: masks = masks[None, 0] polygons = [] for mask in masks: polygon = _mask_to_polygon(mask) if polygon: polygons.append(polygon) return { "polygons": polygons, "scores": scores.astype(float).tolist() if scores.size else [], } def main() -> int: parser = argparse.ArgumentParser(description="SAM 3 external runtime helper") parser.add_argument("--status", action="store_true") parser.add_argument("--request", type=Path) args = parser.parse_args() try: if args.status: print(json.dumps(runtime_status(), ensure_ascii=False)) return 0 if args.request: print(json.dumps(predict(args.request), ensure_ascii=False)) return 0 parser.error("Use --status or --request") except Exception as exc: # noqa: BLE001 print(json.dumps({"error": str(exc)}, ensure_ascii=False), file=sys.stderr) return 1 return 2 if __name__ == "__main__": raise SystemExit(main())