feat: 完善 AI 分割与工作区标注闭环

功能增加:

- 将视频导入和生成帧拆成两个明确动作,项目库生成帧时选择 FPS,工作区不再自动触发拆帧。

- 为工作区新增调整多边形工具,支持选中 mask、拖动顶点、边中点插点、双击边界按位置插点,并保留多 polygon 子区域编辑。

- 打通 AI 页 SAM2/SAM3 结果到工作区的联动,生成 mask 后自动选中,可在右侧分类树换标签,并推送到工作区继续编辑。

- 增强 Dashboard WebSocket 连接状态与心跳,使用真实 onopen/onclose/onerror 状态驱动前端显示。

- 完善 SAM3 external worker 适配,支持 box prompt、semantic 请求级阈值和 video tracker 路径。

bugfix:

- 修复 SAM2 文本语义误走自动分割的问题,改为提示使用点提示或切换 SAM3。

- 修复 SAM2 多候选重叠显示的问题,点提示和 auto fallback 默认只采用最高分候选。

- 修复 SAM2 反向点看起来无效的问题,带负点时启用背景过滤,过滤为空时移除旧候选。

- 修复 SAM3 单个 2D mask 结果无法转 polygon、低阈值 semantic 返回被默认阈值吞掉的问题。

- 修复 AI 页 mask 未选中导致分类树无法修改 SAM2 结果标签的问题。

测试和文档:

- 补充 CanvasArea、AISegmentation、ProjectLibrary、VideoWorkspace、Dashboard、websocket 和 SAM engine/API 测试。

- 新增 backend/tests/test_sam2_engine.py,覆盖 SAM2 单候选请求和 auto fallback 行为。

- 更新 README、AGENTS 和 doc 需求/设计/接口/测试矩阵,按当前实现冻结功能状态。
This commit is contained in:
2026-05-01 21:50:17 +08:00
parent 5ab4602535
commit 8a9247075e
31 changed files with 920 additions and 216 deletions

View File

@@ -19,6 +19,7 @@ type PromptBox = { x1: number; y1: number; x2: number; y2: number };
// Tool ids that the stage click handler ignores (it returns early when one is active).
// NOTE(review): presumably these shapes are created by press-drag-release instead — confirm against the mouse handlers.
const DRAG_MANUAL_TOOLS = new Set(['create_rectangle', 'create_circle', 'create_line']);
// Tool id for polygon creation. NOTE(review): usage is outside this view — confirm.
const POLYGON_TOOL = 'create_polygon';
// Tool id for the polygon adjustment tool: together with 'move' it enables the polygon edit mode,
// and it is the only tool in which double-clicking a mask outline inserts a vertex.
const EDIT_POLYGON_TOOL = 'edit_polygon';
// Tool id for point creation. NOTE(review): usage is outside this view — confirm.
const POINT_TOOL = 'create_point';
// Tool ids treated as boolean (merge / remove) area operations; while one is active, clicking a mask
// updates the multi-selection rather than the single selected mask.
const BOOLEAN_TOOLS = new Set(['area_merge', 'area_remove']);
// NOTE(review): presumably the distance (units not visible here) within which a click on the first
// vertex closes an in-progress polygon — confirm against the polygon drawing handler.
const POLYGON_CLOSE_RADIUS = 8;
@@ -95,6 +96,32 @@ function pointDistance(a: CanvasPoint, b: CanvasPoint): number {
return Math.hypot(a.x - b.x, a.y - b.y);
}
/**
 * Squared Euclidean distance from `point` to the segment `start`–`end`.
 *
 * The point is projected onto the line through the segment and the projection
 * parameter is passed through `clamp(…, 0, 1)` (helper defined elsewhere;
 * presumably it limits the value to [0, 1] so that points beyond an endpoint
 * are measured against that endpoint). A zero-length segment is treated as the
 * single point `start`.
 *
 * @param point Point to measure from.
 * @param start First endpoint of the segment.
 * @param end Second endpoint of the segment.
 * @returns The squared distance; no square root is taken.
 */
function distanceToSegmentSquared(point: CanvasPoint, start: CanvasPoint, end: CanvasPoint): number {
  const segmentX = end.x - start.x;
  const segmentY = end.y - start.y;
  const segmentLengthSquared = segmentX * segmentX + segmentY * segmentY;

  // Degenerate segment: both endpoints coincide, so measure to `start` directly.
  if (segmentLengthSquared === 0) {
    return (point.x - start.x) ** 2 + (point.y - start.y) ** 2;
  }

  // Position of the projection along the segment, 0 at `start` and 1 at `end`.
  const ratio = clamp(
    ((point.x - start.x) * segmentX + (point.y - start.y) * segmentY) / segmentLengthSquared,
    0,
    1,
  );
  const closestX = start.x + ratio * segmentX;
  const closestY = start.y + ratio * segmentY;
  return (point.x - closestX) ** 2 + (point.y - closestY) ** 2;
}
/**
 * Finds the polygon edge closest to `point`.
 *
 * Edge `i` runs from `points[i]` to `points[(i + 1) % points.length]`, so the
 * last edge wraps around to the first vertex.
 *
 * The best distance found so far is cached, so each edge is measured exactly
 * once instead of re-measuring the current best edge on every iteration.
 *
 * @param points Polygon vertices in order.
 * @param point Point to compare against each edge.
 * @returns Index of the start vertex of the nearest edge. Ties keep the
 *   earliest edge; an empty polygon yields 0.
 */
function nearestPolygonEdgeIndex(points: CanvasPoint[], point: CanvasPoint): number {
  const vertexCount = points.length;
  let bestIndex = 0;
  let bestDistance = Number.POSITIVE_INFINITY;

  for (let index = 0; index < vertexCount; index += 1) {
    const start = points[index];
    const end = points[(index + 1) % vertexCount];
    // Skip missing entries rather than measuring against an undefined vertex.
    if (!start || !end) continue;

    const distance = distanceToSegmentSquared(point, start, end);
    // Strict comparison keeps the earliest edge when several are equally close.
    if (distance < bestDistance) {
      bestIndex = index;
      bestDistance = distance;
    }
  }

  return bestIndex;
}
/**
 * Total area of a segmentation: the sum of `polygonArea` over every flat
 * polygon it contains, each converted with `flatPolygonToPoints` first.
 *
 * @param segmentation Optional list of flat coordinate arrays, one per polygon.
 * @returns The summed area, or 0 when the segmentation is missing or empty.
 */
function segmentationArea(segmentation?: number[][]): number {
  let total = 0;
  for (const flatPolygon of segmentation || []) {
    total += polygonArea(flatPolygonToPoints(flatPolygon));
  }
  return total;
}
@@ -210,10 +237,11 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
const [manualStart, setManualStart] = useState<CanvasPoint | null>(null);
const [manualCurrent, setManualCurrent] = useState<CanvasPoint | null>(null);
const [polygonPoints, setPolygonPoints] = useState<CanvasPoint[]>([]);
const [selectedMaskId, setSelectedMaskId] = useState<string | null>(null);
const [selectedMaskIds, setSelectedMaskIds] = useState<string[]>([]);
const [selectedMaskId, setSelectedMaskId] = useState<string | null>(() => useStore.getState().selectedMaskIds[0] || null);
const [selectedMaskIds, setSelectedMaskIds] = useState<string[]>(() => useStore.getState().selectedMaskIds);
const [selectedPolygonIndex, setSelectedPolygonIndex] = useState(0);
const [selectedVertexIndex, setSelectedVertexIndex] = useState<number | null>(null);
const previousFrameIdRef = useRef<string | undefined>(frame?.id);
const [isInferencing, setIsInferencing] = useState(false);
const [inferenceMessage, setInferenceMessage] = useState('');
@@ -253,6 +281,7 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
const draftMaskCount = frameMasks.filter((mask) => !mask.annotationId).length;
const dirtyMaskCount = frameMasks.filter((mask) => mask.saveStatus === 'dirty').length;
const isBooleanTool = BOOLEAN_TOOLS.has(effectiveTool);
const isPolygonEditTool = effectiveTool === 'move' || effectiveTool === EDIT_POLYGON_TOOL;
useEffect(() => {
const handleResize = () => {
@@ -273,11 +302,22 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
setManualStart(null);
setManualCurrent(null);
setPolygonPoints([]);
setSelectedVertexIndex(null);
if (!isPolygonEditTool && !isBooleanTool) {
setSelectedMaskId(null);
setSelectedMaskIds([]);
setSelectedPolygonIndex(0);
}
}, [effectiveTool, isBooleanTool, isPolygonEditTool]);
useEffect(() => {
if (previousFrameIdRef.current === frame?.id) return;
previousFrameIdRef.current = frame?.id;
setSelectedMaskId(null);
setSelectedMaskIds([]);
setSelectedPolygonIndex(0);
setSelectedVertexIndex(null);
}, [effectiveTool, frame?.id]);
}, [frame?.id]);
useEffect(() => {
setPoints([]);
@@ -420,6 +460,10 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
setIsInferencing(true);
setInferenceMessage('');
try {
const hasNegativePrompt = Boolean(promptPoints?.some((point) => point.type === 'neg'));
const existingCandidate = !options.resetCandidate && samCandidateMaskId
? masks.find((mask) => mask.id === samCandidateMaskId)
: null;
const result = await predictMask({
imageId: frame.id,
imageWidth,
@@ -429,13 +473,11 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
? promptPoints.map((p) => ({ x: p.x, y: p.y, type: p.type }))
: undefined,
box: promptBox,
...(hasNegativePrompt ? { options: { auto_filter_background: true, min_score: 0.05 } } : {}),
});
const [m] = result.masks;
if (m) {
const existingCandidate = !options.resetCandidate && samCandidateMaskId
? masks.find((mask) => mask.id === samCandidateMaskId)
: null;
const label = activeClass?.name || existingCandidate?.label || m.label;
const color = activeClass?.color || existingCandidate?.color || m.color;
const metadata = {
@@ -443,6 +485,7 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
source: aiModel === 'sam3' ? 'sam3_box' : 'sam2_interactive',
promptBox: promptBox || null,
promptPointCount: promptPoints?.length || 0,
promptNegativePointCount: promptPoints?.filter((point) => point.type === 'neg').length || 0,
};
const nextMask = {
frameId: frame.id,
@@ -476,7 +519,15 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
});
}
} else {
setInferenceMessage('模型没有返回可用区域,请调整点/框提示后重试。');
if (existingCandidate && hasNegativePrompt) {
setMasks(masks.filter((mask) => mask.id !== existingCandidate.id));
setSamCandidateMaskId(null);
setSelectedMaskId(null);
setSelectedMaskIds([]);
setInferenceMessage('反向点已排除当前候选区域,请重新框选或添加新的正向点。');
} else {
setInferenceMessage('模型没有返回可用区域,请调整点/框提示后重试。');
}
}
} catch (err) {
console.error('Inference failed:', err);
@@ -485,7 +536,7 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
} finally {
setIsInferencing(false);
}
}, [activeClass, activeTemplateId, addMask, aiModel, frame?.height, frame?.id, frame?.width, image?.height, image?.naturalHeight, image?.naturalWidth, image?.width, masks, samCandidateMaskId, updateMask]);
}, [activeClass, activeTemplateId, addMask, aiModel, frame?.height, frame?.id, frame?.width, image?.height, image?.naturalHeight, image?.naturalWidth, image?.width, masks, samCandidateMaskId, setMasks, updateMask]);
const handleApplyActiveClass = () => {
if (!frame?.id || !activeClass) return;
@@ -598,7 +649,7 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
};
const handleStageClick = (e: any) => {
if (effectiveTool === 'move') return;
if (isPolygonEditTool) return;
if (effectiveTool === 'box_select') return; // handled by mouseup
if (DRAG_MANUAL_TOOLS.has(effectiveTool)) return;
@@ -716,7 +767,7 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
window.addEventListener('keydown', handleKeyDown);
return () => window.removeEventListener('keydown', handleKeyDown);
}, [deleteMasksById, effectiveTool, finishPolygon, polygonPoints, redoMasks, selectedMask, selectedMaskIds, selectedPolygonIndex, selectedVertexIndex, undoMasks, updatePolygonMask]);
}, [deleteMasksById, effectiveTool, finishPolygon, isPolygonEditTool, polygonPoints, redoMasks, selectedMask, selectedMaskIds, selectedPolygonIndex, selectedVertexIndex, undoMasks, updatePolygonMask]);
const boxRect = React.useMemo(() => {
if (!boxStart || !boxCurrent) return null;
@@ -753,7 +804,7 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
};
const handleMaskSelect = (mask: Mask, event: any, polygonIndex = 0) => {
if (effectiveTool !== 'move' && !isBooleanTool) return;
if (!isPolygonEditTool && !isBooleanTool) return;
event.cancelBubble = true;
if (isBooleanTool) {
setSelectedMaskIds((current) => (
@@ -807,6 +858,25 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
updatePolygonMask(mask, nextPoints, selectedPolygonIndex);
};
// Double-click / double-tap handler for a mask outline: inserts a new vertex at
// the pointer position on the nearest edge of the given polygon, then selects
// the mask, the polygon and the freshly inserted vertex.
const handlePathDoubleClick = (mask: Mask, event: any, polygonIndex = 0) => {
  // Vertex insertion by double-click is only offered by the polygon edit tool.
  if (effectiveTool !== EDIT_POLYGON_TOOL) return;
  // NOTE(review): presumably stops the event from reaching parent (stage) handlers — confirm.
  event.cancelBubble = true;

  const clickPoint = stagePoint(event);
  const outline = segmentationToPoints(mask.segmentation, polygonIndex);
  // Nothing to do without a pointer position or with fewer than three vertices.
  if (!clickPoint || outline.length < 3) return;

  // The new vertex goes right after the start vertex of the closest edge.
  const insertAt = nearestPolygonEdgeIndex(outline, clickPoint) + 1;
  const withNewVertex = outline.slice();
  withNewVertex.splice(insertAt, 0, clickPoint);

  setSelectedMaskId(mask.id);
  setSelectedMaskIds([mask.id]);
  setSelectedPolygonIndex(polygonIndex);
  setSelectedVertexIndex(insertAt);
  updatePolygonMask(mask, withNewVertex, polygonIndex);
};
const handleBooleanOperation = async () => {
if (!frame || booleanSelectedMasks.length < 2) return;
const primary = booleanSelectedMasks[0];
@@ -918,6 +988,8 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
strokeWidth={(selectedMaskIds.includes(mask.id) ? 2 : 1) / scale}
onClick={(event: any) => handleMaskSelect(mask, event, polygonIndex)}
onTap={(event: any) => handleMaskSelect(mask, event, polygonIndex)}
onDblClick={(event: any) => handlePathDoubleClick(mask, event, polygonIndex)}
onDblTap={(event: any) => handlePathDoubleClick(mask, event, polygonIndex)}
/>
))}
</Group>
@@ -987,7 +1059,7 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
)))}
{/* Polygon edge insertion handles */}
{!isBooleanTool && selectedMask && selectedMaskPoints.map((point, index) => {
{isPolygonEditTool && selectedMask && selectedMaskPoints.map((point, index) => {
const next = selectedMaskPoints[(index + 1) % selectedMaskPoints.length];
if (!next) return null;
return (
@@ -1006,7 +1078,7 @@ export function CanvasArea({ activeTool, frame, onClearMasks, onDeleteMaskAnnota
})}
{/* Polygon vertex editor */}
{!isBooleanTool && selectedMask && selectedMaskPoints.map((point, index) => (
{isPolygonEditTool && selectedMask && selectedMaskPoints.map((point, index) => (
<Circle
key={`${selectedMask.id}-vertex-${selectedPolygonIndex}-${index}`}
x={point.x}