From 87ab7d4b9cdf85b8fab4224195b21997a17307df Mon Sep 17 00:00:00 2001 From: admin <572701190@qq.com> Date: Sat, 2 May 2026 06:20:48 +0800 Subject: [PATCH] Stabilize speech audio capture - Keep Web Audio source and processor nodes in refs so microphone processing is not lost during speech recognition. - Explicitly resume the AudioContext before sending PCM frames to the speech WebSocket proxy. - Disconnect audio nodes and clear speech state when listening stops or the WebSocket closes. - Show a clear prompt when audio frames were captured but no recognizable text was returned. - Update progress documentation for the speech capture hardening. --- docs/progress.md | 1 + src/pages/ReportEditor.tsx | 39 +++++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/docs/progress.md b/docs/progress.md index 49240e6..ee567ac 100644 --- a/docs/progress.md +++ b/docs/progress.md @@ -93,3 +93,4 @@ | 2026-05-02 | 模板 HTML 导出包补充模板字段和字段管理设置,导入时恢复字段库元数据。 | | 2026-05-02 | 修复报告编辑器加载已有 AI 区域后下拉栏初始显示“无可用 AI 区域”的问题。 | | 2026-05-02 | 调整抽帧百分比为两位小数保序保存;自动截图按时间顺序执行,自动插入按配置顺序执行。 | +| 2026-05-02 | 加固报告编辑器语音采集,保留 Web Audio 节点引用、显式恢复 AudioContext,并在无识别文本时给出提示。 | diff --git a/src/pages/ReportEditor.tsx b/src/pages/ReportEditor.tsx index a8a7c54..97e752b 100644 --- a/src/pages/ReportEditor.tsx +++ b/src/pages/ReportEditor.tsx @@ -105,6 +105,11 @@ export default function ReportEditor() { const xfWsRef = useRef(null); const xfAudioContextRef = useRef(null); const xfMediaStreamRef = useRef(null); + const xfAudioSourceRef = useRef(null); + const xfAudioProcessorRef = useRef(null); + const xfSpeechFrameCountRef = useRef(0); + const xfSpeechTextReceivedRef = useRef(false); + const xfSpeechUserStoppedRef = useRef(false); const [quickPrompts, setQuickPrompts] = useState([ '请完善报告内容', '请对内容做如下修改:' ]); @@ -1143,6 +1148,17 @@ export default function ReportEditor() { const toggleListening = async () => { // 专门提取一个彻底关闭物理麦克风的函数 const stopMicrophone = () => { + if (xfAudioProcessorRef.current) { + try { + xfAudioProcessorRef.current.onaudioprocess = null; + xfAudioProcessorRef.current.disconnect(); + } catch {} + xfAudioProcessorRef.current = null; + } + if (xfAudioSourceRef.current) { + try { xfAudioSourceRef.current.disconnect(); } catch {} + xfAudioSourceRef.current = null; + } if (xfAudioContextRef.current) { try { xfAudioContextRef.current.close(); } catch {} xfAudioContextRef.current = null; @@ -1155,6 +1171,7 @@ export default function ReportEditor() { if (isListening) { setIsListening(false); + xfSpeechUserStoppedRef.current = true; stopMicrophone(); if (xfWsRef.current && xfWsRef.current.readyState === WebSocket.OPEN) { @@ -1182,6 +1199,9 @@ export default function ReportEditor() { const ws = new WebSocket(getSpeechIatWebSocketUrl()); xfWsRef.current = ws; + xfSpeechFrameCountRef.current = 0; + xfSpeechTextReceivedRef.current = false; + xfSpeechUserStoppedRef.current = false; let frameStatus = 0; ws.onopen = async () => { @@ -1190,8 +1210,13 @@ export default function ReportEditor() { xfMediaStreamRef.current = stream; const audioContext = new AudioContextClass({ sampleRate: 16000 }); xfAudioContextRef.current = audioContext; + if (audioContext.state === 'suspended') { + await audioContext.resume(); + } const source = audioContext.createMediaStreamSource(stream); const processor = audioContext.createScriptProcessor(4096, 1, 1); + xfAudioSourceRef.current = source; + xfAudioProcessorRef.current = processor; processor.onaudioprocess = (e) => { if (ws.readyState !== WebSocket.OPEN || !xfAudioContextRef.current) return; @@ -1200,6 +1225,7 @@ export default function ReportEditor() { const base64Audio = arrayBufferToBase64(pcmBuffer); const frame: any = { data: { status: frameStatus, format: 'audio/L16;rate=16000', encoding: 'raw', audio: base64Audio } }; ws.send(JSON.stringify(frame)); + xfSpeechFrameCountRef.current += 1; frameStatus = 1; }; @@ -1227,6 +1253,7 @@ export default function ReportEditor() { let seg = ''; for (const w of jsonData.data.result.ws) { if (w.cw?.[0]?.w) seg += w.cw[0].w; } if (seg) { + xfSpeechTextReceivedRef.current = true; setChatInput(prev => prev + seg); } } @@ -1240,7 +1267,17 @@ export default function ReportEditor() { }; ws.onerror = () => { alert('讯飞语音连接失败,请确认已登录且超级管理员已配置语音参数'); setIsListening(false); stopMicrophone(); }; - ws.onclose = () => { setIsListening(false); stopMicrophone(); }; + ws.onclose = () => { + const shouldExplainNoText = xfSpeechUserStoppedRef.current + && xfSpeechFrameCountRef.current > 0 + && !xfSpeechTextReceivedRef.current; + setIsListening(false); + stopMicrophone(); + xfWsRef.current = null; + if (shouldExplainNoText) { + alert('语音听写已结束,但讯飞没有返回可用文字。请确认麦克风输入音量正常,并尽量使用普通话靠近麦克风重试。'); + } + }; } catch (e: any) { alert('讯飞语音初始化失败: ' + e.message); }