Stabilize speech audio capture

- Keep Web Audio source and processor nodes in refs so microphone processing is not lost during speech recognition.

- Explicitly resume the AudioContext before sending PCM frames to the speech WebSocket proxy.

- Disconnect audio nodes and clear speech state when listening stops or the WebSocket closes.

- Show a clear prompt when the user stops listening after audio frames were sent but no recognizable text was returned.

- Update progress documentation for the speech capture hardening.
This commit is contained in:
2026-05-02 06:20:48 +08:00
parent 5d936832da
commit 87ab7d4b9c
2 changed files with 39 additions and 1 deletions

View File

@@ -93,3 +93,4 @@
| 2026-05-02 | 模板 HTML 导出包补充模板字段和字段管理设置,导入时恢复字段库元数据。 |
| 2026-05-02 | 修复报告编辑器加载已有 AI 区域后下拉栏初始显示“无可用 AI 区域”的问题。 |
| 2026-05-02 | 调整抽帧百分比为两位小数保序保存;自动截图按时间顺序执行,自动插入按配置顺序执行。 |
| 2026-05-02 | 加固报告编辑器语音采集，保留 Web Audio 节点引用、显式恢复 AudioContext，并在无识别文本时给出提示。 |

View File

@@ -105,6 +105,11 @@ export default function ReportEditor() {
const xfWsRef = useRef<WebSocket | null>(null);
const xfAudioContextRef = useRef<AudioContext | null>(null);
const xfMediaStreamRef = useRef<MediaStream | null>(null);
const xfAudioSourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
const xfAudioProcessorRef = useRef<ScriptProcessorNode | null>(null);
const xfSpeechFrameCountRef = useRef(0);
const xfSpeechTextReceivedRef = useRef(false);
const xfSpeechUserStoppedRef = useRef(false);
const [quickPrompts, setQuickPrompts] = useState<string[]>([
'请完善报告内容', '请对内容做如下修改:'
]);
@@ -1143,6 +1148,17 @@ export default function ReportEditor() {
const toggleListening = async () => {
// 专门提取一个彻底关闭物理麦克风的函数
const stopMicrophone = () => {
if (xfAudioProcessorRef.current) {
try {
xfAudioProcessorRef.current.onaudioprocess = null;
xfAudioProcessorRef.current.disconnect();
} catch {}
xfAudioProcessorRef.current = null;
}
if (xfAudioSourceRef.current) {
try { xfAudioSourceRef.current.disconnect(); } catch {}
xfAudioSourceRef.current = null;
}
if (xfAudioContextRef.current) {
try { xfAudioContextRef.current.close(); } catch {}
xfAudioContextRef.current = null;
@@ -1155,6 +1171,7 @@ export default function ReportEditor() {
if (isListening) {
setIsListening(false);
xfSpeechUserStoppedRef.current = true;
stopMicrophone();
if (xfWsRef.current && xfWsRef.current.readyState === WebSocket.OPEN) {
@@ -1182,6 +1199,9 @@ export default function ReportEditor() {
const ws = new WebSocket(getSpeechIatWebSocketUrl());
xfWsRef.current = ws;
xfSpeechFrameCountRef.current = 0;
xfSpeechTextReceivedRef.current = false;
xfSpeechUserStoppedRef.current = false;
let frameStatus = 0;
ws.onopen = async () => {
@@ -1190,8 +1210,13 @@ export default function ReportEditor() {
xfMediaStreamRef.current = stream;
const audioContext = new AudioContextClass({ sampleRate: 16000 });
xfAudioContextRef.current = audioContext;
if (audioContext.state === 'suspended') {
await audioContext.resume();
}
const source = audioContext.createMediaStreamSource(stream);
const processor = audioContext.createScriptProcessor(4096, 1, 1);
xfAudioSourceRef.current = source;
xfAudioProcessorRef.current = processor;
processor.onaudioprocess = (e) => {
if (ws.readyState !== WebSocket.OPEN || !xfAudioContextRef.current) return;
@@ -1200,6 +1225,7 @@ export default function ReportEditor() {
const base64Audio = arrayBufferToBase64(pcmBuffer);
const frame: any = { data: { status: frameStatus, format: 'audio/L16;rate=16000', encoding: 'raw', audio: base64Audio } };
ws.send(JSON.stringify(frame));
xfSpeechFrameCountRef.current += 1;
frameStatus = 1;
};
@@ -1227,6 +1253,7 @@ export default function ReportEditor() {
let seg = '';
for (const w of jsonData.data.result.ws) { if (w.cw?.[0]?.w) seg += w.cw[0].w; }
if (seg) {
xfSpeechTextReceivedRef.current = true;
setChatInput(prev => prev + seg);
}
}
@@ -1240,7 +1267,17 @@ export default function ReportEditor() {
};
ws.onerror = () => { alert('讯飞语音连接失败,请确认已登录且超级管理员已配置语音参数'); setIsListening(false); stopMicrophone(); };
ws.onclose = () => { setIsListening(false); stopMicrophone(); };
// Close handler for this session's speech socket: tears down audio capture,
// clears the socket ref, and explains an empty result to the user when they
// stopped a session that sent audio frames but never received any text.
ws.onclose = () => {
  // The user may have started a new session before this socket finished
  // closing. In that case xfWsRef, the speech counters and the microphone all
  // belong to the newer session, so this stale close event must not touch them.
  // A null ref is still treated as "ours" so the normal teardown path runs.
  const supersededByNewerSession = xfWsRef.current !== null && xfWsRef.current !== ws;
  if (supersededByNewerSession) return;

  // Evaluate before teardown: only prompt when the user explicitly stopped,
  // at least one audio frame was sent, and no recognized text came back.
  const shouldExplainNoText = xfSpeechUserStoppedRef.current
    && xfSpeechFrameCountRef.current > 0
    && !xfSpeechTextReceivedRef.current;
  setIsListening(false);
  stopMicrophone();
  xfWsRef.current = null;
  if (shouldExplainNoText) {
    alert('语音听写已结束,但讯飞没有返回可用文字。请确认麦克风输入音量正常,并尽量使用普通话靠近麦克风重试。');
  }
};
} catch (e: any) {
alert('讯飞语音初始化失败: ' + e.message);
}