Stabilize speech audio capture
- Keep Web Audio source and processor nodes in refs so microphone processing is not lost during speech recognition.
- Explicitly resume the AudioContext before sending PCM frames to the speech WebSocket proxy.
- Disconnect audio nodes and clear speech state when listening stops or the WebSocket closes.
- Show a clear prompt when audio frames were captured but no recognizable text was returned.
- Update progress documentation for the speech capture hardening.
This commit is contained in:
@@ -93,3 +93,4 @@
|
||||
| 2026-05-02 | 模板 HTML 导出包补充模板字段和字段管理设置,导入时恢复字段库元数据。 |
|
||||
| 2026-05-02 | 修复报告编辑器加载已有 AI 区域后下拉栏初始显示“无可用 AI 区域”的问题。 |
|
||||
| 2026-05-02 | 调整抽帧百分比为两位小数保序保存;自动截图按时间顺序执行,自动插入按配置顺序执行。 |
|
||||
| 2026-05-02 | 加固报告编辑器语音采集,保留 Web Audio 节点引用、显式恢复 AudioContext,并在无识别文本时给出提示。 |
|
||||
|
||||
@@ -105,6 +105,11 @@ export default function ReportEditor() {
|
||||
const xfWsRef = useRef<WebSocket | null>(null);
|
||||
const xfAudioContextRef = useRef<AudioContext | null>(null);
|
||||
const xfMediaStreamRef = useRef<MediaStream | null>(null);
|
||||
const xfAudioSourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
|
||||
const xfAudioProcessorRef = useRef<ScriptProcessorNode | null>(null);
|
||||
const xfSpeechFrameCountRef = useRef(0);
|
||||
const xfSpeechTextReceivedRef = useRef(false);
|
||||
const xfSpeechUserStoppedRef = useRef(false);
|
||||
const [quickPrompts, setQuickPrompts] = useState<string[]>([
|
||||
'请完善报告内容', '请对内容做如下修改:'
|
||||
]);
|
||||
@@ -1143,6 +1148,17 @@ export default function ReportEditor() {
|
||||
const toggleListening = async () => {
|
||||
// 专门提取一个彻底关闭物理麦克风的函数
|
||||
const stopMicrophone = () => {
|
||||
if (xfAudioProcessorRef.current) {
|
||||
try {
|
||||
xfAudioProcessorRef.current.onaudioprocess = null;
|
||||
xfAudioProcessorRef.current.disconnect();
|
||||
} catch {}
|
||||
xfAudioProcessorRef.current = null;
|
||||
}
|
||||
if (xfAudioSourceRef.current) {
|
||||
try { xfAudioSourceRef.current.disconnect(); } catch {}
|
||||
xfAudioSourceRef.current = null;
|
||||
}
|
||||
if (xfAudioContextRef.current) {
|
||||
try { xfAudioContextRef.current.close(); } catch {}
|
||||
xfAudioContextRef.current = null;
|
||||
@@ -1155,6 +1171,7 @@ export default function ReportEditor() {
|
||||
|
||||
if (isListening) {
|
||||
setIsListening(false);
|
||||
xfSpeechUserStoppedRef.current = true;
|
||||
stopMicrophone();
|
||||
|
||||
if (xfWsRef.current && xfWsRef.current.readyState === WebSocket.OPEN) {
|
||||
@@ -1182,6 +1199,9 @@ export default function ReportEditor() {
|
||||
|
||||
const ws = new WebSocket(getSpeechIatWebSocketUrl());
|
||||
xfWsRef.current = ws;
|
||||
xfSpeechFrameCountRef.current = 0;
|
||||
xfSpeechTextReceivedRef.current = false;
|
||||
xfSpeechUserStoppedRef.current = false;
|
||||
let frameStatus = 0;
|
||||
|
||||
ws.onopen = async () => {
|
||||
@@ -1190,8 +1210,13 @@ export default function ReportEditor() {
|
||||
xfMediaStreamRef.current = stream;
|
||||
const audioContext = new AudioContextClass({ sampleRate: 16000 });
|
||||
xfAudioContextRef.current = audioContext;
|
||||
if (audioContext.state === 'suspended') {
|
||||
await audioContext.resume();
|
||||
}
|
||||
const source = audioContext.createMediaStreamSource(stream);
|
||||
const processor = audioContext.createScriptProcessor(4096, 1, 1);
|
||||
xfAudioSourceRef.current = source;
|
||||
xfAudioProcessorRef.current = processor;
|
||||
|
||||
processor.onaudioprocess = (e) => {
|
||||
if (ws.readyState !== WebSocket.OPEN || !xfAudioContextRef.current) return;
|
||||
@@ -1200,6 +1225,7 @@ export default function ReportEditor() {
|
||||
const base64Audio = arrayBufferToBase64(pcmBuffer);
|
||||
const frame: any = { data: { status: frameStatus, format: 'audio/L16;rate=16000', encoding: 'raw', audio: base64Audio } };
|
||||
ws.send(JSON.stringify(frame));
|
||||
xfSpeechFrameCountRef.current += 1;
|
||||
frameStatus = 1;
|
||||
};
|
||||
|
||||
@@ -1227,6 +1253,7 @@ export default function ReportEditor() {
|
||||
let seg = '';
|
||||
for (const w of jsonData.data.result.ws) { if (w.cw?.[0]?.w) seg += w.cw[0].w; }
|
||||
if (seg) {
|
||||
xfSpeechTextReceivedRef.current = true;
|
||||
setChatInput(prev => prev + seg);
|
||||
}
|
||||
}
|
||||
@@ -1240,7 +1267,17 @@ export default function ReportEditor() {
|
||||
};
|
||||
|
||||
ws.onerror = () => { alert('讯飞语音连接失败,请确认已登录且超级管理员已配置语音参数'); setIsListening(false); stopMicrophone(); };
|
||||
ws.onclose = () => { setIsListening(false); stopMicrophone(); };
|
||||
ws.onclose = () => {
|
||||
const shouldExplainNoText = xfSpeechUserStoppedRef.current
|
||||
&& xfSpeechFrameCountRef.current > 0
|
||||
&& !xfSpeechTextReceivedRef.current;
|
||||
setIsListening(false);
|
||||
stopMicrophone();
|
||||
xfWsRef.current = null;
|
||||
if (shouldExplainNoText) {
|
||||
alert('语音听写已结束,但讯飞没有返回可用文字。请确认麦克风输入音量正常,并尽量使用普通话靠近麦克风重试。');
|
||||
}
|
||||
};
|
||||
} catch (e: any) {
|
||||
alert('讯飞语音初始化失败: ' + e.message);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user