diff --git a/docs/api-contract.md b/docs/api-contract.md index 7f29e82..11a7c5a 100644 --- a/docs/api-contract.md +++ b/docs/api-contract.md @@ -541,8 +541,10 @@ pageSize?: number - 请求上下文只能包含当前报告内容和当前报告内用户有权访问的图片/关键帧。 - 不允许跨部门检索报告作为上下文。 - 当前实现接收 OpenAI 兼容 `messages`、温度等参数,后端会用全局 Provider 的 `modelName` 覆盖请求中的 `model`,所有用户共用同一套 key。 +- 报告编辑器 AI 写作请求会使用 OpenAI 兼容 JSON Mode:`response_format: { "type": "json_object" }`,同时在 prompt 中要求返回 JSON。 +- Kimi 默认主模型为 `kimi-k2.6`;对 K2.6/K2.5 请求会移除不兼容的采样参数,并默认传入 `thinking: { "type": "disabled" }` 以适配报告生成的低延迟场景。 - Provider 可配置 `fallbackModelName` 作为备用模型;主模型在短暂重试后仍返回 `429/5xx` 或请求超时时,后端会自动改用备用模型再请求一次。 -- 上游模型返回 `429/5xx` 等临时错误时,后端会对 `/chat/completions` 做短暂重试;无可用备用模型或备用模型仍失败时保留上游 HTTP 状态码,并通过错误码区分 `AI_PROVIDER_OVERLOADED`、`AI_PROVIDER_RATE_LIMITED`、`AI_PROVIDER_UNAVAILABLE`、`AI_PROVIDER_TIMEOUT` 或 `AI_PROVIDER_ERROR`。 +- 上游模型返回过载、限流、`5xx` 等临时错误时,后端会对 `/chat/completions` 做短暂重试;无可用备用模型或备用模型仍失败时保留上游 HTTP 状态码,并通过错误码区分 `AI_PROVIDER_OVERLOADED`、`AI_PROVIDER_RATE_LIMITED`、`AI_PROVIDER_QUOTA_EXCEEDED`、`AI_PROVIDER_UNAVAILABLE`、`AI_PROVIDER_TIMEOUT` 或 `AI_PROVIDER_ERROR`。账户余额/额度不足不会触发重试或备用模型。 ## Speech API diff --git a/server/src/ai/ai.service.test.ts b/server/src/ai/ai.service.test.ts index c71ae9f..b183f78 100644 --- a/server/src/ai/ai.service.test.ts +++ b/server/src/ai/ai.service.test.ts @@ -91,6 +91,36 @@ describe('AiService', () => { expect(fetchMock).toHaveBeenCalledTimes(3); }); + it('does not retry or fallback when the provider reports quota or balance errors', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify({ + error: { + type: 'exceeded_current_quota_error', + message: 'You exceeded your current token quota, please check your account balance', + }, + }), { status: 429 }), + ); + vi.stubGlobal('fetch', fetchMock); + + await expect(createService( + 'moonshot-v1-32k', + 'https://api.moonshot.cn/v1', + 'moonshot-v1-auto', + ).chat(actor, { + messages: [{ role: 'user', content: '请完善报告内容' }], + 
})).rejects.toMatchObject({ + response: expect.objectContaining({ + code: 'AI_PROVIDER_QUOTA_EXCEEDED', + message: expect.stringContaining('AI 服务请求失败:429'), + }), + status: 429, + }); + + const requestedModels = fetchMock.mock.calls.map((call) => JSON.parse(String(call[1]?.body)).model); + expect(fetchMock).toHaveBeenCalledTimes(1); + expect(requestedModels).toEqual(['moonshot-v1-32k']); + }); + it('falls back to the configured backup model after retryable provider failures', async () => { const fetchMock = vi.fn() .mockResolvedValueOnce(new Response(JSON.stringify({ message: 'The engine is currently overloaded', type: 'engine_overloaded_error' }), { status: 429 })) @@ -129,6 +159,7 @@ describe('AiService', () => { messages: [{ role: 'user', content: '请完善报告内容' }], temperature: 0.3, top_p: 0.8, + n: 2, presence_penalty: 0.1, frequency_penalty: 0.1, }); @@ -137,20 +168,22 @@ describe('AiService', () => { expect(requestBody).toMatchObject({ messages: [{ role: 'user', content: '请完善报告内容' }], model: 'kimi-k2.6', + thinking: { type: 'disabled' }, }); expect(requestBody).not.toHaveProperty('temperature'); expect(requestBody).not.toHaveProperty('top_p'); + expect(requestBody).not.toHaveProperty('n'); expect(requestBody).not.toHaveProperty('presence_penalty'); expect(requestBody).not.toHaveProperty('frequency_penalty'); }); - it('uses the faster Kimi turbo text model for Kimi K2 text-only report prompts', async () => { + it('upgrades deprecated Kimi K2 text-only report prompts to Kimi K2.6 non-thinking mode', async () => { const fetchMock = vi.fn().mockResolvedValue( new Response(JSON.stringify({ choices: [{ message: { content: '{"reply":"已完善"}' } }] }), { status: 200 }), ); vi.stubGlobal('fetch', fetchMock); - await createService('kimi-k2.6', 'https://api.moonshot.cn/v1').chat(actor, { + await createService('kimi-k2-turbo-preview', 'https://api.moonshot.cn/v1').chat(actor, { messages: [{ role: 'user', content: '请继续完善手术步骤' }], temperature: 0.3, }); @@ -158,7 +191,8 @@ 
describe('AiService', () => { const requestBody = JSON.parse(String(fetchMock.mock.calls[0][1]?.body)); expect(requestBody).toMatchObject({ messages: [{ role: 'user', content: '请继续完善手术步骤' }], - model: 'kimi-k2-turbo-preview', + model: 'kimi-k2.6', + thinking: { type: 'disabled' }, }); expect(requestBody).not.toHaveProperty('temperature'); }); @@ -184,6 +218,7 @@ describe('AiService', () => { const requestBody = JSON.parse(String(fetchMock.mock.calls[0][1]?.body)); expect(requestBody.model).toBe('kimi-k2.6'); + expect(requestBody.thinking).toEqual({ type: 'disabled' }); expect(requestBody).not.toHaveProperty('temperature'); }); diff --git a/server/src/ai/ai.service.ts b/server/src/ai/ai.service.ts index 557de4d..3e2e06b 100644 --- a/server/src/ai/ai.service.ts +++ b/server/src/ai/ai.service.ts @@ -13,7 +13,7 @@ interface AiProvider { const RETRYABLE_PROVIDER_STATUSES = new Set([429, 500, 502, 503, 504]); const DEFAULT_RETRY_DELAYS_MS = [600, 1200]; const DEFAULT_PROVIDER_TIMEOUT_MS = 45_000; -const DEFAULT_KIMI_TEXT_MODEL = 'kimi-k2-turbo-preview'; +const DEFAULT_KIMI_TEXT_MODEL = 'kimi-k2.6'; const DEFAULT_KIMI_VISION_MODEL = 'kimi-k2.6'; @Injectable() @@ -102,13 +102,19 @@ export class AiService { private normalizeProviderPayload(payload: Record) { const model = typeof payload.model === 'string' ? payload.model : ''; - if (!/^kimi-k2(?:[.-]|$)/i.test(model)) return payload; + if (!this.isKimiK2Model(model)) return payload; const normalized = { ...payload }; delete normalized.temperature; delete normalized.top_p; + delete normalized.n; delete normalized.presence_penalty; delete normalized.frequency_penalty; + + if (this.supportsKimiThinkingToggle(model) && !('thinking' in normalized)) { + normalized.thinking = { type: 'disabled' }; + } + return normalized; } @@ -127,11 +133,11 @@ export class AiService { const model = configuredModel || (typeof input.model === 'string' ? 
input.model : ''); if (!this.isMoonshotProvider(provider)) return model; - const hasImages = this.hasImageInput(input.messages); - if (hasImages && !this.supportsImageInput(model)) { + const hasMedia = this.hasMediaInput(input.messages); + if (hasMedia && !this.supportsMediaInput(model)) { return process.env.AI_KIMI_VISION_MODEL || DEFAULT_KIMI_VISION_MODEL; } - if (!hasImages && this.isKimiMultimodalModel(model)) { + if (this.isDeprecatedKimiK2Model(model)) { return process.env.AI_KIMI_TEXT_MODEL || DEFAULT_KIMI_TEXT_MODEL; } return model; @@ -141,7 +147,7 @@ export class AiService { return /moonshot\.cn/i.test(provider.endpoint); } - private supportsImageInput(model: string) { + private supportsMediaInput(model: string) { return /vision/i.test(model) || this.isKimiMultimodalModel(model); } @@ -149,20 +155,37 @@ export class AiService { return /^kimi-k2\.(?:5|6)$/i.test(model); } - private hasImageInput(messages: unknown) { + private supportsKimiThinkingToggle(model: string) { + return this.isKimiMultimodalModel(model); + } + + private isKimiK2Model(model: string) { + return /^kimi-k2(?:[.-]|$)/i.test(model); + } + + private isDeprecatedKimiK2Model(model: string) { + return /^kimi-k2(?:-|$)/i.test(model); + } + + private hasMediaInput(messages: unknown) { if (!Array.isArray(messages)) return false; return messages.some((message) => { if (typeof message !== 'object' || message === null || !('content' in message)) return false; - return this.hasImageContent((message as { content?: unknown }).content); + return this.hasMediaContent((message as { content?: unknown }).content); }); } - private hasImageContent(content: unknown): boolean { + private hasMediaContent(content: unknown): boolean { if (!Array.isArray(content)) return false; return content.some((part) => ( typeof part === 'object' && part !== null && - ('image_url' in part || (part as { type?: unknown }).type === 'image_url') + ( + 'image_url' in part || + 'video_url' in part || + (part as { type?: unknown 
}).type === 'image_url' || + (part as { type?: unknown }).type === 'video_url' + ) )); } @@ -202,7 +225,7 @@ export class AiService { const retryDelays = this.retryDelays(); let response = await this.fetchProvider(url, init); for (const delayMs of retryDelays) { - if (!RETRYABLE_PROVIDER_STATUSES.has(response.status)) break; + if (!(await this.shouldRetryResponse(response))) break; await this.sleep(delayMs); response = await this.fetchProvider(url, init); } @@ -230,7 +253,11 @@ export class AiService { }); const responsePayload = await this.parseProviderResponse(response); - if (response.ok || index === models.length - 1 || !this.shouldFallbackFromStatus(response.status)) { + if ( + response.ok || + index === models.length - 1 || + !this.shouldFallbackFromResponse(response.status, responsePayload) + ) { return { response, responsePayload }; } @@ -246,12 +273,23 @@ export class AiService { throw lastError instanceof Error ? lastError : new BadRequestException('AI 服务请求失败'); } - private shouldFallbackFromStatus(status: number) { - return RETRYABLE_PROVIDER_STATUSES.has(status); + private async shouldRetryResponse(response: Response) { + if (!RETRYABLE_PROVIDER_STATUSES.has(response.status)) return false; + const payload = await this.parseProviderResponse(response.clone()); + return this.isRetryableProviderPayload(response.status, payload); + } + + private shouldFallbackFromResponse(status: number, payload: unknown) { + return RETRYABLE_PROVIDER_STATUSES.has(status) && this.isRetryableProviderPayload(status, payload); } private shouldFallbackFromError(error: unknown) { - return error instanceof HttpException && this.shouldFallbackFromStatus(error.getStatus()); + return error instanceof HttpException && this.shouldFallbackFromResponse(error.getStatus(), error.getResponse()); + } + + private isRetryableProviderPayload(status: number, payload: unknown) { + if (this.isQuotaOrBalanceError(status, payload)) return false; + return RETRYABLE_PROVIDER_STATUSES.has(status); } 
private formatProviderError(status: number, payload: unknown) { @@ -275,6 +313,8 @@ export class AiService { } private providerErrorCode(status: number, payload: unknown) { + if (this.isQuotaOrBalanceError(status, payload)) return 'AI_PROVIDER_QUOTA_EXCEEDED'; + const providerType = typeof payload === 'object' && payload !== null && 'type' in payload ? String((payload as { type: unknown }).type) @@ -289,6 +329,37 @@ export class AiService { return 'AI_PROVIDER_ERROR'; } + /** True for HTTP 402, or for a non-5xx error whose type/message/code contains a quota/balance/billing keyword; 5xx responses and explicit rate-limit/overload errors are never treated as quota failures so they stay retryable. */ + private isQuotaOrBalanceError(status: number, payload: unknown) { + if (status === 402) return true; + const text = status >= 500 ? '' : this.providerErrorText(payload); + return !/rate_limit|overloaded/i.test(text) && /quota|balance|billing|insufficient|suspended/i.test(text); + } + + private providerErrorText(payload: unknown): string { + if (typeof payload === 'string') return payload; + if (typeof payload !== 'object' || payload === null) return ''; + + const error = 'error' in payload ? (payload as { error: unknown }).error : null; + const topLevel = [ + 'type' in payload ? (payload as { type: unknown }).type : '', + 'message' in payload ? (payload as { message: unknown }).message : '', + 'code' in payload ? (payload as { code: unknown }).code : '', + ]; + + if (typeof error === 'object' && error !== null) { + topLevel.push( + 'type' in error ? (error as { type: unknown }).type : '', + 'message' in error ? (error as { message: unknown }).message : '', + 'code' in error ? (error as { code: unknown }).code : '', + ); + } else if (typeof error === 'string') { + topLevel.push(error); + } + + return topLevel.filter(Boolean).join(' '); + } + private extractProviderErrorType(error: unknown) { return typeof error === 'object' && error !== null && 'type' in error ? 
String((error as { type: unknown }).type) diff --git a/server/src/demo/demo-defaults.ts b/server/src/demo/demo-defaults.ts index e379f5f..66bec29 100644 --- a/server/src/demo/demo-defaults.ts +++ b/server/src/demo/demo-defaults.ts @@ -20,7 +20,7 @@ export const DEMO_SYSTEM_SETTINGS = { kimi: { endpoint: 'https://api.moonshot.cn/v1', apiKey: DEMO_AI_API_KEY, - modelName: 'kimi-k2-turbo-preview', + modelName: 'kimi-k2.6', fallbackModelName: 'moonshot-v1-auto', }, deepseek: { endpoint: 'https://api.deepseek.com/v1', apiKey: '', modelName: 'deepseek-chat', fallbackModelName: '' }, diff --git a/server/src/settings/settings.service.ts b/server/src/settings/settings.service.ts index bce30b8..5a16cdd 100644 --- a/server/src/settings/settings.service.ts +++ b/server/src/settings/settings.service.ts @@ -16,7 +16,7 @@ const DEFAULT_AI_PROVIDERS = { kimi: { endpoint: 'https://api.moonshot.cn/v1', apiKey: DEMO_SYSTEM_SETTINGS.aiProviders.kimi.apiKey, - modelName: 'kimi-k2-turbo-preview', + modelName: 'kimi-k2.6', fallbackModelName: 'moonshot-v1-auto', }, deepseek: { endpoint: 'https://api.deepseek.com/v1', apiKey: '', modelName: 'deepseek-chat', fallbackModelName: '' }, @@ -277,13 +277,19 @@ export class SettingsService { ...Object.keys(input.aiProviders || {}), ]); const aiProviders = Object.fromEntries( - Array.from(providerKeys).map((key) => [ - key, - { + Array.from(providerKeys).map((key) => { + const provider = { ...(DEFAULT_AI_PROVIDERS[key as keyof typeof DEFAULT_AI_PROVIDERS] || DEFAULT_AI_PROVIDERS.custom), ...(input.aiProviders?.[key] || {}), - }, - ]), + }; + if (key === 'kimi' && /^kimi-k2(?:-|$)/i.test(provider.modelName)) { + provider.modelName = DEFAULT_AI_PROVIDERS.kimi.modelName; + } + if (key === 'kimi' && /^kimi-k2(?:-|$)/i.test(provider.fallbackModelName || '')) { + provider.fallbackModelName = DEFAULT_AI_PROVIDERS.kimi.fallbackModelName; + } + return [key, provider]; + }), ); const framePositions = [...(input.framePositions || 
DEFAULT_SETTINGS.framePositions)] .map((value) => Math.round(value * 100) / 100); diff --git a/src/pages/ReportEditor.tsx b/src/pages/ReportEditor.tsx index 5e67ebb..e1cb046 100644 --- a/src/pages/ReportEditor.tsx +++ b/src/pages/ReportEditor.tsx @@ -1334,7 +1334,7 @@ export default function ReportEditor() { try { const settings = storage.get('systemSettings', {} as SystemSettings); const provider = settings.aiProviders?.[settings.activeAiProvider || 'kimi']; - const modelName = provider?.modelName || 'kimi-k2-turbo-preview'; + const modelName = provider?.modelName || 'kimi-k2.6'; let actualTargetId = aiTargetRegion; if (aiModifyEnabled && actualTargetId === 'none') { const availableRegions = checkAiRegions(); @@ -1389,6 +1389,7 @@ export default function ReportEditor() { { role: 'system', content: systemPrompt }, { role: 'user', content: messageContent } ], + response_format: { type: 'json_object' }, temperature: 0.3 }; const usesFixedKimiSampling = settings.activeAiProvider === 'kimi' && /^kimi-k2(?:[.-]|$)/i.test(modelName); diff --git a/src/pages/SystemSettings.tsx b/src/pages/SystemSettings.tsx index e361d6d..b93f5cb 100644 --- a/src/pages/SystemSettings.tsx +++ b/src/pages/SystemSettings.tsx @@ -19,13 +19,19 @@ const normalizeSettings = ( ...Object.keys(input.aiProviders || {}), ]); const aiProviders = Object.fromEntries( - Array.from(providerKeys).map((key) => [ - key, - { + Array.from(providerKeys).map((key) => { + const provider = { ...(DEFAULT_AI_PROVIDERS[key] || DEFAULT_AI_PROVIDERS.custom), ...(input.aiProviders?.[key] || {}), - }, - ]), + }; + if (key === 'kimi' && /^kimi-k2(?:-|$)/i.test(provider.modelName)) { + provider.modelName = DEFAULT_AI_PROVIDERS.kimi.modelName; + } + if (key === 'kimi' && /^kimi-k2(?:-|$)/i.test(provider.fallbackModelName || '')) { + provider.fallbackModelName = DEFAULT_AI_PROVIDERS.kimi.fallbackModelName; + } + return [key, provider]; + }), ); const framePositions = normalizeFramePositions(input.framePositions, 
DEFAULT_FRAME_POSITIONS); @@ -105,7 +111,7 @@ export default function SystemSettings() { providers.kimi = { endpoint: (savedSettings as any).kimiApiEndpoint || providers.kimi.endpoint, apiKey: (savedSettings as any).kimiApiKey || '', - modelName: 'kimi-k2-turbo-preview', + modelName: 'kimi-k2.6', fallbackModelName: 'moonshot-v1-auto', }; } @@ -477,7 +483,7 @@ export default function SystemSettings() { next[settings.activeAiProvider] = { ...next[settings.activeAiProvider], modelName: e.target.value }; setSettings({ ...settings, aiProviders: next }); }} - placeholder="kimi-k2-5" + placeholder="kimi-k2.6" className="input-minimal" /> )} diff --git a/src/types.ts b/src/types.ts index 555ff96..40cb672 100644 --- a/src/types.ts +++ b/src/types.ts @@ -116,7 +116,7 @@ export const DEFAULT_AI_PROVIDERS: Record = { kimi: { endpoint: 'https://api.moonshot.cn/v1', apiKey: '', - modelName: 'kimi-k2-turbo-preview', + modelName: 'kimi-k2.6', fallbackModelName: 'moonshot-v1-auto' }, deepseek: { endpoint: 'https://api.deepseek.com/v1', apiKey: '', modelName: 'deepseek-chat', fallbackModelName: '' },