fix(discord): quiet fatal realtime voice startup failures

This commit is contained in:
Peter Steinberger 2026-05-13 11:51:09 +01:00
parent 74a809a1cd
commit 0832343197
No known key found for this signature in database
7 changed files with 205 additions and 13 deletions

View file

@ -9,6 +9,7 @@ Docs: https://docs.openclaw.ai
- Harden trusted-proxy source validation [AI]. (#81290) Thanks @pgondhi987.
- Agents: add permissive item schemas to array tool parameters before provider submission, preventing OpenAI-compatible schema validation from rejecting plugin tools that omit `items`. Fixes #81175. (#81217) Thanks @JARVIS-Glasses.
- Agents: escalate LLM idle watchdog timeouts through profile rotation and configured model fallback instead of leaving agent turns stuck after a silent model stream. Fixes #76877. (#80449) Thanks @jimdawdy-hub.
- Discord voice: treat OpenAI Realtime startup auth failures as fatal, suppress duplicate realtime error logs, and stop autoJoin from retrying the same broken voice channel until credentials are fixed.
- ACPX: stop forwarding unsupported timeout config options to Claude ACP while preserving OpenClaw's own turn timeout. (#80812) Thanks @sxxtony.
- Session transcripts: redact sensitive message content in the centralized JSONL append path so CLI turns, gateway transcript injection, transcript mirrors, and guarded tool results use the same configured redaction behavior. Fixes #73565. Refs #73563. (#79645) Thanks @Ziy1-Tan.
- Channels/iMessage: ignore Apple link-preview plugin payload attachments when users paste URLs, keeping the URL text while avoiding phantom media context. (#79374) Thanks @homer-byte.

View file

@ -1,6 +1,6 @@
import { agentCommandFromIngress } from "openclaw/plugin-sdk/agent-runtime";
import type { DiscordAccountConfig, OpenClawConfig } from "openclaw/plugin-sdk/config-contracts";
import type { RuntimeEnv } from "openclaw/plugin-sdk/runtime-env";
import { createSubsystemLogger, type RuntimeEnv } from "openclaw/plugin-sdk/runtime-env";
import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
import { formatMention } from "../mentions.js";
import { normalizeDiscordSlug } from "../monitor/allow-list.js";
@ -11,6 +11,8 @@ import type { DiscordVoiceSpeakerContextResolver } from "./speaker-context.js";
export const DISCORD_VOICE_MESSAGE_PROVIDER = "discord-voice";
const logger = createSubsystemLogger("discord/voice");
export type DiscordVoiceIngressContext = {
extraSystemPrompt?: string;
senderIsOwner: boolean;
@ -22,6 +24,42 @@ export type DiscordVoiceAgentTurnResult = {
text: string;
};
/**
 * Produces a compact, single-line `key=value` digest of agent turn payloads for log output.
 *
 * Only non-null object payloads contribute to the per-category counters; `payloadCount`
 * always reflects the raw array length, including entries that were skipped.
 *
 * @param payloads - Raw payload entries from an agent turn; no particular shape is assumed.
 * @returns Space-separated counters in the order payloadCount, textPayloads,
 *   nonEmptyTextPayloads, reasoningPayloads, errorPayloads, mediaPayloads.
 */
function summarizeAgentTurnPayloads(payloads: readonly unknown[]): string {
  // Property order here is the order the counters appear in the summary line.
  const tally = {
    textPayloads: 0,
    nonEmptyTextPayloads: 0,
    reasoningPayloads: 0,
    errorPayloads: 0,
    mediaPayloads: 0,
  };

  for (const candidate of payloads) {
    // Primitives and null carry no fields worth counting.
    if (typeof candidate !== "object" || candidate === null) {
      continue;
    }
    const fields = candidate as Record<string, unknown>;

    const { text } = fields;
    if (typeof text === "string") {
      tally.textPayloads += 1;
      // Whitespace-only text counts as a text payload but not as a non-empty one.
      if (text.trim()) {
        tally.nonEmptyTextPayloads += 1;
      }
    }

    if (fields.isReasoning === true) {
      tally.reasoningPayloads += 1;
    }

    if (fields.isError === true) {
      tally.errorPayloads += 1;
    }

    // A payload counts as media once, whether it has a single URL or a non-empty URL list.
    const carriesMedia =
      typeof fields.mediaUrl === "string" ||
      (Array.isArray(fields.mediaUrls) && fields.mediaUrls.length > 0);
    if (carriesMedia) {
      tally.mediaPayloads += 1;
    }
  }

  const parts = [`payloadCount=${payloads.length}`];
  for (const [label, count] of Object.entries(tally)) {
    parts.push(`${label}=${count}`);
  }
  return parts.join(" ");
}
export async function resolveDiscordVoiceIngressContext(params: {
entry: VoiceSessionEntry;
userId: string;
@ -108,12 +146,19 @@ export async function runDiscordVoiceAgentTurn(params: {
},
params.runtime,
);
const payloads = result.payloads ?? [];
const text = payloads
.map((payload) => payload.text)
.filter((entry) => typeof entry === "string" && entry.trim())
.join("\n")
.trim();
if (!text) {
logger.info(
`discord voice: agent turn produced no speakable payloads guild=${params.entry.guildId} channel=${params.entry.channelId} voiceSession=${params.entry.voiceSessionKey} supervisorSession=${params.entry.route.sessionKey} agent=${params.entry.route.agentId} user=${params.userId} ${summarizeAgentTurnPayloads(payloads)}`,
);
}
return {
context,
text: (result.payloads ?? [])
.map((payload) => payload.text)
.filter((text) => typeof text === "string" && text.trim())
.join("\n")
.trim(),
text,
};
}

View file

@ -575,6 +575,24 @@ describe("DiscordVoiceManager", () => {
expectConnectedStatus(manager, "1002");
});
// Regression: once a realtime startup fails with a credential-style error, a later
// autoJoin pass must skip the same guild/channel instead of re-dialing it.
it("suppresses repeated autoJoin attempts after fatal realtime startup failures", async () => {
// Only the first connect is rejected; a second connect attempt would use the mock's
// default behavior, so the call-count assertions below detect an unwanted retry.
realtimeSessionMock.connect.mockRejectedValueOnce(new Error("Incorrect API key provided"));
const manager = createManager({
voice: {
enabled: true,
mode: "agent-proxy",
autoJoin: [{ guildId: "g1", channelId: "1001" }],
},
});
// First pass records the fatal failure; second pass should be suppressed entirely.
await manager.autoJoin();
await manager.autoJoin();
expect(joinVoiceChannelMock).toHaveBeenCalledTimes(1);
expect(realtimeSessionMock.connect).toHaveBeenCalledTimes(1);
// No session should be reported as connected after the failed startup.
expect(manager.status()).toStrictEqual([]);
});
it("rejects joins outside configured allowed voice channels", async () => {
const manager = createManager({
voice: {

View file

@ -64,6 +64,15 @@ import { DiscordVoiceSpeakerContextResolver } from "./speaker-context.js";
const logger = createSubsystemLogger("discord/voice");
const VOICE_LOG_PREVIEW_CHARS = 500;
/**
 * Substrings that mark an autoJoin failure message as fatal (credential or authorization
 * problems). Entries must stay lowercase: they are compared against a lowercased message.
 */
const DISCORD_VOICE_FATAL_AUTOJOIN_ERROR_PATTERNS: readonly string[] = [
  // Missing or rejected API keys.
  "api key missing",
  "incorrect api key",
  "invalid api key",
  // Generic authentication / authorization failures.
  "unauthorized",
  "authentication",
  "permission denied",
  "forbidden",
];
type DiscordVoiceSdk = ReturnType<typeof loadDiscordVoiceSdk>;
type DiscordVoiceConnection = ReturnType<DiscordVoiceSdk["joinVoiceChannel"]>;
@ -135,6 +144,17 @@ function isVoiceChannelAllowed(params: {
);
}
/**
 * Builds the lookup key used to track autoJoin failures for one voice channel.
 *
 * @param entry - Guild and channel identifiers of the autoJoin target.
 * @returns The identifiers joined as `guildId:channelId`.
 */
function formatAutoJoinFailureKey(entry: { guildId: string; channelId: string }): string {
  const { guildId, channelId } = entry;
  return `${guildId}:${channelId}`;
}
/**
 * Decides whether an autoJoin failure message describes a fatal startup problem.
 *
 * The match is a case-insensitive substring test against
 * `DISCORD_VOICE_FATAL_AUTOJOIN_ERROR_PATTERNS`.
 *
 * @param message - Failure message reported by a join attempt.
 * @returns `true` when any fatal pattern occurs in the message.
 */
function isFatalAutoJoinFailure(message: string): boolean {
  const haystack = message.toLowerCase();
  for (const needle of DISCORD_VOICE_FATAL_AUTOJOIN_ERROR_PATTERNS) {
    if (haystack.includes(needle)) {
      return true;
    }
  }
  return false;
}
function startAutoJoin(manager: Pick<DiscordVoiceManager, "autoJoin">) {
void manager
.autoJoin()
@ -195,6 +215,10 @@ export class DiscordVoiceManager {
private botUserId?: string;
private readonly voiceEnabled: boolean;
private autoJoinTask: Promise<void> | null = null;
/**
 * Fatal autoJoin failures keyed by `formatAutoJoinFailureKey` (`guildId:channelId`).
 * `message` is the failure text from the join attempt; `skipLogged` records whether the
 * one-time "autoJoin suppressed" warning has already been emitted for that channel.
 * An entry is removed again when a join for the same guild/channel succeeds.
 */
private readonly fatalAutoJoinFailures = new Map<
string,
{ message: string; skipLogged: boolean }
>();
private readonly ownerAllowFrom?: string[];
private readonly speakerContext: DiscordVoiceSpeakerContextResolver;
private readonly allowedChannels: VoiceChannelResidency[] | null;
@ -270,6 +294,17 @@ export class DiscordVoiceManager {
}
for (const entry of entriesByGuild.values()) {
const failureKey = formatAutoJoinFailureKey(entry);
const fatalFailure = this.fatalAutoJoinFailures.get(failureKey);
if (fatalFailure) {
if (!fatalFailure.skipLogged) {
logger.warn(
`discord voice: autoJoin suppressed guild=${entry.guildId} channel=${entry.channelId} after fatal startup failure; retry with /vc join or reload config after fixing credentials: ${fatalFailure.message}`,
);
fatalFailure.skipLogged = true;
}
continue;
}
logVoiceVerbose(`autoJoin: joining guild ${entry.guildId} channel ${entry.channelId}`);
const result = await this.join({
guildId: entry.guildId,
@ -279,6 +314,12 @@ export class DiscordVoiceManager {
logger.warn(
`discord voice: autoJoin skipped guild=${entry.guildId} channel=${entry.channelId}: ${result.message}`,
);
if (isFatalAutoJoinFailure(result.message)) {
this.fatalAutoJoinFailures.set(failureKey, {
message: result.message,
skipLogged: false,
});
}
}
}
})().finally(() => {
@ -624,6 +665,7 @@ export class DiscordVoiceManager {
player.on("error", playerErrorHandler);
this.sessions.set(guildId, entry);
this.fatalAutoJoinFailures.delete(formatAutoJoinFailureKey({ guildId, channelId }));
logger.info(
`discord voice: joined guild=${guildId} channel=${channelId} mode=${voiceMode} agent=${route.agentId} voiceSession=${voiceRoute.sessionKey} supervisorSession=${route.sessionKey} voiceModel=${voiceConfig?.model ?? "route-default"}`,
);

View file

@ -45,6 +45,7 @@ const DISCORD_REALTIME_RECENT_AGENT_PROXY_CONSULT_TTL_MS = 15_000;
const DISCORD_REALTIME_LOG_PREVIEW_CHARS = 500;
const DISCORD_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS = 250;
const DISCORD_REALTIME_FORCED_CONSULT_FALLBACK_DELAY_MS = 200;
// Window (ms) after a realtime error is logged during which an identical message is
// counted instead of being logged again.
const DISCORD_REALTIME_DUPLICATE_ERROR_SUPPRESS_MS = 60_000;
const REALTIME_PCM16_BYTES_PER_SAMPLE = 2;
const DISCORD_REALTIME_FORCED_CONSULT_TRAILING_FRAGMENT_WORDS = new Set([
"a",
@ -332,6 +333,9 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
private queuedExactSpeechMessages: string[] = [];
private exactSpeechResponseActive = false;
private exactSpeechAudioStarted = false;
/**
 * Most recently logged realtime error, used to collapse duplicates:
 * `message` is the logged text, `suppressed` counts identical errors swallowed since it
 * was logged, and `lastLoggedAt` is the `Date.now()` timestamp of that log line.
 */
private lastRealtimeError:
| { message: string; suppressed: number; lastLoggedAt: number }
| undefined;
private readonly playerIdleHandler = () => {
this.resetOutputStream("player-idle");
this.completeExactSpeechResponse("player-idle");
@ -453,9 +457,11 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
logger.info(interruptionLog);
}
},
onError: (error) =>
logger.warn(`discord voice: realtime error: ${formatErrorMessage(error)}`),
onClose: (reason) => logVoiceVerbose(`realtime closed: ${reason}`),
onError: (error) => this.logRealtimeError(formatErrorMessage(error)),
onClose: (reason) => {
this.flushSuppressedRealtimeErrors();
logVoiceVerbose(`realtime closed: ${reason}`);
},
});
const resolvedModel =
readProviderConfigString(resolved.providerConfig, "model") ?? resolved.provider.defaultModel;
@ -478,6 +484,7 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
close(): void {
this.stopped = true;
this.flushSuppressedRealtimeErrors();
this.talkback.close();
this.clearForcedConsultTimers();
this.pendingAgentProxyConsultContexts = [];
@ -493,6 +500,30 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
this.params.entry.player.off(voiceSdk.AudioPlayerStatus.Idle, this.playerIdleHandler);
}
/**
 * Logs a realtime error, collapsing repeats of the same message.
 *
 * An error whose text matches the last logged one and arrives within
 * `DISCORD_REALTIME_DUPLICATE_ERROR_SUPPRESS_MS` of that log line is only counted.
 * Any other error first flushes the pending duplicate count, then is logged and
 * becomes the new reference for duplicate detection.
 *
 * @param message - Already-formatted error text.
 */
private logRealtimeError(message: string): void {
  const now = Date.now();
  const previous = this.lastRealtimeError;
  if (previous && previous.message === message) {
    // The window is measured from the last actual log line, not from the last duplicate.
    const elapsedMs = now - previous.lastLoggedAt;
    if (elapsedMs < DISCORD_REALTIME_DUPLICATE_ERROR_SUPPRESS_MS) {
      previous.suppressed += 1;
      return;
    }
  }
  // Report what was swallowed before replacing the tracked error.
  this.flushSuppressedRealtimeErrors();
  this.lastRealtimeError = { message, suppressed: 0, lastLoggedAt: now };
  logger.warn(`discord voice: realtime error: ${message}`);
}
/**
 * Emits one summary warning for duplicate realtime errors that were swallowed since the
 * last log line, then resets the counter. Does nothing when no error is tracked or no
 * duplicates were suppressed.
 */
private flushSuppressedRealtimeErrors(): void {
  const pending = this.lastRealtimeError;
  if (!pending || pending.suppressed === 0) {
    return;
  }
  const suppressedCount = pending.suppressed;
  logger.warn(
    `discord voice: suppressed ${suppressedCount} duplicate realtime errors: ${pending.message}`,
  );
  // Keep the tracked message so later duplicates are still recognized; only the count resets.
  pending.suppressed = 0;
}
beginSpeakerTurn(context: VoiceRealtimeSpeakerContext, userId: string): VoiceRealtimeSpeakerTurn {
const turn: PendingSpeakerTurn = {
context: { ...context, userId },

View file

@ -772,6 +772,48 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
expect(bridge.isConnected()).toBe(false);
});
// Regression: error events that arrive before the session is configured must reject
// connect() once, close the socket, and never reach the onError callback.
it("treats pre-ready auth errors as a single startup failure", async () => {
const provider = buildOpenAIRealtimeVoiceProvider();
const onError = vi.fn();
const bridge = provider.createBridge({
providerConfig: { apiKey: "sk-test" }, // pragma: allowlist secret
onAudio: vi.fn(),
onClearAudio: vi.fn(),
onError,
});
const connecting = bridge.connect();
const socket = FakeWebSocket.instances[0];
if (!socket) {
throw new Error("expected bridge to create a websocket");
}
// Open the socket but never send session.updated, so the bridge is still in startup.
socket.readyState = FakeWebSocket.OPEN;
socket.emit("open");
// First error event: should reject the pending connect().
socket.emit(
"message",
Buffer.from(
JSON.stringify({
type: "error",
error: { message: "Incorrect API key provided" },
}),
),
);
// Identical second error event: must be ignored rather than surfaced via onError.
socket.emit(
"message",
Buffer.from(
JSON.stringify({
type: "error",
error: { message: "Incorrect API key provided" },
}),
),
);
await expect(connecting).rejects.toThrow("Incorrect API key provided");
expect(onError).not.toHaveBeenCalled();
expect(socket.closed).toBe(true);
expect(bridge.isConnected()).toBe(false);
});
it("rejects connection when the socket closes before session readiness", async () => {
const provider = buildOpenAIRealtimeVoiceProvider();
const bridge = provider.createBridge({

View file

@ -508,6 +508,14 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
});
this.ws = ws;
// Fails the in-flight startup: hands the error to settleReject, then closes the socket
// (unless it is already closed) so a failed startup does not leave a connection open.
const rejectStartup = (error: Error) => {
  settleReject(error);
  if (ws.readyState === WebSocket.CLOSED) {
    return;
  }
  // Flag the close as deliberate before closing — presumably so the close handler does
  // not treat it as an unexpected disconnect (handler not visible here; confirm).
  this.intentionallyClosed = true;
  ws.close(1000, "startup failed");
};
ws.on("open", () => {
this.resetRealtimeSessionState();
this.connected = true;
@ -527,6 +535,9 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
});
ws.on("message", (data: Buffer) => {
if (settled && !this.sessionConfigured) {
return;
}
captureWsEvent({
url,
direction: "inbound",
@ -540,13 +551,14 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
});
try {
const event = JSON.parse(data.toString()) as RealtimeEvent;
if (event.type === "error" && !this.sessionConfigured) {
rejectStartup(new Error(readRealtimeErrorDetail(event.error)));
return;
}
this.handleEvent(event);
if (event.type === "session.updated") {
settleResolve();
}
if (event.type === "error" && !this.sessionConfigured) {
settleReject(new Error(readRealtimeErrorDetail(event.error)));
}
} catch (error) {
console.error("[openai] realtime event parse failed:", error);
}
@ -565,7 +577,8 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
},
});
if (!this.sessionConfigured) {
settleReject(error instanceof Error ? error : new Error(String(error)));
rejectStartup(error instanceof Error ? error : new Error(String(error)));
return;
}
this.config.onError?.(error instanceof Error ? error : new Error(String(error)));
});