From 8d2f98fb01a6eb3fdb8b30645917ff4fde971f35 Mon Sep 17 00:00:00 2001 From: Tyler Yust <64381258+tyler6204@users.noreply.github.com> Date: Mon, 2 Feb 2026 02:06:14 -0800 Subject: [PATCH] Fix subagent announce failover race (always emit lifecycle end + treat timeout=0 as no-timeout) (#6621) * Fix subagent announce race and timeout handling Bug 1: Subagent announce fires before model failover retries finish - Problem: CLI provider emitted lifecycle error on each attempt, causing subagent registry to prematurely call beginSubagentCleanup() and announce with incorrect status before failover retries completed - Fix: Removed lifecycle error emission from CLI provider's attempt-level .catch() in agent-runner-execution.ts. Errors still propagate to runWithModelFallback for retry, but no intermediate lifecycle events are emitted. Only the final outcome (after all retries) emits lifecycle events. Bug 2: Hard 600s per-prompt timeout ignores runTimeoutSeconds=0 - Problem: When runTimeoutSeconds=0 (meaning 'no timeout'), the code returned the default 600s timeout instead of respecting the 0 setting - Fix: Modified resolveAgentTimeoutMs() to treat 0 as 'no timeout' and return a very large timeout value (30 days) instead of the default. This avoids setTimeout issues with Infinity while effectively providing unlimited time for long-running tasks. * fix: emit lifecycle:error for CLI failures (#6621) (thanks @tyler6204) * chore: satisfy format/lint gates (#6621) (thanks @tyler6204) * fix: restore build after upstream type changes (#6621) (thanks @tyler6204) * test: fix createSystemPromptOverride tests to match new return type (#6621) (thanks @tyler6204) --- extensions/bluebubbles/src/monitor.ts | 7 +- extensions/matrix/CHANGELOG.md | 2 + extensions/msteams/CHANGELOG.md | 2 + extensions/nostr/CHANGELOG.md | 2 + extensions/twitch/CHANGELOG.md | 2 + extensions/voice-call/CHANGELOG.md | 2 + extensions/zalo/CHANGELOG.md | 2 + extensions/zalouser/CHANGELOG.md | 2 + src/agents/auth-profiles/oauth.ts | 6 +- ...-runner.createsystempromptoverride.test.ts | 6 +- .../pi-embedded-runner/system-prompt.ts | 7 +- src/agents/pi-tool-definition-adapter.ts | 4 +- src/agents/timeout.ts | 13 +++- .../reply/agent-runner-execution.ts | 66 ++++++++++++------- 14 files changed, 81 insertions(+), 42 deletions(-) diff --git a/extensions/bluebubbles/src/monitor.ts b/extensions/bluebubbles/src/monitor.ts index 45584057cb..eafb6170e1 100644 --- a/extensions/bluebubbles/src/monitor.ts +++ b/extensions/bluebubbles/src/monitor.ts @@ -2167,10 +2167,9 @@ async function processMessage( sendBlueBubblesTyping(chatGuidForActions, true, { cfg: config, accountId: account.accountId, - }) - .catch((err) => { - runtime.error?.(`[bluebubbles] typing restart failed: ${String(err)}`); - }); + }).catch((err) => { + runtime.error?.(`[bluebubbles] typing restart failed: ${String(err)}`); + }); }, typingRestartDelayMs); }; try { diff --git a/extensions/matrix/CHANGELOG.md b/extensions/matrix/CHANGELOG.md index 962c64e174..aab74ff715 100644 --- a/extensions/matrix/CHANGELOG.md +++ b/extensions/matrix/CHANGELOG.md @@ -3,11 +3,13 @@ ## 2026.2.1 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.31 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.30 diff --git a/extensions/msteams/CHANGELOG.md b/extensions/msteams/CHANGELOG.md index c830ed6027..a2d147d190 100644 --- a/extensions/msteams/CHANGELOG.md +++ b/extensions/msteams/CHANGELOG.md @@ -3,11 +3,13 @@ ## 2026.2.1 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.31 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.30 diff --git a/extensions/nostr/CHANGELOG.md b/extensions/nostr/CHANGELOG.md index 2693025fe3..b3c4bdbc97 100644 --- a/extensions/nostr/CHANGELOG.md +++ b/extensions/nostr/CHANGELOG.md @@ -3,11 +3,13 @@ ## 2026.2.1 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.31 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.30 diff --git a/extensions/twitch/CHANGELOG.md b/extensions/twitch/CHANGELOG.md index 540689e336..bbb6b24e19 100644 --- a/extensions/twitch/CHANGELOG.md +++ b/extensions/twitch/CHANGELOG.md @@ -3,11 +3,13 @@ ## 2026.2.1 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.31 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.30 diff --git a/extensions/voice-call/CHANGELOG.md b/extensions/voice-call/CHANGELOG.md index 0f7f84ff7a..bf0bf691ea 100644 --- a/extensions/voice-call/CHANGELOG.md +++ b/extensions/voice-call/CHANGELOG.md @@ -3,11 +3,13 @@ ## 2026.2.1 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.31 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.30 diff --git a/extensions/zalo/CHANGELOG.md b/extensions/zalo/CHANGELOG.md index 7d69913f20..9a2ebb7c7d 100644 --- a/extensions/zalo/CHANGELOG.md +++ b/extensions/zalo/CHANGELOG.md @@ -3,11 +3,13 @@ ## 2026.2.1 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.31 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.30 diff --git a/extensions/zalouser/CHANGELOG.md b/extensions/zalouser/CHANGELOG.md index 78447c7202..f5b5437340 100644 --- a/extensions/zalouser/CHANGELOG.md +++ b/extensions/zalouser/CHANGELOG.md @@ -3,11 +3,13 @@ ## 2026.2.1 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.31 ### Changes + - Version alignment with core OpenClaw release numbers. ## 2026.1.30 diff --git a/src/agents/auth-profiles/oauth.ts b/src/agents/auth-profiles/oauth.ts index b5a52dd277..064b72f549 100644 --- a/src/agents/auth-profiles/oauth.ts +++ b/src/agents/auth-profiles/oauth.ts @@ -15,12 +15,10 @@ import { ensureAuthStoreFile, resolveAuthStorePath } from "./paths.js"; import { suggestOAuthProfileIdForLegacyDefault } from "./repair.js"; import { ensureAuthProfileStore, saveAuthProfileStore } from "./store.js"; -const OAUTH_PROVIDER_IDS = new Set( - getOAuthProviders().map((provider) => provider.id), -); +const OAUTH_PROVIDER_IDS = new Set(getOAuthProviders().map((provider) => provider.id)); const isOAuthProvider = (provider: string): provider is OAuthProvider => - OAUTH_PROVIDER_IDS.has(provider as OAuthProvider); + OAUTH_PROVIDER_IDS.has(provider); const resolveOAuthProvider = (provider: string): OAuthProvider | null => isOAuthProvider(provider) ? provider : null; diff --git a/src/agents/pi-embedded-runner.createsystempromptoverride.test.ts b/src/agents/pi-embedded-runner.createsystempromptoverride.test.ts index cd2a13fe4f..d402a5b2ba 100644 --- a/src/agents/pi-embedded-runner.createsystempromptoverride.test.ts +++ b/src/agents/pi-embedded-runner.createsystempromptoverride.test.ts @@ -99,12 +99,12 @@ const _readSessionMessages = async (sessionFile: string) => { }; describe("createSystemPromptOverride", () => { - it("returns the override prompt regardless of default prompt", () => { + it("returns the override prompt trimmed", () => { const override = createSystemPromptOverride("OVERRIDE"); - expect(override("DEFAULT")).toBe("OVERRIDE"); + expect(override).toBe("OVERRIDE"); }); it("returns an empty string for blank overrides", () => { const override = createSystemPromptOverride(" \n "); - expect(override("DEFAULT")).toBe(""); + expect(override).toBe(""); }); }); diff --git a/src/agents/pi-embedded-runner/system-prompt.ts b/src/agents/pi-embedded-runner/system-prompt.ts index b20be1b118..6384bc7e46 100644 --- a/src/agents/pi-embedded-runner/system-prompt.ts +++ b/src/agents/pi-embedded-runner/system-prompt.ts @@ -74,11 +74,8 @@ export function buildEmbeddedSystemPrompt(params: { }); } -export function createSystemPromptOverride( - systemPrompt: string, -): (defaultPrompt?: string) => string { - const override = systemPrompt.trim(); - return (_defaultPrompt?: string) => override; +export function createSystemPromptOverride(systemPrompt: string): string { + return systemPrompt.trim(); } export function applySystemPromptOverrideToSession(session: AgentSession, override: string) { diff --git a/src/agents/pi-tool-definition-adapter.ts b/src/agents/pi-tool-definition-adapter.ts index a190f6daed..6b97690393 100644 --- a/src/agents/pi-tool-definition-adapter.ts +++ b/src/agents/pi-tool-definition-adapter.ts @@ -40,9 +40,9 @@ export function toToolDefinitions(tools: AnyAgentTool[]): ToolDefinition[] { execute: async ( toolCallId, params, + signal: AbortSignal | undefined, onUpdate: AgentToolUpdateCallback | undefined, _ctx, - signal, ): Promise> => { try { return await tool.execute(toolCallId, params, signal, onUpdate); @@ -91,9 +91,9 @@ export function toClientToolDefinitions( execute: async ( toolCallId, params, + _signal: AbortSignal | undefined, _onUpdate: AgentToolUpdateCallback | undefined, _ctx, - _signal, ): Promise> => { const outcome = await runBeforeToolCallHook({ toolName: func.name, diff --git a/src/agents/timeout.ts b/src/agents/timeout.ts index 9d33f96b63..6b38b4b04c 100644 --- a/src/agents/timeout.ts +++ b/src/agents/timeout.ts @@ -19,16 +19,25 @@ export function resolveAgentTimeoutMs(opts: { }): number { const minMs = Math.max(normalizeNumber(opts.minMs) ?? 1, 1); const defaultMs = resolveAgentTimeoutSeconds(opts.cfg) * 1000; + // Use a very large timeout value (30 days) to represent "no timeout" + // when explicitly set to 0. This avoids setTimeout issues with Infinity. + const NO_TIMEOUT_MS = 30 * 24 * 60 * 60 * 1000; const overrideMs = normalizeNumber(opts.overrideMs); if (overrideMs !== undefined) { - if (overrideMs <= 0) { + if (overrideMs === 0) { + return NO_TIMEOUT_MS; + } + if (overrideMs < 0) { return defaultMs; } return Math.max(overrideMs, minMs); } const overrideSeconds = normalizeNumber(opts.overrideSeconds); if (overrideSeconds !== undefined) { - if (overrideSeconds <= 0) { + if (overrideSeconds === 0) { + return NO_TIMEOUT_MS; + } + if (overrideSeconds < 0) { return defaultMs; } return Math.max(overrideSeconds * 1000, minMs); diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index ebd58b38d0..3bdc5dde39 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -172,24 +172,27 @@ export async function runAgentTurnWithFallback(params: { }, }); const cliSessionId = getCliSessionId(params.getActiveSessionEntry(), provider); - return runCliAgent({ - sessionId: params.followupRun.run.sessionId, - sessionKey: params.sessionKey, - sessionFile: params.followupRun.run.sessionFile, - workspaceDir: params.followupRun.run.workspaceDir, - config: params.followupRun.run.config, - prompt: params.commandBody, - provider, - model, - thinkLevel: params.followupRun.run.thinkLevel, - timeoutMs: params.followupRun.run.timeoutMs, - runId, - extraSystemPrompt: params.followupRun.run.extraSystemPrompt, - ownerNumbers: params.followupRun.run.ownerNumbers, - cliSessionId, - images: params.opts?.images, - }) - .then((result) => { + return (async () => { + let lifecycleTerminalEmitted = false; + try { + const result = await runCliAgent({ + sessionId: params.followupRun.run.sessionId, + sessionKey: params.sessionKey, + sessionFile: params.followupRun.run.sessionFile, + workspaceDir: params.followupRun.run.workspaceDir, + config: params.followupRun.run.config, + prompt: params.commandBody, + provider, + model, + thinkLevel: params.followupRun.run.thinkLevel, + timeoutMs: params.followupRun.run.timeoutMs, + runId, + extraSystemPrompt: params.followupRun.run.extraSystemPrompt, + ownerNumbers: params.followupRun.run.ownerNumbers, + cliSessionId, + images: params.opts?.images, + }); + // CLI backends don't emit streaming assistant events, so we need to // emit one with the final text so server-chat can populate its buffer // and send the response to TUI/WebSocket clients. @@ -201,6 +204,7 @@ export async function runAgentTurnWithFallback(params: { data: { text: cliText }, }); } + emitAgentEvent({ runId, stream: "lifecycle", @@ -210,9 +214,10 @@ export async function runAgentTurnWithFallback(params: { endedAt: Date.now(), }, }); + lifecycleTerminalEmitted = true; + return result; - }) - .catch((err) => { + } catch (err) { emitAgentEvent({ runId, stream: "lifecycle", @@ -220,11 +225,28 @@ export async function runAgentTurnWithFallback(params: { phase: "error", startedAt, endedAt: Date.now(), - error: err instanceof Error ? err.message : String(err), + error: String(err), }, }); + lifecycleTerminalEmitted = true; throw err; - }); + } finally { + // Defensive backstop: never let a CLI run complete without a terminal + // lifecycle event, otherwise downstream consumers can hang. + if (!lifecycleTerminalEmitted) { + emitAgentEvent({ + runId, + stream: "lifecycle", + data: { + phase: "error", + startedAt, + endedAt: Date.now(), + error: "CLI run completed without lifecycle terminal event", + }, + }); + } + } + })(); } const authProfileId = provider === params.followupRun.run.provider