From 0deb8b0da187ed3a5ca1b0dc37736548795fbe31 Mon Sep 17 00:00:00 2001
From: Tyler Yust <64381258+tyler6204@users.noreply.github.com>
Date: Sat, 7 Feb 2026 17:40:51 -0800
Subject: [PATCH] fix: recover from context overflow caused by oversized tool results (#11579)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: gracefully handle oversized tool results causing context overflow

When a subagent reads a very large file or gets a huge tool result
(e.g., gh pr diff on a massive PR), it can exceed the model's context
window in a single prompt. Auto-compaction can't help because there's
no older history to compact — just one giant tool result.

This adds two layers of defense:

1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens)
   applied in the session tool result guard before persistence. This
   prevents extremely large tool results from being stored in full,
   regardless of model context window size.

2. Recovery: When context overflow is detected and compaction fails,
   scan session messages for oversized tool results relative to the
   model's actual context window (30% max share). If found, truncate
   them in the session via branching (creating a new branch with
   truncated content) and retry the prompt.

The truncation preserves the beginning of the content (most useful for
understanding what was read) and appends a notice explaining the
truncation and suggesting offset/limit parameters for targeted reads.

Includes comprehensive tests for:
- Text truncation with newline-boundary awareness
- Context-window-proportional size calculation
- In-memory message truncation
- Oversized detection heuristics
- Guard-level size capping during persistence

* fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
---
 CHANGELOG.md                                  |   1 +
 src/agents/pi-embedded-runner/run.ts          |  46 +++
 .../tool-result-truncation.test.ts            | 215 ++++++++++++
 .../tool-result-truncation.ts                 | 328 ++++++++++++++++++
 src/agents/session-tool-result-guard.test.ts  |  63 ++++
 src/agents/session-tool-result-guard.ts       |  73 +++-
 6 files changed, 725 insertions(+), 1 deletion(-)
 create mode 100644 src/agents/pi-embedded-runner/tool-result-truncation.test.ts
 create mode 100644 src/agents/pi-embedded-runner/tool-result-truncation.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 701c09866f..ac4b57f6d1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
+- Agents: recover from context overflow caused by oversized tool results (pre-emptive capping + fallback truncation). (#11579) Thanks @tyler6204.
 - Cron: scheduler reliability (timer drift, restart catch-up, lock contention, stale running markers). (#10776) Thanks @tyler6204.
 - Cron: store migration hardening (legacy field migration, parse error handling, explicit delivery mode persistence). (#10776) Thanks @tyler6204.
 - Gateway/CLI: when `gateway.bind=lan`, use a LAN IP for probe URLs and Control UI links. (#11448) Thanks @AnonO6.
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index c8ca9b5a19..97ff88cb60 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -52,6 +52,10 @@ import { log } from "./logger.js";
 import { resolveModel } from "./model.js";
 import { runEmbeddedAttempt } from "./run/attempt.js";
 import { buildEmbeddedRunPayloads } from "./run/payloads.js";
+import {
+  truncateOversizedToolResultsInSession,
+  sessionLikelyHasOversizedToolResults,
+} from "./tool-result-truncation.js";
 import { describeUnknownError } from "./utils.js";
 
 type ApiKeyInfo = ResolvedProviderAuth;
@@ -321,6 +325,7 @@ export async function runEmbeddedPiAgent(
 
   const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
   let overflowCompactionAttempts = 0;
+  let toolResultTruncationAttempted = false;
   try {
     while (true) {
       attemptedThinking.add(thinkLevel);
@@ -437,6 +442,47 @@ export async function runEmbeddedPiAgent(
             `auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
           );
         }
+
+        // Fallback: try truncating oversized tool results in the session.
+        // This handles the case where a single tool result (e.g., reading a
+        // huge file or getting a massive PR diff) exceeds the context window,
+        // and compaction can't help because there's no older history to compact.
+        if (!toolResultTruncationAttempted) {
+          const contextWindowTokens = ctxInfo.tokens;
+          const hasOversized = attempt.messagesSnapshot
+            ? sessionLikelyHasOversizedToolResults({
+                messages: attempt.messagesSnapshot,
+                contextWindowTokens,
+              })
+            : false;
+
+          if (hasOversized) {
+            toolResultTruncationAttempted = true;
+            log.warn(
+              `[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
+                `(contextWindow=${contextWindowTokens} tokens)`,
+            );
+            const truncResult = await truncateOversizedToolResultsInSession({
+              sessionFile: params.sessionFile,
+              contextWindowTokens,
+              sessionId: params.sessionId,
+              sessionKey: params.sessionKey,
+            });
+            if (truncResult.truncated) {
+              log.info(
+                `[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
+              );
+              // Reset compaction attempts so compaction can be tried again
+              // after truncation (the session is now smaller)
+              overflowCompactionAttempts = 0;
+              continue;
+            }
+            log.warn(
+              `[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
+            );
+          }
+        }
+
         const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
         return {
           payloads: [
diff --git a/src/agents/pi-embedded-runner/tool-result-truncation.test.ts b/src/agents/pi-embedded-runner/tool-result-truncation.test.ts
new file mode 100644
index 0000000000..2cd27a042b
--- /dev/null
+++ b/src/agents/pi-embedded-runner/tool-result-truncation.test.ts
@@ -0,0 +1,215 @@
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import { describe, expect, it } from "vitest";
+import {
+  truncateToolResultText,
+  calculateMaxToolResultChars,
+  truncateOversizedToolResultsInMessages,
+  isOversizedToolResult,
+  sessionLikelyHasOversizedToolResults,
+  HARD_MAX_TOOL_RESULT_CHARS,
+} from "./tool-result-truncation.js";
+
+function makeToolResult(text: string, toolCallId = "call_1"): AgentMessage {
+  return {
+    role: "toolResult",
+    toolCallId,
+    toolName: "read",
+    content: [{ type: "text", text }],
+    isError: false,
+    timestamp: Date.now(),
+  } as AgentMessage;
+}
+
+function makeUserMessage(text: string): AgentMessage {
+  return {
+    role: "user",
+    content: text,
+    timestamp: Date.now(),
+  } as AgentMessage;
+}
+
+function makeAssistantMessage(text: string): AgentMessage {
+  return {
+    role: "assistant",
+    content: [{ type: "text", text }],
+    api: "messages",
+    provider: "anthropic",
+    model: "claude-sonnet-4-20250514",
+    usage: {
+      inputTokens: 0,
+      outputTokens: 0,
+      cacheReadInputTokens: 0,
+      cacheCreationInputTokens: 0,
+    },
+    stopReason: "end_turn",
+    timestamp: Date.now(),
+  } as AgentMessage;
+}
+
+describe("truncateToolResultText", () => {
+  it("returns text unchanged when under limit", () => {
+    const text = "hello world";
+    expect(truncateToolResultText(text, 1000)).toBe(text);
+  });
+
+  it("truncates text that exceeds limit", () => {
+    const text = "a".repeat(10_000);
+    const result = truncateToolResultText(text, 5_000);
+    expect(result.length).toBeLessThan(text.length);
+    expect(result).toContain("truncated");
+  });
+
+  it("preserves at least MIN_KEEP_CHARS (2000)", () => {
+    const text = "x".repeat(50_000);
+    const result = truncateToolResultText(text, 100); // Even with small limit
+    expect(result.length).toBeGreaterThan(2000);
+  });
+
+  it("tries to break at newline boundary", () => {
+    const lines = Array.from({ length: 100 }, (_, i) => `line ${i}: ${"x".repeat(50)}`).join("\n");
+    const result = truncateToolResultText(lines, 3000);
+    // Should contain truncation notice
+    expect(result).toContain("truncated");
+    // The truncated content should be shorter than the original
+    expect(result.length).toBeLessThan(lines.length);
+    // Extract the kept content (before the truncation suffix marker)
+    const suffixIndex = result.indexOf("\n\n⚠️");
+    if (suffixIndex > 0) {
+      const keptContent = result.slice(0, suffixIndex);
+      // Should end at a newline boundary (i.e., the last char before suffix is a complete line)
+      const lastNewline = keptContent.lastIndexOf("\n");
+      // The last newline should be near the end (within the last line)
+      expect(lastNewline).toBeGreaterThan(keptContent.length - 100);
+    }
+  });
+});
+
+describe("calculateMaxToolResultChars", () => {
+  it("scales with context window size", () => {
+    const small = calculateMaxToolResultChars(32_000);
+    const large = calculateMaxToolResultChars(200_000);
+    expect(large).toBeGreaterThan(small);
+  });
+
+  it("caps at HARD_MAX_TOOL_RESULT_CHARS for very large windows", () => {
+    const result = calculateMaxToolResultChars(2_000_000); // 2M token window
+    expect(result).toBeLessThanOrEqual(HARD_MAX_TOOL_RESULT_CHARS);
+  });
+
+  it("returns reasonable size for 128K context", () => {
+    const result = calculateMaxToolResultChars(128_000);
+    // 30% of 128K = 38.4K tokens * 4 chars = 153.6K chars
+    expect(result).toBeGreaterThan(100_000);
+    expect(result).toBeLessThan(200_000);
+  });
+});
+
+describe("isOversizedToolResult", () => {
+  it("returns false for small tool results", () => {
+    const msg = makeToolResult("small content");
+    expect(isOversizedToolResult(msg, 200_000)).toBe(false);
+  });
+
+  it("returns true for oversized tool results", () => {
+    const msg = makeToolResult("x".repeat(500_000));
+    expect(isOversizedToolResult(msg, 128_000)).toBe(true);
+  });
+
+  it("returns false for non-toolResult messages", () => {
+    const msg = makeUserMessage("x".repeat(500_000));
+    expect(isOversizedToolResult(msg, 128_000)).toBe(false);
+  });
+});
+
+describe("truncateOversizedToolResultsInMessages", () => {
+  it("returns unchanged messages when nothing is oversized", () => {
+    const messages = [
+      makeUserMessage("hello"),
+      makeAssistantMessage("using tool"),
+      makeToolResult("small result"),
+    ];
+    const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
+      messages,
+      200_000,
+    );
+    expect(truncatedCount).toBe(0);
+    expect(result).toEqual(messages);
+  });
+
+  it("truncates oversized tool results", () => {
+    const bigContent = "x".repeat(500_000);
+    const messages = [
+      makeUserMessage("hello"),
+      makeAssistantMessage("reading file"),
+      makeToolResult(bigContent),
+    ];
+    const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
+      messages,
+      128_000,
+    );
+    expect(truncatedCount).toBe(1);
+    const toolResult = result[2] as { content: Array<{ text: string }> };
+    expect(toolResult.content[0].text.length).toBeLessThan(bigContent.length);
+    expect(toolResult.content[0].text).toContain("truncated");
+  });
+
+  it("preserves non-toolResult messages", () => {
+    const messages = [
+      makeUserMessage("hello"),
+      makeAssistantMessage("reading file"),
+      makeToolResult("x".repeat(500_000)),
+    ];
+    const { messages: result } = truncateOversizedToolResultsInMessages(messages, 128_000);
+    expect(result[0]).toBe(messages[0]); // Same reference
+    expect(result[1]).toBe(messages[1]); // Same reference
+  });
+
+  it("handles multiple oversized tool results", () => {
+    const messages = [
+      makeUserMessage("hello"),
+      makeAssistantMessage("reading files"),
+      makeToolResult("x".repeat(500_000), "call_1"),
+      makeToolResult("y".repeat(500_000), "call_2"),
+    ];
+    const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
+      messages,
+      128_000,
+    );
+    expect(truncatedCount).toBe(2);
+    for (const msg of result.slice(2)) {
+      const tr = msg as { content: Array<{ text: string }> };
+      expect(tr.content[0].text.length).toBeLessThan(500_000);
+    }
+  });
+});
+
+describe("sessionLikelyHasOversizedToolResults", () => {
+  it("returns false when no tool results are oversized", () => {
+    const messages = [makeUserMessage("hello"), makeToolResult("small result")];
+    expect(
+      sessionLikelyHasOversizedToolResults({
+        messages,
+        contextWindowTokens: 200_000,
+      }),
+    ).toBe(false);
+  });
+
+  it("returns true when a tool result is oversized", () => {
+    const messages = [makeUserMessage("hello"), makeToolResult("x".repeat(500_000))];
+    expect(
+      sessionLikelyHasOversizedToolResults({
+        messages,
+        contextWindowTokens: 128_000,
+      }),
+    ).toBe(true);
+  });
+
+  it("returns false for empty messages", () => {
+    expect(
+      sessionLikelyHasOversizedToolResults({
+        messages: [],
+        contextWindowTokens: 200_000,
+      }),
+    ).toBe(false);
+  });
+});
diff --git a/src/agents/pi-embedded-runner/tool-result-truncation.ts b/src/agents/pi-embedded-runner/tool-result-truncation.ts
new file mode 100644
index 0000000000..5d54cbf888
--- /dev/null
+++ b/src/agents/pi-embedded-runner/tool-result-truncation.ts
@@ -0,0 +1,328 @@
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import type { TextContent } from "@mariozechner/pi-ai";
+import { SessionManager } from "@mariozechner/pi-coding-agent";
+import { log } from "./logger.js";
+
+/**
+ * Maximum share of the context window a single tool result should occupy.
+ * This is intentionally conservative – a single tool result should not
+ * consume more than 30% of the context window even without other messages.
+ */
+const MAX_TOOL_RESULT_CONTEXT_SHARE = 0.3;
+
+/**
+ * Hard character limit for a single tool result text block.
+ * Even for the largest context windows (~2M tokens), a single tool result
+ * should not exceed ~400K characters (~100K tokens).
+ * This acts as a safety net when we don't know the context window size.
+ */
+export const HARD_MAX_TOOL_RESULT_CHARS = 400_000;
+
+/**
+ * Minimum characters to keep when truncating.
+ * We always keep at least the first portion so the model understands
+ * what was in the content.
+ */
+const MIN_KEEP_CHARS = 2_000;
+
+/**
+ * Suffix appended to truncated tool results.
+ */
+const TRUNCATION_SUFFIX =
+  "\n\n⚠️ [Content truncated — original was too large for the model's context window. " +
+  "The content above is a partial view. If you need more, request specific sections or use " +
+  "offset/limit parameters to read smaller chunks.]";
+
+/**
+ * Truncate a single text string to fit within maxChars, preserving the beginning.
+ */
+export function truncateToolResultText(text: string, maxChars: number): string {
+  if (text.length <= maxChars) {
+    return text;
+  }
+  const keepChars = Math.max(MIN_KEEP_CHARS, maxChars - TRUNCATION_SUFFIX.length);
+  // Try to break at a newline boundary to avoid cutting mid-line
+  let cutPoint = keepChars;
+  const lastNewline = text.lastIndexOf("\n", keepChars);
+  if (lastNewline > keepChars * 0.8) {
+    cutPoint = lastNewline;
+  }
+  return text.slice(0, cutPoint) + TRUNCATION_SUFFIX;
+}
+
+/**
+ * Calculate the maximum allowed characters for a single tool result
+ * based on the model's context window tokens.
+ *
+ * Uses a rough 4 chars ≈ 1 token heuristic (conservative for English text;
+ * actual ratio varies by tokenizer).
+ */
+export function calculateMaxToolResultChars(contextWindowTokens: number): number {
+  const maxTokens = Math.floor(contextWindowTokens * MAX_TOOL_RESULT_CONTEXT_SHARE);
+  // Rough conversion: ~4 chars per token on average
+  const maxChars = maxTokens * 4;
+  return Math.min(maxChars, HARD_MAX_TOOL_RESULT_CHARS);
+}
+
+/**
+ * Get the total character count of text content blocks in a tool result message.
+ */
+function getToolResultTextLength(msg: AgentMessage): number {
+  if (!msg || (msg as { role?: string }).role !== "toolResult") {
+    return 0;
+  }
+  const content = (msg as { content?: unknown }).content;
+  if (!Array.isArray(content)) {
+    return 0;
+  }
+  let totalLength = 0;
+  for (const block of content) {
+    if (block && typeof block === "object" && (block as { type?: string }).type === "text") {
+      const text = (block as TextContent).text;
+      if (typeof text === "string") {
+        totalLength += text.length;
+      }
+    }
+  }
+  return totalLength;
+}
+
+/**
+ * Truncate a tool result message's text content blocks to fit within maxChars.
+ * Returns a new message (does not mutate the original).
+ */
+function truncateToolResultMessage(msg: AgentMessage, maxChars: number): AgentMessage {
+  const content = (msg as { content?: unknown }).content;
+  if (!Array.isArray(content)) {
+    return msg;
+  }
+
+  // Calculate total text size
+  const totalTextChars = getToolResultTextLength(msg);
+  if (totalTextChars <= maxChars) {
+    return msg;
+  }
+
+  // Distribute the budget proportionally among text blocks
+  const newContent = content.map((block: unknown) => {
+    if (!block || typeof block !== "object" || (block as { type?: string }).type !== "text") {
+      return block; // Keep non-text blocks (images) as-is
+    }
+    const textBlock = block as TextContent;
+    if (typeof textBlock.text !== "string") {
+      return block;
+    }
+    // Proportional budget for this block
+    const blockShare = textBlock.text.length / totalTextChars;
+    const blockBudget = Math.max(MIN_KEEP_CHARS, Math.floor(maxChars * blockShare));
+    return {
+      ...textBlock,
+      text: truncateToolResultText(textBlock.text, blockBudget),
+    };
+  });
+
+  return { ...msg, content: newContent } as AgentMessage;
+}
+
+/**
+ * Find oversized tool result entries in a session and truncate them.
+ *
+ * This operates on the session file by:
+ * 1. Opening the session manager
+ * 2. Walking the current branch to find oversized tool results
+ * 3. Branching from before the first oversized tool result
+ * 4. Re-appending all entries from that point with truncated tool results
+ *
+ * @returns Object indicating whether any truncation was performed
+ */
+export async function truncateOversizedToolResultsInSession(params: {
+  sessionFile: string;
+  contextWindowTokens: number;
+  sessionId?: string;
+  sessionKey?: string;
+}): Promise<{ truncated: boolean; truncatedCount: number; reason?: string }> {
+  const { sessionFile, contextWindowTokens } = params;
+  const maxChars = calculateMaxToolResultChars(contextWindowTokens);
+
+  try {
+    const sessionManager = SessionManager.open(sessionFile);
+    const branch = sessionManager.getBranch();
+
+    if (branch.length === 0) {
+      return { truncated: false, truncatedCount: 0, reason: "empty session" };
+    }
+
+    // Find oversized tool result entries and their indices in the branch
+    const oversizedIndices: number[] = [];
+    for (let i = 0; i < branch.length; i++) {
+      const entry = branch[i];
+      if (entry.type !== "message") {
+        continue;
+      }
+      const msg = entry.message;
+      if ((msg as { role?: string }).role !== "toolResult") {
+        continue;
+      }
+      const textLength = getToolResultTextLength(msg);
+      if (textLength > maxChars) {
+        oversizedIndices.push(i);
+        log.info(
+          `[tool-result-truncation] Found oversized tool result: ` +
+            `entry=${entry.id} chars=${textLength} maxChars=${maxChars} ` +
+            `sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
+        );
+      }
+    }
+
+    if (oversizedIndices.length === 0) {
+      return { truncated: false, truncatedCount: 0, reason: "no oversized tool results" };
+    }
+
+    // Branch from the parent of the first oversized entry
+    const firstOversizedIdx = oversizedIndices[0];
+    const firstOversizedEntry = branch[firstOversizedIdx];
+    const branchFromId = firstOversizedEntry.parentId;
+
+    if (!branchFromId) {
+      // The oversized entry is the root - very unusual but handle it
+      sessionManager.resetLeaf();
+    } else {
+      sessionManager.branch(branchFromId);
+    }
+
+    // Re-append all entries from the first oversized one onwards,
+    // with truncated tool results
+    const oversizedSet = new Set(oversizedIndices);
+    let truncatedCount = 0;
+
+    for (let i = firstOversizedIdx; i < branch.length; i++) {
+      const entry = branch[i];
+
+      if (entry.type === "message") {
+        let message = entry.message;
+
+        if (oversizedSet.has(i)) {
+          message = truncateToolResultMessage(message, maxChars);
+          truncatedCount++;
+          const newLength = getToolResultTextLength(message);
+          log.info(
+            `[tool-result-truncation] Truncated tool result: ` +
+              `originalEntry=${entry.id} newChars=${newLength} ` +
+              `sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
+          );
+        }
+
+        // appendMessage expects Message | CustomMessage | BashExecutionMessage
+        sessionManager.appendMessage(message as Parameters<typeof sessionManager.appendMessage>[0]);
+      } else if (entry.type === "compaction") {
+        sessionManager.appendCompaction(
+          entry.summary,
+          entry.firstKeptEntryId,
+          entry.tokensBefore,
+          entry.details,
+          entry.fromHook,
+        );
+      } else if (entry.type === "thinking_level_change") {
+        sessionManager.appendThinkingLevelChange(entry.thinkingLevel);
+      } else if (entry.type === "model_change") {
+        sessionManager.appendModelChange(entry.provider, entry.modelId);
+      } else if (entry.type === "custom") {
+        sessionManager.appendCustomEntry(entry.customType, entry.data);
+      } else if (entry.type === "custom_message") {
+        sessionManager.appendCustomMessageEntry(
+          entry.customType,
+          entry.content,
+          entry.display,
+          entry.details,
+        );
+      } else if (entry.type === "branch_summary") {
+        // Branch summaries reference specific entry IDs - skip to avoid inconsistency
+        continue;
+      } else if (entry.type === "label") {
+        // Labels reference specific entry IDs - skip to avoid inconsistency
+        continue;
+      } else if (entry.type === "session_info") {
+        if (entry.name) {
+          sessionManager.appendSessionInfo(entry.name);
+        }
+      }
+    }
+
+    log.info(
+      `[tool-result-truncation] Truncated ${truncatedCount} tool result(s) in session ` +
+        `(contextWindow=${contextWindowTokens} maxChars=${maxChars}) ` +
+        `sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
+    );
+
+    return { truncated: true, truncatedCount };
+  } catch (err) {
+    const errMsg = err instanceof Error ? err.message : String(err);
+    log.warn(`[tool-result-truncation] Failed to truncate: ${errMsg}`);
+    return { truncated: false, truncatedCount: 0, reason: errMsg };
+  }
+}
+
+/**
+ * Truncate oversized tool results in an array of messages (in-memory).
+ * Returns a new array with truncated messages.
+ *
+ * This is used as a pre-emptive guard before sending messages to the LLM,
+ * without modifying the session file.
+ */
+export function truncateOversizedToolResultsInMessages(
+  messages: AgentMessage[],
+  contextWindowTokens: number,
+): { messages: AgentMessage[]; truncatedCount: number } {
+  const maxChars = calculateMaxToolResultChars(contextWindowTokens);
+  let truncatedCount = 0;
+
+  const result = messages.map((msg) => {
+    if ((msg as { role?: string }).role !== "toolResult") {
+      return msg;
+    }
+    const textLength = getToolResultTextLength(msg);
+    if (textLength <= maxChars) {
+      return msg;
+    }
+    truncatedCount++;
+    return truncateToolResultMessage(msg, maxChars);
+  });
+
+  return { messages: result, truncatedCount };
+}
+
+/**
+ * Check if a tool result message exceeds the size limit for a given context window.
+ */
+export function isOversizedToolResult(msg: AgentMessage, contextWindowTokens: number): boolean {
+  if ((msg as { role?: string }).role !== "toolResult") {
+    return false;
+  }
+  const maxChars = calculateMaxToolResultChars(contextWindowTokens);
+  return getToolResultTextLength(msg) > maxChars;
+}
+
+/**
+ * Estimate whether the session likely has oversized tool results that caused
+ * a context overflow. Used as a heuristic to decide whether to attempt
+ * tool result truncation before giving up.
+ */
+export function sessionLikelyHasOversizedToolResults(params: {
+  messages: AgentMessage[];
+  contextWindowTokens: number;
+}): boolean {
+  const { messages, contextWindowTokens } = params;
+  const maxChars = calculateMaxToolResultChars(contextWindowTokens);
+
+  for (const msg of messages) {
+    if ((msg as { role?: string }).role !== "toolResult") {
+      continue;
+    }
+    const textLength = getToolResultTextLength(msg);
+    if (textLength > maxChars) {
+      return true;
+    }
+  }
+
+  return false;
+}
diff --git a/src/agents/session-tool-result-guard.test.ts b/src/agents/session-tool-result-guard.test.ts
index 9f0959b6a9..2f0bc2a02f 100644
--- a/src/agents/session-tool-result-guard.test.ts
+++ b/src/agents/session-tool-result-guard.test.ts
@@ -206,4 +206,67 @@ describe("installSessionToolResultGuard", () => {
 
     expect(messages.map((m) => m.role)).toEqual(["assistant", "toolResult"]);
   });
+
+  it("caps oversized tool result text during persistence", () => {
+    const sm = SessionManager.inMemory();
+    installSessionToolResultGuard(sm);
+
+    sm.appendMessage(toolCallMessage);
+    sm.appendMessage(
+      asAppendMessage({
+        role: "toolResult",
+        toolCallId: "call_1",
+        toolName: "read",
+        content: [{ type: "text", text: "x".repeat(500_000) }],
+        isError: false,
+        timestamp: Date.now(),
+      }),
+    );
+
+    const entries = sm
+      .getEntries()
+      .filter((e) => e.type === "message")
+      .map((e) => (e as { message: AgentMessage }).message);
+
+    const toolResult = entries.find((m) => m.role === "toolResult") as {
+      content: Array<{ type: string; text: string }>;
+    };
+    expect(toolResult).toBeDefined();
+    const textBlock = toolResult.content.find((b: { type: string }) => b.type === "text") as {
+      text: string;
+    };
+    expect(textBlock.text.length).toBeLessThan(500_000);
+    expect(textBlock.text).toContain("truncated");
+  });
+
+  it("does not truncate tool results under the limit", () => {
+    const sm = SessionManager.inMemory();
+    installSessionToolResultGuard(sm);
+
+    const originalText = "small tool result";
+    sm.appendMessage(toolCallMessage);
+    sm.appendMessage(
+      asAppendMessage({
+        role: "toolResult",
+        toolCallId: "call_1",
+        toolName: "read",
+        content: [{ type: "text", text: originalText }],
+        isError: false,
+        timestamp: Date.now(),
+      }),
+    );
+
+    const entries = sm
+      .getEntries()
+      .filter((e) => e.type === "message")
+      .map((e) => (e as { message: AgentMessage }).message);
+
+    const toolResult = entries.find((m) => m.role === "toolResult") as {
+      content: Array<{ type: string; text: string }>;
+    };
+    const textBlock = toolResult.content.find((b: { type: string }) => b.type === "text") as {
+      text: string;
+    };
+    expect(textBlock.text).toBe(originalText);
+  });
 });
diff --git a/src/agents/session-tool-result-guard.ts b/src/agents/session-tool-result-guard.ts
index ea0152ac76..72661a59ff 100644
--- a/src/agents/session-tool-result-guard.ts
+++ b/src/agents/session-tool-result-guard.ts
@@ -1,8 +1,76 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import type { TextContent } from "@mariozechner/pi-ai";
 import type { SessionManager } from "@mariozechner/pi-coding-agent";
 import { emitSessionTranscriptUpdate } from "../sessions/transcript-events.js";
+import { HARD_MAX_TOOL_RESULT_CHARS } from "./pi-embedded-runner/tool-result-truncation.js";
 import { makeMissingToolResult, sanitizeToolCallInputs } from "./session-transcript-repair.js";
 
+const GUARD_TRUNCATION_SUFFIX =
+  "\n\n⚠️ [Content truncated during persistence — original exceeded size limit. " +
+  "Use offset/limit parameters or request specific sections for large content.]";
+
+/**
+ * Truncate oversized text content blocks in a tool result message.
+ * Returns the original message if under the limit, or a new message with
+ * truncated text blocks otherwise.
+ */
+function capToolResultSize(msg: AgentMessage): AgentMessage {
+  const role = (msg as { role?: string }).role;
+  if (role !== "toolResult") {
+    return msg;
+  }
+  const content = (msg as { content?: unknown }).content;
+  if (!Array.isArray(content)) {
+    return msg;
+  }
+
+  // Calculate total text size
+  let totalTextChars = 0;
+  for (const block of content) {
+    if (block && typeof block === "object" && (block as { type?: string }).type === "text") {
+      const text = (block as TextContent).text;
+      if (typeof text === "string") {
+        totalTextChars += text.length;
+      }
+    }
+  }
+
+  if (totalTextChars <= HARD_MAX_TOOL_RESULT_CHARS) {
+    return msg;
+  }
+
+  // Truncate proportionally
+  const newContent = content.map((block: unknown) => {
+    if (!block || typeof block !== "object" || (block as { type?: string }).type !== "text") {
+      return block;
+    }
+    const textBlock = block as TextContent;
+    if (typeof textBlock.text !== "string") {
+      return block;
+    }
+    const blockShare = textBlock.text.length / totalTextChars;
+    const blockBudget = Math.max(
+      2_000,
+      Math.floor(HARD_MAX_TOOL_RESULT_CHARS * blockShare) - GUARD_TRUNCATION_SUFFIX.length,
+    );
+    if (textBlock.text.length <= blockBudget) {
+      return block;
+    }
+    // Try to cut at a newline boundary
+    let cutPoint = blockBudget;
+    const lastNewline = textBlock.text.lastIndexOf("\n", blockBudget);
+    if (lastNewline > blockBudget * 0.8) {
+      cutPoint = lastNewline;
+    }
+    return {
+      ...textBlock,
+      text: textBlock.text.slice(0, cutPoint) + GUARD_TRUNCATION_SUFFIX,
+    };
+  });
+
+  return { ...msg, content: newContent } as AgentMessage;
+}
+
 type ToolCall = { id: string; name?: string };
 
 function extractAssistantToolCalls(msg: Extract<AgentMessage, { role: "assistant" }>): ToolCall[] {
@@ -116,8 +184,11 @@ export function installSessionToolResultGuard(
       if (id) {
         pending.delete(id);
       }
+      // Apply hard size cap before persistence to prevent oversized tool results
+      // from consuming the entire context window on subsequent LLM calls.
+      const capped = capToolResultSize(nextMessage);
       return originalAppend(
-        persistToolResult(nextMessage, {
+        persistToolResult(capped, {
           toolCallId: id ?? undefined,
           toolName,
           isSynthetic: false,