Mirror of https://github.com/openclaw/openclaw.git (synced 2026-02-09 05:19:32 +08:00)
fix: recover from context overflow caused by oversized tool results (#11579)
* fix: gracefully handle oversized tool results causing context overflow

  When a subagent reads a very large file or gets a huge tool result (e.g., gh pr diff on a massive PR), it can exceed the model's context window in a single prompt. Auto-compaction can't help because there's no older history to compact — just one giant tool result.

  This adds two layers of defense:

  1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens) applied in the session tool result guard before persistence. This prevents extremely large tool results from being stored in full, regardless of model context window size.
  2. Recovery: When context overflow is detected and compaction fails, scan session messages for oversized tool results relative to the model's actual context window (30% max share). If found, truncate them in the session via branching (creating a new branch with truncated content) and retry the prompt.

  The truncation preserves the beginning of the content (most useful for understanding what was read) and appends a notice explaining the truncation and suggesting offset/limit parameters for targeted reads.

  Includes comprehensive tests for:

  - Text truncation with newline-boundary awareness
  - Context-window-proportional size calculation
  - In-memory message truncation
  - Oversized detection heuristics
  - Guard-level size capping during persistence

* fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
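The size budget behind both layers is simple arithmetic: at most 30% of the context window per tool result, converted at roughly 4 characters per token, with an absolute 400K-character cap. A minimal standalone sketch of that calculation (illustrative names only; the shipped helper is calculateMaxToolResultChars in tool-result-truncation.ts below):

// Illustrative sketch of the budget math used in this PR (not the shipped helper).
const MAX_SHARE = 0.3;            // a single tool result may use at most 30% of the window
const CHARS_PER_TOKEN = 4;        // rough heuristic; actual ratio varies by tokenizer
const HARD_MAX_CHARS = 400_000;   // absolute cap, roughly 100K tokens

function maxToolResultChars(contextWindowTokens: number): number {
  const budgetTokens = Math.floor(contextWindowTokens * MAX_SHARE);
  return Math.min(budgetTokens * CHARS_PER_TOKEN, HARD_MAX_CHARS);
}

// e.g. a 128K-token window allows 38_400 tokens ≈ 153_600 chars;
// a 2M-token window is still capped at 400_000 chars.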
@@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai

### Fixes

- Agents: recover from context overflow caused by oversized tool results (pre-emptive capping + fallback truncation). (#11579) Thanks @tyler6204.
- Cron: scheduler reliability (timer drift, restart catch-up, lock contention, stale running markers). (#10776) Thanks @tyler6204.
- Cron: store migration hardening (legacy field migration, parse error handling, explicit delivery mode persistence). (#10776) Thanks @tyler6204.
- Gateway/CLI: when `gateway.bind=lan`, use a LAN IP for probe URLs and Control UI links. (#11448) Thanks @AnonO6.
@@ -52,6 +52,10 @@ import { log } from "./logger.js";
import { resolveModel } from "./model.js";
import { runEmbeddedAttempt } from "./run/attempt.js";
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
import {
  truncateOversizedToolResultsInSession,
  sessionLikelyHasOversizedToolResults,
} from "./tool-result-truncation.js";
import { describeUnknownError } from "./utils.js";

type ApiKeyInfo = ResolvedProviderAuth;

@@ -321,6 +325,7 @@ export async function runEmbeddedPiAgent(

  const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
  let overflowCompactionAttempts = 0;
  let toolResultTruncationAttempted = false;
  try {
    while (true) {
      attemptedThinking.add(thinkLevel);

@@ -437,6 +442,47 @@ export async function runEmbeddedPiAgent(
          `auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
        );
      }

      // Fallback: try truncating oversized tool results in the session.
      // This handles the case where a single tool result (e.g., reading a
      // huge file or getting a massive PR diff) exceeds the context window,
      // and compaction can't help because there's no older history to compact.
      if (!toolResultTruncationAttempted) {
        const contextWindowTokens = ctxInfo.tokens;
        const hasOversized = attempt.messagesSnapshot
          ? sessionLikelyHasOversizedToolResults({
              messages: attempt.messagesSnapshot,
              contextWindowTokens,
            })
          : false;

        if (hasOversized) {
          toolResultTruncationAttempted = true;
          log.warn(
            `[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
              `(contextWindow=${contextWindowTokens} tokens)`,
          );
          const truncResult = await truncateOversizedToolResultsInSession({
            sessionFile: params.sessionFile,
            contextWindowTokens,
            sessionId: params.sessionId,
            sessionKey: params.sessionKey,
          });
          if (truncResult.truncated) {
            log.info(
              `[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
            );
            // Reset compaction attempts so compaction can be tried again
            // after truncation (the session is now smaller)
            overflowCompactionAttempts = 0;
            continue;
          }
          log.warn(
            `[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
          );
        }
      }

      const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
      return {
        payloads: [
src/agents/pi-embedded-runner/tool-result-truncation.test.ts (new file, 215 lines)
@@ -0,0 +1,215 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { describe, expect, it } from "vitest";
import {
  truncateToolResultText,
  calculateMaxToolResultChars,
  truncateOversizedToolResultsInMessages,
  isOversizedToolResult,
  sessionLikelyHasOversizedToolResults,
  HARD_MAX_TOOL_RESULT_CHARS,
} from "./tool-result-truncation.js";

function makeToolResult(text: string, toolCallId = "call_1"): AgentMessage {
  return {
    role: "toolResult",
    toolCallId,
    toolName: "read",
    content: [{ type: "text", text }],
    isError: false,
    timestamp: Date.now(),
  } as AgentMessage;
}

function makeUserMessage(text: string): AgentMessage {
  return {
    role: "user",
    content: text,
    timestamp: Date.now(),
  } as AgentMessage;
}

function makeAssistantMessage(text: string): AgentMessage {
  return {
    role: "assistant",
    content: [{ type: "text", text }],
    api: "messages",
    provider: "anthropic",
    model: "claude-sonnet-4-20250514",
    usage: {
      inputTokens: 0,
      outputTokens: 0,
      cacheReadInputTokens: 0,
      cacheCreationInputTokens: 0,
    },
    stopReason: "end_turn",
    timestamp: Date.now(),
  } as AgentMessage;
}

describe("truncateToolResultText", () => {
  it("returns text unchanged when under limit", () => {
    const text = "hello world";
    expect(truncateToolResultText(text, 1000)).toBe(text);
  });

  it("truncates text that exceeds limit", () => {
    const text = "a".repeat(10_000);
    const result = truncateToolResultText(text, 5_000);
    expect(result.length).toBeLessThan(text.length);
    expect(result).toContain("truncated");
  });

  it("preserves at least MIN_KEEP_CHARS (2000)", () => {
    const text = "x".repeat(50_000);
    const result = truncateToolResultText(text, 100); // Even with small limit
    expect(result.length).toBeGreaterThan(2000);
  });

  it("tries to break at newline boundary", () => {
    const lines = Array.from({ length: 100 }, (_, i) => `line ${i}: ${"x".repeat(50)}`).join("\n");
    const result = truncateToolResultText(lines, 3000);
    // Should contain truncation notice
    expect(result).toContain("truncated");
    // The truncated content should be shorter than the original
    expect(result.length).toBeLessThan(lines.length);
    // Extract the kept content (before the truncation suffix marker)
    const suffixIndex = result.indexOf("\n\n⚠️");
    if (suffixIndex > 0) {
      const keptContent = result.slice(0, suffixIndex);
      // Should end at a newline boundary (i.e., the last char before suffix is a complete line)
      const lastNewline = keptContent.lastIndexOf("\n");
      // The last newline should be near the end (within the last line)
      expect(lastNewline).toBeGreaterThan(keptContent.length - 100);
    }
  });
});

describe("calculateMaxToolResultChars", () => {
  it("scales with context window size", () => {
    const small = calculateMaxToolResultChars(32_000);
    const large = calculateMaxToolResultChars(200_000);
    expect(large).toBeGreaterThan(small);
  });

  it("caps at HARD_MAX_TOOL_RESULT_CHARS for very large windows", () => {
    const result = calculateMaxToolResultChars(2_000_000); // 2M token window
    expect(result).toBeLessThanOrEqual(HARD_MAX_TOOL_RESULT_CHARS);
  });

  it("returns reasonable size for 128K context", () => {
    const result = calculateMaxToolResultChars(128_000);
    // 30% of 128K = 38.4K tokens * 4 chars = 153.6K chars
    expect(result).toBeGreaterThan(100_000);
    expect(result).toBeLessThan(200_000);
  });
});

describe("isOversizedToolResult", () => {
  it("returns false for small tool results", () => {
    const msg = makeToolResult("small content");
    expect(isOversizedToolResult(msg, 200_000)).toBe(false);
  });

  it("returns true for oversized tool results", () => {
    const msg = makeToolResult("x".repeat(500_000));
    expect(isOversizedToolResult(msg, 128_000)).toBe(true);
  });

  it("returns false for non-toolResult messages", () => {
    const msg = makeUserMessage("x".repeat(500_000));
    expect(isOversizedToolResult(msg, 128_000)).toBe(false);
  });
});

describe("truncateOversizedToolResultsInMessages", () => {
  it("returns unchanged messages when nothing is oversized", () => {
    const messages = [
      makeUserMessage("hello"),
      makeAssistantMessage("using tool"),
      makeToolResult("small result"),
    ];
    const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
      messages,
      200_000,
    );
    expect(truncatedCount).toBe(0);
    expect(result).toEqual(messages);
  });

  it("truncates oversized tool results", () => {
    const bigContent = "x".repeat(500_000);
    const messages = [
      makeUserMessage("hello"),
      makeAssistantMessage("reading file"),
      makeToolResult(bigContent),
    ];
    const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
      messages,
      128_000,
    );
    expect(truncatedCount).toBe(1);
    const toolResult = result[2] as { content: Array<{ text: string }> };
    expect(toolResult.content[0].text.length).toBeLessThan(bigContent.length);
    expect(toolResult.content[0].text).toContain("truncated");
  });

  it("preserves non-toolResult messages", () => {
    const messages = [
      makeUserMessage("hello"),
      makeAssistantMessage("reading file"),
      makeToolResult("x".repeat(500_000)),
    ];
    const { messages: result } = truncateOversizedToolResultsInMessages(messages, 128_000);
    expect(result[0]).toBe(messages[0]); // Same reference
    expect(result[1]).toBe(messages[1]); // Same reference
  });

  it("handles multiple oversized tool results", () => {
    const messages = [
      makeUserMessage("hello"),
      makeAssistantMessage("reading files"),
      makeToolResult("x".repeat(500_000), "call_1"),
      makeToolResult("y".repeat(500_000), "call_2"),
    ];
    const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
      messages,
      128_000,
    );
    expect(truncatedCount).toBe(2);
    for (const msg of result.slice(2)) {
      const tr = msg as { content: Array<{ text: string }> };
      expect(tr.content[0].text.length).toBeLessThan(500_000);
    }
  });
});

describe("sessionLikelyHasOversizedToolResults", () => {
  it("returns false when no tool results are oversized", () => {
    const messages = [makeUserMessage("hello"), makeToolResult("small result")];
    expect(
      sessionLikelyHasOversizedToolResults({
        messages,
        contextWindowTokens: 200_000,
      }),
    ).toBe(false);
  });

  it("returns true when a tool result is oversized", () => {
    const messages = [makeUserMessage("hello"), makeToolResult("x".repeat(500_000))];
    expect(
      sessionLikelyHasOversizedToolResults({
        messages,
        contextWindowTokens: 128_000,
      }),
    ).toBe(true);
  });

  it("returns false for empty messages", () => {
    expect(
      sessionLikelyHasOversizedToolResults({
        messages: [],
        contextWindowTokens: 200_000,
      }),
    ).toBe(false);
  });
});
src/agents/pi-embedded-runner/tool-result-truncation.ts (new file, 328 lines)
@@ -0,0 +1,328 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { TextContent } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import { log } from "./logger.js";

/**
 * Maximum share of the context window a single tool result should occupy.
 * This is intentionally conservative – a single tool result should not
 * consume more than 30% of the context window even without other messages.
 */
const MAX_TOOL_RESULT_CONTEXT_SHARE = 0.3;

/**
 * Hard character limit for a single tool result text block.
 * Even for the largest context windows (~2M tokens), a single tool result
 * should not exceed ~400K characters (~100K tokens).
 * This acts as a safety net when we don't know the context window size.
 */
export const HARD_MAX_TOOL_RESULT_CHARS = 400_000;

/**
 * Minimum characters to keep when truncating.
 * We always keep at least the first portion so the model understands
 * what was in the content.
 */
const MIN_KEEP_CHARS = 2_000;

/**
 * Suffix appended to truncated tool results.
 */
const TRUNCATION_SUFFIX =
  "\n\n⚠️ [Content truncated — original was too large for the model's context window. " +
  "The content above is a partial view. If you need more, request specific sections or use " +
  "offset/limit parameters to read smaller chunks.]";

/**
 * Truncate a single text string to fit within maxChars, preserving the beginning.
 */
export function truncateToolResultText(text: string, maxChars: number): string {
  if (text.length <= maxChars) {
    return text;
  }
  const keepChars = Math.max(MIN_KEEP_CHARS, maxChars - TRUNCATION_SUFFIX.length);
  // Try to break at a newline boundary to avoid cutting mid-line
  let cutPoint = keepChars;
  const lastNewline = text.lastIndexOf("\n", keepChars);
  if (lastNewline > keepChars * 0.8) {
    cutPoint = lastNewline;
  }
  return text.slice(0, cutPoint) + TRUNCATION_SUFFIX;
}

/**
 * Calculate the maximum allowed characters for a single tool result
 * based on the model's context window tokens.
 *
 * Uses a rough 4 chars ≈ 1 token heuristic (conservative for English text;
 * actual ratio varies by tokenizer).
 */
export function calculateMaxToolResultChars(contextWindowTokens: number): number {
  const maxTokens = Math.floor(contextWindowTokens * MAX_TOOL_RESULT_CONTEXT_SHARE);
  // Rough conversion: ~4 chars per token on average
  const maxChars = maxTokens * 4;
  return Math.min(maxChars, HARD_MAX_TOOL_RESULT_CHARS);
}

/**
 * Get the total character count of text content blocks in a tool result message.
 */
function getToolResultTextLength(msg: AgentMessage): number {
  if (!msg || (msg as { role?: string }).role !== "toolResult") {
    return 0;
  }
  const content = (msg as { content?: unknown }).content;
  if (!Array.isArray(content)) {
    return 0;
  }
  let totalLength = 0;
  for (const block of content) {
    if (block && typeof block === "object" && (block as { type?: string }).type === "text") {
      const text = (block as TextContent).text;
      if (typeof text === "string") {
        totalLength += text.length;
      }
    }
  }
  return totalLength;
}

/**
 * Truncate a tool result message's text content blocks to fit within maxChars.
 * Returns a new message (does not mutate the original).
 */
function truncateToolResultMessage(msg: AgentMessage, maxChars: number): AgentMessage {
  const content = (msg as { content?: unknown }).content;
  if (!Array.isArray(content)) {
    return msg;
  }

  // Calculate total text size
  const totalTextChars = getToolResultTextLength(msg);
  if (totalTextChars <= maxChars) {
    return msg;
  }

  // Distribute the budget proportionally among text blocks
  const newContent = content.map((block: unknown) => {
    if (!block || typeof block !== "object" || (block as { type?: string }).type !== "text") {
      return block; // Keep non-text blocks (images) as-is
    }
    const textBlock = block as TextContent;
    if (typeof textBlock.text !== "string") {
      return block;
    }
    // Proportional budget for this block
    const blockShare = textBlock.text.length / totalTextChars;
    const blockBudget = Math.max(MIN_KEEP_CHARS, Math.floor(maxChars * blockShare));
    return {
      ...textBlock,
      text: truncateToolResultText(textBlock.text, blockBudget),
    };
  });

  return { ...msg, content: newContent } as AgentMessage;
}

/**
 * Find oversized tool result entries in a session and truncate them.
 *
 * This operates on the session file by:
 * 1. Opening the session manager
 * 2. Walking the current branch to find oversized tool results
 * 3. Branching from before the first oversized tool result
 * 4. Re-appending all entries from that point with truncated tool results
 *
 * @returns Object indicating whether any truncation was performed
 */
export async function truncateOversizedToolResultsInSession(params: {
  sessionFile: string;
  contextWindowTokens: number;
  sessionId?: string;
  sessionKey?: string;
}): Promise<{ truncated: boolean; truncatedCount: number; reason?: string }> {
  const { sessionFile, contextWindowTokens } = params;
  const maxChars = calculateMaxToolResultChars(contextWindowTokens);

  try {
    const sessionManager = SessionManager.open(sessionFile);
    const branch = sessionManager.getBranch();

    if (branch.length === 0) {
      return { truncated: false, truncatedCount: 0, reason: "empty session" };
    }

    // Find oversized tool result entries and their indices in the branch
    const oversizedIndices: number[] = [];
    for (let i = 0; i < branch.length; i++) {
      const entry = branch[i];
      if (entry.type !== "message") {
        continue;
      }
      const msg = entry.message;
      if ((msg as { role?: string }).role !== "toolResult") {
        continue;
      }
      const textLength = getToolResultTextLength(msg);
      if (textLength > maxChars) {
        oversizedIndices.push(i);
        log.info(
          `[tool-result-truncation] Found oversized tool result: ` +
            `entry=${entry.id} chars=${textLength} maxChars=${maxChars} ` +
            `sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
        );
      }
    }

    if (oversizedIndices.length === 0) {
      return { truncated: false, truncatedCount: 0, reason: "no oversized tool results" };
    }

    // Branch from the parent of the first oversized entry
    const firstOversizedIdx = oversizedIndices[0];
    const firstOversizedEntry = branch[firstOversizedIdx];
    const branchFromId = firstOversizedEntry.parentId;

    if (!branchFromId) {
      // The oversized entry is the root - very unusual but handle it
      sessionManager.resetLeaf();
    } else {
      sessionManager.branch(branchFromId);
    }

    // Re-append all entries from the first oversized one onwards,
    // with truncated tool results
    const oversizedSet = new Set(oversizedIndices);
    let truncatedCount = 0;

    for (let i = firstOversizedIdx; i < branch.length; i++) {
      const entry = branch[i];

      if (entry.type === "message") {
        let message = entry.message;

        if (oversizedSet.has(i)) {
          message = truncateToolResultMessage(message, maxChars);
          truncatedCount++;
          const newLength = getToolResultTextLength(message);
          log.info(
            `[tool-result-truncation] Truncated tool result: ` +
              `originalEntry=${entry.id} newChars=${newLength} ` +
              `sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
          );
        }

        // appendMessage expects Message | CustomMessage | BashExecutionMessage
        sessionManager.appendMessage(message as Parameters<typeof sessionManager.appendMessage>[0]);
      } else if (entry.type === "compaction") {
        sessionManager.appendCompaction(
          entry.summary,
          entry.firstKeptEntryId,
          entry.tokensBefore,
          entry.details,
          entry.fromHook,
        );
      } else if (entry.type === "thinking_level_change") {
        sessionManager.appendThinkingLevelChange(entry.thinkingLevel);
      } else if (entry.type === "model_change") {
        sessionManager.appendModelChange(entry.provider, entry.modelId);
      } else if (entry.type === "custom") {
        sessionManager.appendCustomEntry(entry.customType, entry.data);
      } else if (entry.type === "custom_message") {
        sessionManager.appendCustomMessageEntry(
          entry.customType,
          entry.content,
          entry.display,
          entry.details,
        );
      } else if (entry.type === "branch_summary") {
        // Branch summaries reference specific entry IDs - skip to avoid inconsistency
        continue;
      } else if (entry.type === "label") {
        // Labels reference specific entry IDs - skip to avoid inconsistency
        continue;
      } else if (entry.type === "session_info") {
        if (entry.name) {
          sessionManager.appendSessionInfo(entry.name);
        }
      }
    }

    log.info(
      `[tool-result-truncation] Truncated ${truncatedCount} tool result(s) in session ` +
        `(contextWindow=${contextWindowTokens} maxChars=${maxChars}) ` +
        `sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
    );

    return { truncated: true, truncatedCount };
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err);
    log.warn(`[tool-result-truncation] Failed to truncate: ${errMsg}`);
    return { truncated: false, truncatedCount: 0, reason: errMsg };
  }
}

/**
 * Truncate oversized tool results in an array of messages (in-memory).
 * Returns a new array with truncated messages.
 *
 * This is used as a pre-emptive guard before sending messages to the LLM,
 * without modifying the session file.
 */
export function truncateOversizedToolResultsInMessages(
  messages: AgentMessage[],
  contextWindowTokens: number,
): { messages: AgentMessage[]; truncatedCount: number } {
  const maxChars = calculateMaxToolResultChars(contextWindowTokens);
  let truncatedCount = 0;

  const result = messages.map((msg) => {
    if ((msg as { role?: string }).role !== "toolResult") {
      return msg;
    }
    const textLength = getToolResultTextLength(msg);
    if (textLength <= maxChars) {
      return msg;
    }
    truncatedCount++;
    return truncateToolResultMessage(msg, maxChars);
  });

  return { messages: result, truncatedCount };
}

/**
 * Check if a tool result message exceeds the size limit for a given context window.
 */
export function isOversizedToolResult(msg: AgentMessage, contextWindowTokens: number): boolean {
  if ((msg as { role?: string }).role !== "toolResult") {
    return false;
  }
  const maxChars = calculateMaxToolResultChars(contextWindowTokens);
  return getToolResultTextLength(msg) > maxChars;
}

/**
 * Estimate whether the session likely has oversized tool results that caused
 * a context overflow. Used as a heuristic to decide whether to attempt
 * tool result truncation before giving up.
 */
export function sessionLikelyHasOversizedToolResults(params: {
  messages: AgentMessage[];
  contextWindowTokens: number;
}): boolean {
  const { messages, contextWindowTokens } = params;
  const maxChars = calculateMaxToolResultChars(contextWindowTokens);

  for (const msg of messages) {
    if ((msg as { role?: string }).role !== "toolResult") {
      continue;
    }
    const textLength = getToolResultTextLength(msg);
    if (textLength > maxChars) {
      return true;
    }
  }

  return false;
}
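For orientation, a hedged sketch of how a caller might compose the in-memory helpers exported above (the function name prepareMessagesForRetry and the surrounding runner wiring are assumptions; the imports and signatures come from the file above, assuming the caller sits next to it):

import type { AgentMessage } from "@mariozechner/pi-agent-core";
import {
  sessionLikelyHasOversizedToolResults,
  truncateOversizedToolResultsInMessages,
} from "./tool-result-truncation.js";

// Hypothetical call site: trim a message snapshot before retrying a prompt.
function prepareMessagesForRetry(
  messages: AgentMessage[],
  contextWindowTokens: number,
): AgentMessage[] {
  if (!sessionLikelyHasOversizedToolResults({ messages, contextWindowTokens })) {
    return messages; // nothing oversized; send as-is
  }
  const { messages: trimmed, truncatedCount } = truncateOversizedToolResultsInMessages(
    messages,
    contextWindowTokens,
  );
  console.log(`truncated ${truncatedCount} oversized tool result(s)`);
  return trimmed;
}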
@@ -206,4 +206,67 @@ describe("installSessionToolResultGuard", () => {

    expect(messages.map((m) => m.role)).toEqual(["assistant", "toolResult"]);
  });

  it("caps oversized tool result text during persistence", () => {
    const sm = SessionManager.inMemory();
    installSessionToolResultGuard(sm);

    sm.appendMessage(toolCallMessage);
    sm.appendMessage(
      asAppendMessage({
        role: "toolResult",
        toolCallId: "call_1",
        toolName: "read",
        content: [{ type: "text", text: "x".repeat(500_000) }],
        isError: false,
        timestamp: Date.now(),
      }),
    );

    const entries = sm
      .getEntries()
      .filter((e) => e.type === "message")
      .map((e) => (e as { message: AgentMessage }).message);

    const toolResult = entries.find((m) => m.role === "toolResult") as {
      content: Array<{ type: string; text: string }>;
    };
    expect(toolResult).toBeDefined();
    const textBlock = toolResult.content.find((b: { type: string }) => b.type === "text") as {
      text: string;
    };
    expect(textBlock.text.length).toBeLessThan(500_000);
    expect(textBlock.text).toContain("truncated");
  });

  it("does not truncate tool results under the limit", () => {
    const sm = SessionManager.inMemory();
    installSessionToolResultGuard(sm);

    const originalText = "small tool result";
    sm.appendMessage(toolCallMessage);
    sm.appendMessage(
      asAppendMessage({
        role: "toolResult",
        toolCallId: "call_1",
        toolName: "read",
        content: [{ type: "text", text: originalText }],
        isError: false,
        timestamp: Date.now(),
      }),
    );

    const entries = sm
      .getEntries()
      .filter((e) => e.type === "message")
      .map((e) => (e as { message: AgentMessage }).message);

    const toolResult = entries.find((m) => m.role === "toolResult") as {
      content: Array<{ type: string; text: string }>;
    };
    const textBlock = toolResult.content.find((b: { type: string }) => b.type === "text") as {
      text: string;
    };
    expect(textBlock.text).toBe(originalText);
  });
});
@@ -1,8 +1,76 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { TextContent } from "@mariozechner/pi-ai";
import type { SessionManager } from "@mariozechner/pi-coding-agent";
import { emitSessionTranscriptUpdate } from "../sessions/transcript-events.js";
import { HARD_MAX_TOOL_RESULT_CHARS } from "./pi-embedded-runner/tool-result-truncation.js";
import { makeMissingToolResult, sanitizeToolCallInputs } from "./session-transcript-repair.js";

const GUARD_TRUNCATION_SUFFIX =
  "\n\n⚠️ [Content truncated during persistence — original exceeded size limit. " +
  "Use offset/limit parameters or request specific sections for large content.]";

/**
 * Truncate oversized text content blocks in a tool result message.
 * Returns the original message if under the limit, or a new message with
 * truncated text blocks otherwise.
 */
function capToolResultSize(msg: AgentMessage): AgentMessage {
  const role = (msg as { role?: string }).role;
  if (role !== "toolResult") {
    return msg;
  }
  const content = (msg as { content?: unknown }).content;
  if (!Array.isArray(content)) {
    return msg;
  }

  // Calculate total text size
  let totalTextChars = 0;
  for (const block of content) {
    if (block && typeof block === "object" && (block as { type?: string }).type === "text") {
      const text = (block as TextContent).text;
      if (typeof text === "string") {
        totalTextChars += text.length;
      }
    }
  }

  if (totalTextChars <= HARD_MAX_TOOL_RESULT_CHARS) {
    return msg;
  }

  // Truncate proportionally
  const newContent = content.map((block: unknown) => {
    if (!block || typeof block !== "object" || (block as { type?: string }).type !== "text") {
      return block;
    }
    const textBlock = block as TextContent;
    if (typeof textBlock.text !== "string") {
      return block;
    }
    const blockShare = textBlock.text.length / totalTextChars;
    const blockBudget = Math.max(
      2_000,
      Math.floor(HARD_MAX_TOOL_RESULT_CHARS * blockShare) - GUARD_TRUNCATION_SUFFIX.length,
    );
    if (textBlock.text.length <= blockBudget) {
      return block;
    }
    // Try to cut at a newline boundary
    let cutPoint = blockBudget;
    const lastNewline = textBlock.text.lastIndexOf("\n", blockBudget);
    if (lastNewline > blockBudget * 0.8) {
      cutPoint = lastNewline;
    }
    return {
      ...textBlock,
      text: textBlock.text.slice(0, cutPoint) + GUARD_TRUNCATION_SUFFIX,
    };
  });

  return { ...msg, content: newContent } as AgentMessage;
}

type ToolCall = { id: string; name?: string };

function extractAssistantToolCalls(msg: Extract<AgentMessage, { role: "assistant" }>): ToolCall[] {

@@ -116,8 +184,11 @@ export function installSessionToolResultGuard(
    if (id) {
      pending.delete(id);
    }
+   // Apply hard size cap before persistence to prevent oversized tool results
+   // from consuming the entire context window on subsequent LLM calls.
+   const capped = capToolResultSize(nextMessage);
    return originalAppend(
-     persistToolResult(nextMessage, {
+     persistToolResult(capped, {
        toolCallId: id ?? undefined,
        toolName,
        isSynthetic: false,
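A small sketch of the guard's observable effect, modeled on the tests above. SessionManager.inMemory and installSessionToolResultGuard appear in this diff; the import path for the guard module is an assumption for illustration:

import { SessionManager } from "@mariozechner/pi-coding-agent";
// Path below is assumed; the guard module's real location is not shown in this diff.
import { installSessionToolResultGuard } from "./session-tool-result-guard.js";

const sm = SessionManager.inMemory();
installSessionToolResultGuard(sm);
// From here on, any appended toolResult whose text exceeds HARD_MAX_TOOL_RESULT_CHARS (400K chars)
// is persisted as a truncated prefix plus the "[Content truncated during persistence ...]" notice,
// while smaller tool results pass through unchanged.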