fix: preserve reasoning tags inside code blocks (#4118) (thanks @vinaygit18)

This commit is contained in:
Shakker
2026-01-29 18:53:05 +00:00
parent c9fe062824
commit 4583f88626
3 changed files with 276 additions and 3 deletions

View File

@@ -46,6 +46,7 @@ Status: beta.
- Config: auto-migrate legacy state/config paths; honor state dir overrides.
- Packaging: include missing dist/shared and dist/link-understanding outputs in npm tarball installs.
- Telegram: avoid silent empty replies, improve polling/network recovery, handle video notes, keep DM thread sessions, ignore non-forum message_thread_id, centralize API error logging, include AccountId in native command context. (#3796, #3013, #2905, #2731, #2492, #2942)
- Telegram: preserve reasoning tags inside code blocks. (#3952) Thanks @vinaygit18.
- Discord: restore username resolution, resolve outbound usernames to IDs, honor threadId replies, guard forum thread access. (#3131, #2649)
- BlueBubbles: coalesce URL link previews, improve reaction handling, preserve reply-tag GUIDs. (#1981, #1641)
- Voice Call: prevent TTS overlap, validate env-var config, return TwiML for conversation calls. (#1713, #1634)

View File

@@ -0,0 +1,218 @@
import { describe, expect, it } from "vitest";
import { stripReasoningTagsFromText } from "./reasoning-tags.js";
describe("stripReasoningTagsFromText", () => {
describe("basic functionality", () => {
it("returns text unchanged when no reasoning tags present", () => {
const input = "Hello, this is a normal message.";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("strips proper think tags", () => {
const input = "Hello <think>internal reasoning</think> world!";
expect(stripReasoningTagsFromText(input)).toBe("Hello world!");
});
it("strips thinking tags", () => {
const input = "Before <thinking>some thought</thinking> after";
expect(stripReasoningTagsFromText(input)).toBe("Before after");
});
it("strips thought tags", () => {
const input = "A <thought>hmm</thought> B";
expect(stripReasoningTagsFromText(input)).toBe("A B");
});
it("strips antthinking tags", () => {
const input = "X <antthinking>internal</antthinking> Y";
expect(stripReasoningTagsFromText(input)).toBe("X Y");
});
it("strips multiple reasoning blocks", () => {
const input = "<think>first</think>A<think>second</think>B";
expect(stripReasoningTagsFromText(input)).toBe("AB");
});
});
describe("code block preservation (issue #3952)", () => {
it("preserves think tags inside fenced code blocks", () => {
const input = "Use the tag like this:\n```\n<think>reasoning</think>\n```\nThat's it!";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("preserves think tags inside inline code", () => {
const input =
"The `<think>` tag is used for reasoning. Don't forget the closing `</think>` tag.";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("preserves tags in fenced code blocks with language specifier", () => {
const input = "Example:\n```xml\n<think>\n <thought>nested</thought>\n</think>\n```\nDone!";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("handles mixed real tags and code tags", () => {
const input = "<think>hidden</think>Visible text with `<think>` example.";
expect(stripReasoningTagsFromText(input)).toBe("Visible text with `<think>` example.");
});
it("preserves both opening and closing tags in backticks", () => {
const input = "Use `<think>` to open and `</think>` to close.";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("preserves think tags in code block at EOF without trailing newline", () => {
const input = "Example:\n```\n<think>reasoning</think>\n```";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("preserves final tags inside code blocks", () => {
const input = "Use `<final>` for final answers in code: ```\n<final>42</final>\n```";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("handles code block followed by real tags", () => {
const input = "```\n<think>code</think>\n```\n<think>real hidden</think>visible";
expect(stripReasoningTagsFromText(input)).toBe("```\n<think>code</think>\n```\nvisible");
});
it("handles multiple code blocks with tags", () => {
const input = "First `<think>` then ```\n<thinking>block</thinking>\n``` then `<thought>`";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
});
describe("edge cases", () => {
it("preserves unclosed <think without angle bracket", () => {
const input = "Here is how to use <think tags in your code";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("strips lone closing tag outside code", () => {
const input = "You can start with <think and then close with </think>";
expect(stripReasoningTagsFromText(input)).toBe(
"You can start with <think and then close with",
);
});
it("handles tags with whitespace", () => {
const input = "A < think >content< /think > B";
expect(stripReasoningTagsFromText(input)).toBe("A B");
});
it("handles empty input", () => {
expect(stripReasoningTagsFromText("")).toBe("");
});
it("handles null-ish input", () => {
expect(stripReasoningTagsFromText(null as unknown as string)).toBe(null);
});
it("preserves think tags inside tilde fenced code blocks", () => {
const input = "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("preserves tags in tilde block at EOF without trailing newline", () => {
const input = "Example:\n~~~js\n<think>code</think>\n~~~";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("handles nested think patterns (first close ends block)", () => {
const input = "<think>outer <think>inner</think> still outer</think>visible";
expect(stripReasoningTagsFromText(input)).toBe("still outervisible");
});
it("strips final tag markup but preserves content (by design)", () => {
const input = "A<final>1</final>B<final>2</final>C";
expect(stripReasoningTagsFromText(input)).toBe("A1B2C");
});
it("preserves final tags in inline code (markup only stripped outside)", () => {
const input = "`<final>` in code, <final>visible</final> outside";
expect(stripReasoningTagsFromText(input)).toBe("`<final>` in code, visible outside");
});
it("handles double backtick inline code with tags", () => {
const input = "Use ``code`` with <think>hidden</think> text";
expect(stripReasoningTagsFromText(input)).toBe("Use ``code`` with text");
});
it("handles fenced code blocks with content", () => {
const input = "Before\n```\ncode\n```\nAfter with <think>hidden</think>";
expect(stripReasoningTagsFromText(input)).toBe("Before\n```\ncode\n```\nAfter with");
});
it("does not match mismatched fence types (``` vs ~~~)", () => {
const input = "```\n<think>not protected\n~~~\n</think>text";
const result = stripReasoningTagsFromText(input);
expect(result).toBe(input);
});
it("handles unicode content inside and around tags", () => {
const input = "你好 <think>思考 🤔</think> 世界";
expect(stripReasoningTagsFromText(input)).toBe("你好 世界");
});
it("handles very long content between tags efficiently", () => {
const longContent = "x".repeat(10000);
const input = `<think>${longContent}</think>visible`;
expect(stripReasoningTagsFromText(input)).toBe("visible");
});
it("handles tags with attributes", () => {
const input = "A <think id='test' class=\"foo\">hidden</think> B";
expect(stripReasoningTagsFromText(input)).toBe("A B");
});
it("is case-insensitive for tag names", () => {
const input = "A <THINK>hidden</THINK> <Thinking>also hidden</Thinking> B";
expect(stripReasoningTagsFromText(input)).toBe("A B");
});
it("handles pathological nested backtick patterns without hanging", () => {
const input = "`".repeat(100) + "<think>test</think>" + "`".repeat(100);
const start = Date.now();
stripReasoningTagsFromText(input);
const elapsed = Date.now() - start;
expect(elapsed).toBeLessThan(1000);
});
it("handles unclosed inline code gracefully", () => {
const input = "Start `unclosed <think>hidden</think> end";
const result = stripReasoningTagsFromText(input);
expect(result).toBe("Start `unclosed end");
});
});
describe("strict vs preserve mode", () => {
it("strict mode truncates on unclosed tag", () => {
const input = "Before <think>unclosed content after";
expect(stripReasoningTagsFromText(input, { mode: "strict" })).toBe("Before");
});
it("preserve mode keeps content after unclosed tag", () => {
const input = "Before <think>unclosed content after";
expect(stripReasoningTagsFromText(input, { mode: "preserve" })).toBe(
"Before unclosed content after",
);
});
});
describe("trim options", () => {
it("trims both sides by default", () => {
const input = " <think>x</think> result <think>y</think> ";
expect(stripReasoningTagsFromText(input)).toBe("result");
});
it("trim=none preserves whitespace", () => {
const input = " <think>x</think> result ";
expect(stripReasoningTagsFromText(input, { trim: "none" })).toBe(" result ");
});
it("trim=start only trims start", () => {
const input = " <think>x</think> result ";
expect(stripReasoningTagsFromText(input, { trim: "start" })).toBe("result ");
});
});
});

View File

@@ -2,8 +2,40 @@ export type ReasoningTagMode = "strict" | "preserve";
export type ReasoningTagTrim = "none" | "start" | "both";
const QUICK_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking|final)\b/i;
const FINAL_TAG_RE = /<\s*\/?\s*final\b[^>]*>/gi;
const THINKING_TAG_RE = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>/gi;
const FINAL_TAG_RE = /<\s*\/?\s*final\b[^<>]*>/gi;
const THINKING_TAG_RE = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^<>]*>/gi;
interface CodeRegion {
start: number;
end: number;
}
function findCodeRegions(text: string): CodeRegion[] {
const regions: CodeRegion[] = [];
const fencedRe = /(^|\n)(```|~~~)[^\n]*\n[\s\S]*?(?:\n\2(?:\n|$)|$)/g;
for (const match of text.matchAll(fencedRe)) {
const start = (match.index ?? 0) + match[1].length;
regions.push({ start, end: start + match[0].length - match[1].length });
}
const inlineRe = /`+[^`]+`+/g;
for (const match of text.matchAll(inlineRe)) {
const start = match.index ?? 0;
const end = start + match[0].length;
const insideFenced = regions.some((r) => start >= r.start && end <= r.end);
if (!insideFenced) {
regions.push({ start, end });
}
}
regions.sort((a, b) => a.start - b.start);
return regions;
}
function isInsideCode(pos: number, regions: CodeRegion[]): boolean {
return regions.some((r) => pos >= r.start && pos < r.end);
}
function applyTrim(value: string, mode: ReasoningTagTrim): string {
if (mode === "none") return value;
@@ -27,11 +59,29 @@ export function stripReasoningTagsFromText(
let cleaned = text;
if (FINAL_TAG_RE.test(cleaned)) {
FINAL_TAG_RE.lastIndex = 0;
cleaned = cleaned.replace(FINAL_TAG_RE, "");
const finalMatches: Array<{ start: number; length: number; inCode: boolean }> = [];
const preCodeRegions = findCodeRegions(cleaned);
for (const match of cleaned.matchAll(FINAL_TAG_RE)) {
const start = match.index ?? 0;
finalMatches.push({
start,
length: match[0].length,
inCode: isInsideCode(start, preCodeRegions),
});
}
for (let i = finalMatches.length - 1; i >= 0; i--) {
const m = finalMatches[i];
if (!m.inCode) {
cleaned = cleaned.slice(0, m.start) + cleaned.slice(m.start + m.length);
}
}
} else {
FINAL_TAG_RE.lastIndex = 0;
}
const codeRegions = findCodeRegions(cleaned);
THINKING_TAG_RE.lastIndex = 0;
let result = "";
let lastIndex = 0;
@@ -41,6 +91,10 @@ export function stripReasoningTagsFromText(
const idx = match.index ?? 0;
const isClose = match[1] === "/";
if (isInsideCode(idx, codeRegions)) {
continue;
}
if (!inThinking) {
result += cleaned.slice(lastIndex, idx);
if (!isClose) {