diff --git a/CHANGELOG.md b/CHANGELOG.md index 208b64a8d6..a134359f54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ Status: beta. - Config: auto-migrate legacy state/config paths; honor state dir overrides. - Packaging: include missing dist/shared and dist/link-understanding outputs in npm tarball installs. - Telegram: avoid silent empty replies, improve polling/network recovery, handle video notes, keep DM thread sessions, ignore non-forum message_thread_id, centralize API error logging, include AccountId in native command context. (#3796, #3013, #2905, #2731, #2492, #2942) +- Telegram: preserve reasoning tags inside code blocks. (#3952) Thanks @vinaygit18. - Discord: restore username resolution, resolve outbound usernames to IDs, honor threadId replies, guard forum thread access. (#3131, #2649) - BlueBubbles: coalesce URL link previews, improve reaction handling, preserve reply-tag GUIDs. (#1981, #1641) - Voice Call: prevent TTS overlap, validate env-var config, return TwiML for conversation calls. (#1713, #1634) diff --git a/src/shared/text/reasoning-tags.test.ts b/src/shared/text/reasoning-tags.test.ts new file mode 100644 index 0000000000..d72d0cde2a --- /dev/null +++ b/src/shared/text/reasoning-tags.test.ts @@ -0,0 +1,218 @@ +import { describe, expect, it } from "vitest"; +import { stripReasoningTagsFromText } from "./reasoning-tags.js"; + +describe("stripReasoningTagsFromText", () => { + describe("basic functionality", () => { + it("returns text unchanged when no reasoning tags present", () => { + const input = "Hello, this is a normal message."; + expect(stripReasoningTagsFromText(input)).toBe(input); + }); + + it("strips proper think tags", () => { + const input = "Hello internal reasoning world!"; + expect(stripReasoningTagsFromText(input)).toBe("Hello world!"); + }); + + it("strips thinking tags", () => { + const input = "Before some thought after"; + expect(stripReasoningTagsFromText(input)).toBe("Before after"); + }); + + it("strips thought tags", () => { + const input = "A hmm B"; + expect(stripReasoningTagsFromText(input)).toBe("A B"); + }); + + it("strips antthinking tags", () => { + const input = "X internal Y"; + expect(stripReasoningTagsFromText(input)).toBe("X Y"); + }); + + it("strips multiple reasoning blocks", () => { + const input = "firstAsecondB"; + expect(stripReasoningTagsFromText(input)).toBe("AB"); + }); + }); + + describe("code block preservation (issue #3952)", () => { + it("preserves think tags inside fenced code blocks", () => { + const input = "Use the tag like this:\n```\nreasoning\n```\nThat's it!"; + expect(stripReasoningTagsFromText(input)).toBe(input); + }); + + it("preserves think tags inside inline code", () => { + const input = + "The `` tag is used for reasoning. Don't forget the closing `` tag."; + expect(stripReasoningTagsFromText(input)).toBe(input); + }); + + it("preserves tags in fenced code blocks with language specifier", () => { + const input = "Example:\n```xml\n\n nested\n\n```\nDone!"; + expect(stripReasoningTagsFromText(input)).toBe(input); + }); + + it("handles mixed real tags and code tags", () => { + const input = "hiddenVisible text with `` example."; + expect(stripReasoningTagsFromText(input)).toBe("Visible text with `` example."); + }); + + it("preserves both opening and closing tags in backticks", () => { + const input = "Use `` to open and `` to close."; + expect(stripReasoningTagsFromText(input)).toBe(input); + }); + + it("preserves think tags in code block at EOF without trailing newline", () => { + const input = "Example:\n```\nreasoning\n```"; + expect(stripReasoningTagsFromText(input)).toBe(input); + }); + + it("preserves final tags inside code blocks", () => { + const input = "Use `` for final answers in code: ```\n42\n```"; + expect(stripReasoningTagsFromText(input)).toBe(input); + }); + + it("handles code block followed by real tags", () => { + const input = "```\ncode\n```\nreal hiddenvisible"; + expect(stripReasoningTagsFromText(input)).toBe("```\ncode\n```\nvisible"); + }); + + it("handles multiple code blocks with tags", () => { + const input = "First `` then ```\nblock\n``` then ``"; + expect(stripReasoningTagsFromText(input)).toBe(input); + }); + }); + + describe("edge cases", () => { + it("preserves unclosed { + const input = "Here is how to use { + const input = "You can start with "; + expect(stripReasoningTagsFromText(input)).toBe( + "You can start with { + const input = "A < think >content< /think > B"; + expect(stripReasoningTagsFromText(input)).toBe("A B"); + }); + + it("handles empty input", () => { + expect(stripReasoningTagsFromText("")).toBe(""); + }); + + it("handles null-ish input", () => { + expect(stripReasoningTagsFromText(null as unknown as string)).toBe(null); + }); + + it("preserves think tags inside tilde fenced code blocks", () => { + const input = "Example:\n~~~\nreasoning\n~~~\nDone!"; + expect(stripReasoningTagsFromText(input)).toBe(input); + }); + + it("preserves tags in tilde block at EOF without trailing newline", () => { + const input = "Example:\n~~~js\ncode\n~~~"; + expect(stripReasoningTagsFromText(input)).toBe(input); + }); + + it("handles nested think patterns (first close ends block)", () => { + const input = "outer inner still outervisible"; + expect(stripReasoningTagsFromText(input)).toBe("still outervisible"); + }); + + it("strips final tag markup but preserves content (by design)", () => { + const input = "A1B2C"; + expect(stripReasoningTagsFromText(input)).toBe("A1B2C"); + }); + + it("preserves final tags in inline code (markup only stripped outside)", () => { + const input = "`` in code, visible outside"; + expect(stripReasoningTagsFromText(input)).toBe("`` in code, visible outside"); + }); + + it("handles double backtick inline code with tags", () => { + const input = "Use ``code`` with hidden text"; + expect(stripReasoningTagsFromText(input)).toBe("Use ``code`` with text"); + }); + + it("handles fenced code blocks with content", () => { + const input = "Before\n```\ncode\n```\nAfter with hidden"; + expect(stripReasoningTagsFromText(input)).toBe("Before\n```\ncode\n```\nAfter with"); + }); + + it("does not match mismatched fence types (``` vs ~~~)", () => { + const input = "```\nnot protected\n~~~\ntext"; + const result = stripReasoningTagsFromText(input); + expect(result).toBe(input); + }); + + it("handles unicode content inside and around tags", () => { + const input = "你好 思考 🤔 世界"; + expect(stripReasoningTagsFromText(input)).toBe("你好 世界"); + }); + + it("handles very long content between tags efficiently", () => { + const longContent = "x".repeat(10000); + const input = `${longContent}visible`; + expect(stripReasoningTagsFromText(input)).toBe("visible"); + }); + + it("handles tags with attributes", () => { + const input = "A hidden B"; + expect(stripReasoningTagsFromText(input)).toBe("A B"); + }); + + it("is case-insensitive for tag names", () => { + const input = "A hidden also hidden B"; + expect(stripReasoningTagsFromText(input)).toBe("A B"); + }); + + it("handles pathological nested backtick patterns without hanging", () => { + const input = "`".repeat(100) + "test" + "`".repeat(100); + const start = Date.now(); + stripReasoningTagsFromText(input); + const elapsed = Date.now() - start; + expect(elapsed).toBeLessThan(1000); + }); + + it("handles unclosed inline code gracefully", () => { + const input = "Start `unclosed hidden end"; + const result = stripReasoningTagsFromText(input); + expect(result).toBe("Start `unclosed end"); + }); + }); + + describe("strict vs preserve mode", () => { + it("strict mode truncates on unclosed tag", () => { + const input = "Before unclosed content after"; + expect(stripReasoningTagsFromText(input, { mode: "strict" })).toBe("Before"); + }); + + it("preserve mode keeps content after unclosed tag", () => { + const input = "Before unclosed content after"; + expect(stripReasoningTagsFromText(input, { mode: "preserve" })).toBe( + "Before unclosed content after", + ); + }); + }); + + describe("trim options", () => { + it("trims both sides by default", () => { + const input = " x result y "; + expect(stripReasoningTagsFromText(input)).toBe("result"); + }); + + it("trim=none preserves whitespace", () => { + const input = " x result "; + expect(stripReasoningTagsFromText(input, { trim: "none" })).toBe(" result "); + }); + + it("trim=start only trims start", () => { + const input = " x result "; + expect(stripReasoningTagsFromText(input, { trim: "start" })).toBe("result "); + }); + }); +}); diff --git a/src/shared/text/reasoning-tags.ts b/src/shared/text/reasoning-tags.ts index 822138e556..afb8f891fe 100644 --- a/src/shared/text/reasoning-tags.ts +++ b/src/shared/text/reasoning-tags.ts @@ -2,8 +2,40 @@ export type ReasoningTagMode = "strict" | "preserve"; export type ReasoningTagTrim = "none" | "start" | "both"; const QUICK_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking|final)\b/i; -const FINAL_TAG_RE = /<\s*\/?\s*final\b[^>]*>/gi; -const THINKING_TAG_RE = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>/gi; +const FINAL_TAG_RE = /<\s*\/?\s*final\b[^<>]*>/gi; +const THINKING_TAG_RE = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^<>]*>/gi; + +interface CodeRegion { + start: number; + end: number; +} + +function findCodeRegions(text: string): CodeRegion[] { + const regions: CodeRegion[] = []; + + const fencedRe = /(^|\n)(```|~~~)[^\n]*\n[\s\S]*?(?:\n\2(?:\n|$)|$)/g; + for (const match of text.matchAll(fencedRe)) { + const start = (match.index ?? 0) + match[1].length; + regions.push({ start, end: start + match[0].length - match[1].length }); + } + + const inlineRe = /`+[^`]+`+/g; + for (const match of text.matchAll(inlineRe)) { + const start = match.index ?? 0; + const end = start + match[0].length; + const insideFenced = regions.some((r) => start >= r.start && end <= r.end); + if (!insideFenced) { + regions.push({ start, end }); + } + } + + regions.sort((a, b) => a.start - b.start); + return regions; +} + +function isInsideCode(pos: number, regions: CodeRegion[]): boolean { + return regions.some((r) => pos >= r.start && pos < r.end); +} function applyTrim(value: string, mode: ReasoningTagTrim): string { if (mode === "none") return value; @@ -27,11 +59,29 @@ export function stripReasoningTagsFromText( let cleaned = text; if (FINAL_TAG_RE.test(cleaned)) { FINAL_TAG_RE.lastIndex = 0; - cleaned = cleaned.replace(FINAL_TAG_RE, ""); + const finalMatches: Array<{ start: number; length: number; inCode: boolean }> = []; + const preCodeRegions = findCodeRegions(cleaned); + for (const match of cleaned.matchAll(FINAL_TAG_RE)) { + const start = match.index ?? 0; + finalMatches.push({ + start, + length: match[0].length, + inCode: isInsideCode(start, preCodeRegions), + }); + } + + for (let i = finalMatches.length - 1; i >= 0; i--) { + const m = finalMatches[i]; + if (!m.inCode) { + cleaned = cleaned.slice(0, m.start) + cleaned.slice(m.start + m.length); + } + } } else { FINAL_TAG_RE.lastIndex = 0; } + const codeRegions = findCodeRegions(cleaned); + THINKING_TAG_RE.lastIndex = 0; let result = ""; let lastIndex = 0; @@ -41,6 +91,10 @@ export function stripReasoningTagsFromText( const idx = match.index ?? 0; const isClose = match[1] === "/"; + if (isInsideCode(idx, codeRegions)) { + continue; + } + if (!inThinking) { result += cleaned.slice(lastIndex, idx); if (!isClose) {