From ce715c4c569222e030f80e9d50b1bf00672b8f31 Mon Sep 17 00:00:00 2001 From: Vignesh Natarajan Date: Sat, 7 Feb 2026 16:04:40 -0800 Subject: [PATCH] Memory: harden QMD startup, timeouts, and fallback recovery --- CHANGELOG.md | 1 + docs/concepts/memory.md | 16 +- src/config/schema.ts | 10 ++ src/config/types.memory.ts | 4 + src/config/zod-schema.ts | 4 + src/memory/backend-config.test.ts | 26 ++++ src/memory/backend-config.ts | 27 ++++ src/memory/qmd-manager.test.ts | 238 +++++++++++++++++++++++++++--- src/memory/qmd-manager.ts | 58 ++++++-- src/memory/search-manager.test.ts | 54 ++++++- src/memory/search-manager.ts | 2 + 11 files changed, 396 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac4b57f6d1..ed44d04353 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ Docs: https://docs.openclaw.ai - Cron: store migration hardening (legacy field migration, parse error handling, explicit delivery mode persistence). (#10776) Thanks @tyler6204. - Gateway/CLI: when `gateway.bind=lan`, use a LAN IP for probe URLs and Control UI links. (#11448) Thanks @AnonO6. - Memory: set Voyage embeddings `input_type` for improved retrieval. (#10818) Thanks @mcinteerj. +- Memory/QMD: run boot refresh in background by default, add configurable QMD maintenance timeouts, and retry QMD after fallback failures. (#9690, #9705) - Media understanding: recognize `.caf` audio attachments for transcription. (#10982) Thanks @succ985. - Telegram: auto-inject DM topic threadId in message tool + subagent announce. (#7235) Thanks @Lukavyi. - Security: require auth for Gateway canvas host and A2UI assets. (#9518) Thanks @coygeek. diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md index e213ea5b53..646575fd8f 100644 --- a/docs/concepts/memory.md +++ b/docs/concepts/memory.md @@ -127,12 +127,18 @@ out to QMD for retrieval. Key points: - The gateway writes a self-contained QMD home under `~/.openclaw/agents//qmd/` (config + cache + sqlite DB). 
-- Collections are rewritten from `memory.qmd.paths` (plus default workspace - memory files) into `index.yml`, then `qmd update` + `qmd embed` run on boot and - on a configurable interval (`memory.qmd.update.interval`, default 5 m). +- Collections are created via `qmd collection add` from `memory.qmd.paths` + (plus default workspace memory files), then `qmd update` + `qmd embed` run + on boot and on a configurable interval (`memory.qmd.update.interval`, + default 5 m). +- Boot refresh now runs in the background by default so chat startup is not + blocked; set `memory.qmd.update.waitForBootSync = true` to keep the previous + blocking behavior. - Searches run via `qmd query --json`. If QMD fails or the binary is missing, OpenClaw automatically falls back to the builtin SQLite manager so memory tools keep working. +- OpenClaw does not expose QMD embed batch-size tuning today; batch behavior is + controlled by QMD itself. - **First search may be slow**: QMD may download local GGUF models (reranker/query expansion) on the first `qmd query` run. - OpenClaw sets `XDG_CONFIG_HOME`/`XDG_CACHE_HOME` automatically when it runs QMD. @@ -170,7 +176,9 @@ out to QMD for retrieval. Key points: stable `name`). - `sessions`: opt into session JSONL indexing (`enabled`, `retentionDays`, `exportDir`). -- `update`: controls refresh cadence (`interval`, `debounceMs`, `onBoot`, `embedInterval`). +- `update`: controls refresh cadence and maintenance execution + (`interval`, `debounceMs`, `onBoot`, `waitForBootSync`, `embedInterval`, + `commandTimeoutMs`, `updateTimeoutMs`, `embedTimeoutMs`). - `limits`: clamp recall payload (`maxResults`, `maxSnippetChars`, `maxInjectedChars`, `timeoutMs`). - `scope`: same schema as [`session.sendPolicy`](/gateway/configuration#session). 
diff --git a/src/config/schema.ts b/src/config/schema.ts index a9c177c824..605c3b247d 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -271,7 +271,11 @@ const FIELD_LABELS: Record = { "memory.qmd.update.interval": "QMD Update Interval", "memory.qmd.update.debounceMs": "QMD Update Debounce (ms)", "memory.qmd.update.onBoot": "QMD Update on Startup", + "memory.qmd.update.waitForBootSync": "QMD Wait for Boot Sync", "memory.qmd.update.embedInterval": "QMD Embed Interval", + "memory.qmd.update.commandTimeoutMs": "QMD Command Timeout (ms)", + "memory.qmd.update.updateTimeoutMs": "QMD Update Timeout (ms)", + "memory.qmd.update.embedTimeoutMs": "QMD Embed Timeout (ms)", "memory.qmd.limits.maxResults": "QMD Max Results", "memory.qmd.limits.maxSnippetChars": "QMD Max Snippet Chars", "memory.qmd.limits.maxInjectedChars": "QMD Max Injected Chars", @@ -602,8 +606,14 @@ const FIELD_HELP: Record = { "memory.qmd.update.debounceMs": "Minimum delay between successive QMD refresh runs (default: 15000).", "memory.qmd.update.onBoot": "Run QMD update once on gateway startup (default: true).", + "memory.qmd.update.waitForBootSync": + "Block startup until the boot QMD refresh finishes (default: false).", "memory.qmd.update.embedInterval": "How often QMD embeddings are refreshed (duration string, default: 60m). 
Set to 0 to disable periodic embed.", + "memory.qmd.update.commandTimeoutMs": + "Timeout for QMD maintenance commands like collection list/add (default: 30000).", + "memory.qmd.update.updateTimeoutMs": "Timeout for `qmd update` runs (default: 120000).", + "memory.qmd.update.embedTimeoutMs": "Timeout for `qmd embed` runs (default: 120000).", "memory.qmd.limits.maxResults": "Max QMD results returned to the agent loop (default: 6).", "memory.qmd.limits.maxSnippetChars": "Max characters per snippet pulled from QMD (default: 700).", "memory.qmd.limits.maxInjectedChars": "Max total characters injected from QMD hits per turn.", diff --git a/src/config/types.memory.ts b/src/config/types.memory.ts index 2fc185200d..ca53e2d848 100644 --- a/src/config/types.memory.ts +++ b/src/config/types.memory.ts @@ -35,7 +35,11 @@ export type MemoryQmdUpdateConfig = { interval?: string; debounceMs?: number; onBoot?: boolean; + waitForBootSync?: boolean; embedInterval?: string; + commandTimeoutMs?: number; + updateTimeoutMs?: number; + embedTimeoutMs?: number; }; export type MemoryQmdLimitsConfig = { diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index 0d1b12bee1..6947a58760 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -53,7 +53,11 @@ const MemoryQmdUpdateSchema = z interval: z.string().optional(), debounceMs: z.number().int().nonnegative().optional(), onBoot: z.boolean().optional(), + waitForBootSync: z.boolean().optional(), embedInterval: z.string().optional(), + commandTimeoutMs: z.number().int().nonnegative().optional(), + updateTimeoutMs: z.number().int().nonnegative().optional(), + embedTimeoutMs: z.number().int().nonnegative().optional(), }) .strict(); diff --git a/src/memory/backend-config.test.ts b/src/memory/backend-config.test.ts index 98128d89b9..55b4a3bed3 100644 --- a/src/memory/backend-config.test.ts +++ b/src/memory/backend-config.test.ts @@ -26,6 +26,10 @@ describe("resolveMemoryBackendConfig", () => { 
expect(resolved.qmd?.collections.length).toBeGreaterThanOrEqual(3); expect(resolved.qmd?.command).toBe("qmd"); expect(resolved.qmd?.update.intervalMs).toBeGreaterThan(0); + expect(resolved.qmd?.update.waitForBootSync).toBe(false); + expect(resolved.qmd?.update.commandTimeoutMs).toBe(30_000); + expect(resolved.qmd?.update.updateTimeoutMs).toBe(120_000); + expect(resolved.qmd?.update.embedTimeoutMs).toBe(120_000); }); it("parses quoted qmd command paths", () => { @@ -67,4 +71,26 @@ describe("resolveMemoryBackendConfig", () => { const workspaceRoot = resolveAgentWorkspaceDir(cfg, "main"); expect(custom?.path).toBe(path.resolve(workspaceRoot, "notes")); }); + + it("resolves qmd update timeout overrides", () => { + const cfg = { + agents: { defaults: { workspace: "/tmp/memory-test" } }, + memory: { + backend: "qmd", + qmd: { + update: { + waitForBootSync: true, + commandTimeoutMs: 12_000, + updateTimeoutMs: 480_000, + embedTimeoutMs: 360_000, + }, + }, + }, + } as OpenClawConfig; + const resolved = resolveMemoryBackendConfig({ cfg, agentId: "main" }); + expect(resolved.qmd?.update.waitForBootSync).toBe(true); + expect(resolved.qmd?.update.commandTimeoutMs).toBe(12_000); + expect(resolved.qmd?.update.updateTimeoutMs).toBe(480_000); + expect(resolved.qmd?.update.embedTimeoutMs).toBe(360_000); + }); }); diff --git a/src/memory/backend-config.ts b/src/memory/backend-config.ts index 375398f0ea..0e48f6bff8 100644 --- a/src/memory/backend-config.ts +++ b/src/memory/backend-config.ts @@ -29,7 +29,11 @@ export type ResolvedQmdUpdateConfig = { intervalMs: number; debounceMs: number; onBoot: boolean; + waitForBootSync: boolean; embedIntervalMs: number; + commandTimeoutMs: number; + updateTimeoutMs: number; + embedTimeoutMs: number; }; export type ResolvedQmdLimitsConfig = { @@ -61,6 +65,9 @@ const DEFAULT_QMD_INTERVAL = "5m"; const DEFAULT_QMD_DEBOUNCE_MS = 15_000; const DEFAULT_QMD_TIMEOUT_MS = 4_000; const DEFAULT_QMD_EMBED_INTERVAL = "60m"; +const DEFAULT_QMD_COMMAND_TIMEOUT_MS 
= 30_000; +const DEFAULT_QMD_UPDATE_TIMEOUT_MS = 120_000; +const DEFAULT_QMD_EMBED_TIMEOUT_MS = 120_000; const DEFAULT_QMD_LIMITS: ResolvedQmdLimitsConfig = { maxResults: 6, maxSnippetChars: 700, @@ -140,6 +147,13 @@ function resolveDebounceMs(raw: number | undefined): number { return DEFAULT_QMD_DEBOUNCE_MS; } +function resolveTimeoutMs(raw: number | undefined, fallback: number): number { + if (typeof raw === "number" && Number.isFinite(raw) && raw > 0) { + return Math.floor(raw); + } + return fallback; +} + function resolveLimits(raw?: MemoryQmdConfig["limits"]): ResolvedQmdLimitsConfig { const parsed: ResolvedQmdLimitsConfig = { ...DEFAULT_QMD_LIMITS }; if (raw?.maxResults && raw.maxResults > 0) { @@ -258,7 +272,20 @@ export function resolveMemoryBackendConfig(params: { intervalMs: resolveIntervalMs(qmdCfg?.update?.interval), debounceMs: resolveDebounceMs(qmdCfg?.update?.debounceMs), onBoot: qmdCfg?.update?.onBoot !== false, + waitForBootSync: qmdCfg?.update?.waitForBootSync === true, embedIntervalMs: resolveEmbedIntervalMs(qmdCfg?.update?.embedInterval), + commandTimeoutMs: resolveTimeoutMs( + qmdCfg?.update?.commandTimeoutMs, + DEFAULT_QMD_COMMAND_TIMEOUT_MS, + ), + updateTimeoutMs: resolveTimeoutMs( + qmdCfg?.update?.updateTimeoutMs, + DEFAULT_QMD_UPDATE_TIMEOUT_MS, + ), + embedTimeoutMs: resolveTimeoutMs( + qmdCfg?.update?.embedTimeoutMs, + DEFAULT_QMD_EMBED_TIMEOUT_MS, + ), }, limits: resolveLimits(qmdCfg?.limits), scope: qmdCfg?.scope ?? 
DEFAULT_QMD_SCOPE, diff --git a/src/memory/qmd-manager.test.ts b/src/memory/qmd-manager.test.ts index 9ed61914a6..2905fe7e5e 100644 --- a/src/memory/qmd-manager.test.ts +++ b/src/memory/qmd-manager.test.ts @@ -4,30 +4,35 @@ import os from "node:os"; import path from "node:path"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -vi.mock("node:child_process", () => { - const spawn = vi.fn((_cmd: string, _args: string[]) => { - const stdout = new EventEmitter(); - const stderr = new EventEmitter(); - const child = new EventEmitter() as { - stdout: EventEmitter; - stderr: EventEmitter; - kill: () => void; - emit: (event: string, code: number) => boolean; - }; - child.stdout = stdout; - child.stderr = stderr; - child.kill = () => { +type MockChild = EventEmitter & { + stdout: EventEmitter; + stderr: EventEmitter; + kill: (signal?: NodeJS.Signals) => void; + closeWith: (code?: number | null) => void; +}; + +function createMockChild(params?: { autoClose?: boolean; closeDelayMs?: number }): MockChild { + const stdout = new EventEmitter(); + const stderr = new EventEmitter(); + const child = new EventEmitter() as MockChild; + child.stdout = stdout; + child.stderr = stderr; + child.closeWith = (code = 0) => { + child.emit("close", code); + }; + child.kill = () => { + // Let timeout rejection win in tests that simulate hung QMD commands. + }; + if (params?.autoClose !== false) { + const delayMs = params?.closeDelayMs ?? 
0; + setTimeout(() => { child.emit("close", 0); - }; - setImmediate(() => { - stdout.emit("data", ""); - stderr.emit("data", ""); - child.emit("close", 0); - }); - return child; - }); - return { spawn }; -}); + }, delayMs); + } + return child; +} + +vi.mock("node:child_process", () => ({ spawn: vi.fn() })); import { spawn as mockedSpawn } from "node:child_process"; import type { OpenClawConfig } from "../config/config.js"; @@ -44,7 +49,8 @@ describe("QmdMemoryManager", () => { const agentId = "main"; beforeEach(async () => { - spawnMock.mockClear(); + spawnMock.mockReset(); + spawnMock.mockImplementation(() => createMockChild()); tmpRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qmd-manager-test-")); workspaceDir = path.join(tmpRoot, "workspace"); await fs.mkdir(workspaceDir, { recursive: true }); @@ -97,6 +103,190 @@ describe("QmdMemoryManager", () => { await manager.close(); }); + it("runs boot update in background by default", async () => { + cfg = { + ...cfg, + memory: { + backend: "qmd", + qmd: { + includeDefaultMemory: false, + update: { interval: "0s", debounceMs: 60_000, onBoot: true }, + paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }], + }, + }, + } as OpenClawConfig; + + let releaseUpdate: (() => void) | null = null; + spawnMock.mockImplementation((_cmd: string, args: string[]) => { + if (args[0] === "update") { + const child = createMockChild({ autoClose: false }); + releaseUpdate = () => child.closeWith(0); + return child; + } + return createMockChild(); + }); + + const resolved = resolveMemoryBackendConfig({ cfg, agentId }); + const createPromise = QmdMemoryManager.create({ cfg, agentId, resolved }); + const race = await Promise.race([ + createPromise.then(() => "created" as const), + new Promise<"timeout">((resolve) => setTimeout(() => resolve("timeout"), 80)), + ]); + expect(race).toBe("created"); + + if (!releaseUpdate) { + throw new Error("update child missing"); + } + releaseUpdate(); + const manager = await createPromise; 
+ await manager?.close(); + }); + + it("can be configured to block startup on boot update", async () => { + cfg = { + ...cfg, + memory: { + backend: "qmd", + qmd: { + includeDefaultMemory: false, + update: { + interval: "0s", + debounceMs: 60_000, + onBoot: true, + waitForBootSync: true, + }, + paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }], + }, + }, + } as OpenClawConfig; + + let releaseUpdate: (() => void) | null = null; + spawnMock.mockImplementation((_cmd: string, args: string[]) => { + if (args[0] === "update") { + const child = createMockChild({ autoClose: false }); + releaseUpdate = () => child.closeWith(0); + return child; + } + return createMockChild(); + }); + + const resolved = resolveMemoryBackendConfig({ cfg, agentId }); + const createPromise = QmdMemoryManager.create({ cfg, agentId, resolved }); + const race = await Promise.race([ + createPromise.then(() => "created" as const), + new Promise<"timeout">((resolve) => setTimeout(() => resolve("timeout"), 80)), + ]); + expect(race).toBe("timeout"); + + if (!releaseUpdate) { + throw new Error("update child missing"); + } + releaseUpdate(); + const manager = await createPromise; + await manager?.close(); + }); + + it("times out collection bootstrap commands", async () => { + cfg = { + ...cfg, + memory: { + backend: "qmd", + qmd: { + includeDefaultMemory: false, + update: { + interval: "0s", + debounceMs: 60_000, + onBoot: false, + commandTimeoutMs: 15, + }, + paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }], + }, + }, + } as OpenClawConfig; + + spawnMock.mockImplementation((_cmd: string, args: string[]) => { + if (args[0] === "collection" && args[1] === "list") { + return createMockChild({ autoClose: false }); + } + return createMockChild(); + }); + + const resolved = resolveMemoryBackendConfig({ cfg, agentId }); + const manager = await QmdMemoryManager.create({ cfg, agentId, resolved }); + expect(manager).toBeTruthy(); + await manager?.close(); + }); + + 
it("times out qmd update during sync when configured", async () => { + cfg = { + ...cfg, + memory: { + backend: "qmd", + qmd: { + includeDefaultMemory: false, + update: { + interval: "0s", + debounceMs: 0, + onBoot: false, + updateTimeoutMs: 20, + }, + paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }], + }, + }, + } as OpenClawConfig; + spawnMock.mockImplementation((_cmd: string, args: string[]) => { + if (args[0] === "update") { + return createMockChild({ autoClose: false }); + } + return createMockChild(); + }); + + const resolved = resolveMemoryBackendConfig({ cfg, agentId }); + const manager = await QmdMemoryManager.create({ cfg, agentId, resolved }); + expect(manager).toBeTruthy(); + if (!manager) { + throw new Error("manager missing"); + } + await expect(manager.sync({ reason: "manual" })).rejects.toThrow( + "qmd update timed out after 20ms", + ); + await manager.close(); + }); + + it("logs and continues when qmd embed times out", async () => { + cfg = { + ...cfg, + memory: { + backend: "qmd", + qmd: { + includeDefaultMemory: false, + update: { + interval: "0s", + debounceMs: 0, + onBoot: false, + embedTimeoutMs: 20, + }, + paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }], + }, + }, + } as OpenClawConfig; + spawnMock.mockImplementation((_cmd: string, args: string[]) => { + if (args[0] === "embed") { + return createMockChild({ autoClose: false }); + } + return createMockChild(); + }); + + const resolved = resolveMemoryBackendConfig({ cfg, agentId }); + const manager = await QmdMemoryManager.create({ cfg, agentId, resolved }); + expect(manager).toBeTruthy(); + if (!manager) { + throw new Error("manager missing"); + } + await expect(manager.sync({ reason: "manual" })).resolves.toBeUndefined(); + await manager.close(); + }); + it("scopes by channel for agent-prefixed session keys", async () => { cfg = { ...cfg, diff --git a/src/memory/qmd-manager.ts b/src/memory/qmd-manager.ts index 4d315564d5..700d8fc615 100644 --- 
a/src/memory/qmd-manager.ts +++ b/src/memory/qmd-manager.ts @@ -28,6 +28,7 @@ import type { ResolvedMemoryBackendConfig, ResolvedQmdConfig } from "./backend-c const log = createSubsystemLogger("memory"); const SNIPPET_HEADER_RE = /@@\s*-([0-9]+),([0-9]+)/; +const SEARCH_PENDING_UPDATE_WAIT_MS = 500; type QmdQueryResult = { docid?: string; @@ -145,7 +146,16 @@ export class QmdMemoryManager implements MemorySearchManager { await this.ensureCollections(); if (this.qmd.update.onBoot) { - await this.runUpdate("boot", true); + const bootRun = this.runUpdate("boot", true); + if (this.qmd.update.waitForBootSync) { + await bootRun.catch((err) => { + log.warn(`qmd boot update failed: ${String(err)}`); + }); + } else { + void bootRun.catch((err) => { + log.warn(`qmd boot update failed: ${String(err)}`); + }); + } } if (this.qmd.update.intervalMs > 0) { this.updateTimer = setInterval(() => { @@ -172,7 +182,9 @@ export class QmdMemoryManager implements MemorySearchManager { // fall back to best-effort idempotent `qmd collection add`. const existing = new Set(); try { - const result = await this.runQmd(["collection", "list", "--json"]); + const result = await this.runQmd(["collection", "list", "--json"], { + timeoutMs: this.qmd.update.commandTimeoutMs, + }); const parsed = JSON.parse(result.stdout) as unknown; if (Array.isArray(parsed)) { for (const entry of parsed) { @@ -195,15 +207,20 @@ export class QmdMemoryManager implements MemorySearchManager { continue; } try { - await this.runQmd([ - "collection", - "add", - collection.path, - "--name", - collection.name, - "--mask", - collection.pattern, - ]); + await this.runQmd( + [ + "collection", + "add", + collection.path, + "--name", + collection.name, + "--mask", + collection.pattern, + ], + { + timeoutMs: this.qmd.update.commandTimeoutMs, + }, + ); } catch (err) { const message = err instanceof Error ? err.message : String(err); // Idempotency: qmd exits non-zero if the collection name already exists. 
@@ -229,7 +246,7 @@ export class QmdMemoryManager implements MemorySearchManager { if (!trimmed) { return []; } - await this.pendingUpdate?.catch(() => undefined); + await this.waitForPendingUpdateBeforeSearch(); const limit = Math.min( this.qmd.limits.maxResults, opts?.maxResults ?? this.qmd.limits.maxResults, @@ -376,7 +393,7 @@ export class QmdMemoryManager implements MemorySearchManager { } private async runUpdate(reason: string, force?: boolean): Promise { - if (this.pendingUpdate && !force) { + if (this.pendingUpdate) { return this.pendingUpdate; } if (this.shouldSkipUpdate(force)) { @@ -386,7 +403,7 @@ export class QmdMemoryManager implements MemorySearchManager { if (this.sessionExporter) { await this.exportSessions(); } - await this.runQmd(["update"], { timeoutMs: 120_000 }); + await this.runQmd(["update"], { timeoutMs: this.qmd.update.updateTimeoutMs }); const embedIntervalMs = this.qmd.update.embedIntervalMs; const shouldEmbed = Boolean(force) || @@ -394,7 +411,7 @@ export class QmdMemoryManager implements MemorySearchManager { (embedIntervalMs > 0 && Date.now() - this.lastEmbedAt > embedIntervalMs); if (shouldEmbed) { try { - await this.runQmd(["embed"], { timeoutMs: 120_000 }); + await this.runQmd(["embed"], { timeoutMs: this.qmd.update.embedTimeoutMs }); this.lastEmbedAt = Date.now(); } catch (err) { log.warn(`qmd embed failed (${reason}): ${String(err)}`); @@ -807,4 +824,15 @@ export class QmdMemoryManager implements MemorySearchManager { } return Date.now() - this.lastUpdateAt < debounceMs; } + + private async waitForPendingUpdateBeforeSearch(): Promise { + const pending = this.pendingUpdate; + if (!pending) { + return; + } + await Promise.race([ + pending.catch(() => undefined), + new Promise((resolve) => setTimeout(resolve, SEARCH_PENDING_UPDATE_WAIT_MS)), + ]); + } } diff --git a/src/memory/search-manager.test.ts b/src/memory/search-manager.test.ts index 8419d7f2c4..38f576cebf 100644 --- a/src/memory/search-manager.test.ts +++ 
b/src/memory/search-manager.test.ts @@ -30,7 +30,34 @@ vi.mock("./qmd-manager.js", () => ({ vi.mock("./manager.js", () => ({ MemoryIndexManager: { - get: vi.fn(async () => null), + get: vi.fn(async () => ({ + search: vi.fn(async () => [ + { + path: "MEMORY.md", + startLine: 1, + endLine: 1, + score: 1, + snippet: "fallback", + source: "memory", + }, + ]), + readFile: vi.fn(async () => ({ text: "", path: "MEMORY.md" })), + status: vi.fn(() => ({ + backend: "builtin" as const, + provider: "openai", + model: "text-embedding-3-small", + requestedProvider: "openai", + files: 0, + chunks: 0, + dirty: false, + workspaceDir: "/tmp", + dbPath: "/tmp/index.sqlite", + })), + sync: vi.fn(async () => {}), + probeEmbeddingAvailability: vi.fn(async () => ({ ok: true })), + probeVectorAvailability: vi.fn(async () => true), + close: vi.fn(async () => {}), + })), }, })); @@ -62,4 +89,29 @@ describe("getMemorySearchManager caching", () => { // eslint-disable-next-line @typescript-eslint/unbound-method expect(QmdMemoryManager.create).toHaveBeenCalledTimes(1); }); + + it("evicts failed qmd wrapper so next call retries qmd", async () => { + const retryAgentId = "retry-agent"; + const cfg = { + memory: { backend: "qmd", qmd: {} }, + agents: { list: [{ id: retryAgentId, default: true, workspace: "/tmp/workspace" }] }, + } as const; + + mockPrimary.search.mockRejectedValueOnce(new Error("qmd query failed")); + const first = await getMemorySearchManager({ cfg, agentId: retryAgentId }); + expect(first.manager).toBeTruthy(); + if (!first.manager) { + throw new Error("manager missing"); + } + + const fallbackResults = await first.manager.search("hello"); + expect(fallbackResults).toHaveLength(1); + expect(fallbackResults[0]?.path).toBe("MEMORY.md"); + + const second = await getMemorySearchManager({ cfg, agentId: retryAgentId }); + expect(second.manager).toBeTruthy(); + expect(second.manager).not.toBe(first.manager); + // eslint-disable-next-line @typescript-eslint/unbound-method + 
expect(QmdMemoryManager.create).toHaveBeenCalledTimes(2); + }); }); diff --git a/src/memory/search-manager.ts b/src/memory/search-manager.ts index 0efe71ee32..e3b4aaf8a3 100644 --- a/src/memory/search-manager.ts +++ b/src/memory/search-manager.ts @@ -89,6 +89,8 @@ class FallbackMemoryManager implements MemorySearchManager { this.lastError = err instanceof Error ? err.message : String(err); log.warn(`qmd memory failed; switching to builtin index: ${this.lastError}`); await this.deps.primary.close?.().catch(() => {}); + // Evict the failed wrapper so the next request can retry QMD with a fresh manager. + this.onClose?.(); } } const fallback = await this.ensureFallback();