fix(memory): add input_type to Voyage AI embeddings for improved retrieval (#10818)

* fix(memory): add input_type to Voyage AI embeddings for improved retrieval

Voyage AI recommends passing input_type='document' when indexing and
input_type='query' when searching. This improves retrieval quality by
optimising the embedding space for each direction.

Changes:
- embedQuery now passes input_type: 'query'
- embedBatch now passes input_type: 'document'
- Batch API request_params includes input_type: 'document'
- Tests updated to verify input_type is passed correctly
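
For illustration only (not part of this diff, and the sample texts are made up), the request bodies sent to Voyage's /v1/embeddings endpoint now look roughly like this:

    // Shape of the body built by the provider; input_type is optional.
    type VoyageEmbeddingBody = {
      model: string;
      input: string[];
      input_type?: "query" | "document";
    };

    // embedQuery (search time): mark the text as a query.
    const queryBody: VoyageEmbeddingBody = {
      model: "voyage-4-large",
      input: ["how do I configure reminders?"],
      input_type: "query",
    };

    // embedBatch and the batch API request_params (index time): mark texts as documents.
    const documentBody: VoyageEmbeddingBody = {
      model: "voyage-4-large",
      input: ["memory chunk A", "memory chunk B"],
      input_type: "document",
    };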

* Changelog: note Voyage embeddings input_type fix (#10818) (thanks @mcinteerj)

---------

Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
Jake authored 2026-02-07 16:55:09 +13:00; committed by GitHub
parent 4c1da23a71
commit e78ae48e69
5 changed files with 57 additions and 8 deletions

View File

@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
 - Cron: scheduler reliability (timer drift, restart catch-up, lock contention, stale running markers). (#10776) Thanks @tyler6204.
 - Cron: store migration hardening (legacy field migration, parse error handling, explicit delivery mode persistence). (#10776) Thanks @tyler6204.
+- Memory: set Voyage embeddings `input_type` for improved retrieval. (#10818) Thanks @mcinteerj.
 - Telegram: auto-inject DM topic threadId in message tool + subagent announce. (#7235) Thanks @Lukavyi.
 - Security: require auth for Gateway canvas host and A2UI assets. (#9518) Thanks @coygeek.
 - Cron: fix scheduling and reminder delivery regressions; harden next-run recompute + timer re-arming + legacy schedule fields. (#9733, #9823, #9948, #9932) Thanks @tyler6204, @pycckuu, @j2h4u, @fujiwara-tofu-shop.

View File

@@ -1,5 +1,5 @@
-import { afterEach, describe, expect, it, vi } from "vitest";
 import { ReadableStream } from "node:stream/web";
+import { afterEach, describe, expect, it, vi } from "vitest";
 import type { VoyageBatchOutputLine, VoyageBatchRequest } from "./batch-voyage.js";
 import type { VoyageEmbeddingClient } from "./embeddings-voyage.js";
@@ -114,6 +114,10 @@ describe("runVoyageEmbeddingBatches", () => {
     const createBody = JSON.parse(fetchMock.mock.calls[1][1].body);
     expect(createBody.input_file_id).toBe("file-123");
     expect(createBody.completion_window).toBe("12h");
+    expect(createBody.request_params).toEqual({
+      model: "voyage-4-large",
+      input_type: "document",
+    });
     // Verify Content Fetch
     expect(fetchMock.mock.calls[3][0]).toContain("/files/file-out-999/content");

View File

@@ -1,8 +1,7 @@
 import { createInterface } from "node:readline";
 import { Readable } from "node:stream";
-import { retryAsync } from "../infra/retry.js";
 import type { VoyageEmbeddingClient } from "./embeddings-voyage.js";
+import { retryAsync } from "../infra/retry.js";
 import { hashText, runWithConcurrency } from "./internal.js";
 /**
@@ -110,6 +109,7 @@ async function submitVoyageBatch(params: {
     completion_window: VOYAGE_BATCH_COMPLETION_WINDOW,
     request_params: {
       model: params.client.model,
+      input_type: "document",
     },
     metadata: {
       source: "clawdbot-memory",

View File

@@ -59,6 +59,7 @@ describe("voyage embedding provider", () => {
     expect(body).toEqual({
       model: "voyage-4-large",
       input: ["test query"],
+      input_type: "query",
     });
   });
@@ -90,6 +91,43 @@ describe("voyage embedding provider", () => {
     expect(headers["X-Custom"]).toBe("123");
   });
+  it("passes input_type=document for embedBatch", async () => {
+    const fetchMock = vi.fn(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({
+        data: [{ embedding: [0.1, 0.2] }, { embedding: [0.3, 0.4] }],
+      }),
+    })) as unknown as typeof fetch;
+    vi.stubGlobal("fetch", fetchMock);
+    const { createVoyageEmbeddingProvider } = await import("./embeddings-voyage.js");
+    const authModule = await import("../agents/model-auth.js");
+    vi.mocked(authModule.resolveApiKeyForProvider).mockResolvedValue({
+      apiKey: "voyage-key-123",
+      mode: "api-key",
+      source: "test",
+    });
+    const result = await createVoyageEmbeddingProvider({
+      config: {} as never,
+      provider: "voyage",
+      model: "voyage-4-large",
+      fallback: "none",
+    });
+    await result.provider.embedBatch(["doc1", "doc2"]);
+    const [, init] = fetchMock.mock.calls[0] ?? [];
+    const body = JSON.parse(init?.body as string);
+    expect(body).toEqual({
+      model: "voyage-4-large",
+      input: ["doc1", "doc2"],
+      input_type: "document",
+    });
+  });
   it("normalizes model names", async () => {
     const { normalizeVoyageModel } = await import("./embeddings-voyage.js");
     expect(normalizeVoyageModel("voyage/voyage-large-2")).toBe("voyage-large-2");

View File

@@ -1,5 +1,5 @@
-import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js";
 import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js";
+import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js";
 export type VoyageEmbeddingClient = {
   baseUrl: string;
@@ -23,12 +23,18 @@ export async function createVoyageEmbeddingProvider(
   const client = await resolveVoyageEmbeddingClient(options);
   const url = `${client.baseUrl.replace(/\/$/, "")}/embeddings`;
-  const embed = async (input: string[]): Promise<number[][]> => {
+  const embed = async (input: string[], input_type?: "query" | "document"): Promise<number[][]> => {
     if (input.length === 0) return [];
+    const body: { model: string; input: string[]; input_type?: "query" | "document" } = {
+      model: client.model,
+      input,
+    };
+    if (input_type) body.input_type = input_type;
     const res = await fetch(url, {
       method: "POST",
       headers: client.headers,
-      body: JSON.stringify({ model: client.model, input }),
+      body: JSON.stringify(body),
     });
     if (!res.ok) {
       const text = await res.text();
@@ -46,10 +52,10 @@ export async function createVoyageEmbeddingProvider(
id: "voyage",
model: client.model,
embedQuery: async (text) => {
const [vec] = await embed([text]);
const [vec] = await embed([text], "query");
return vec ?? [];
},
embedBatch: embed,
embedBatch: async (texts) => embed(texts, "document"),
},
client,
};