mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-08 21:09:23 +08:00
fix(memory): add input_type to Voyage AI embeddings for improved retrieval (#10818)
* fix(memory): add input_type to Voyage AI embeddings for improved retrieval

  Voyage AI recommends passing input_type='document' when indexing and input_type='query' when searching. This improves retrieval quality by optimising the embedding space for each direction.

  Changes:
  - embedQuery now passes input_type: 'query'
  - embedBatch now passes input_type: 'document'
  - Batch API request_params includes input_type: 'document'
  - Tests updated to verify input_type is passed correctly

* Changelog: note Voyage embeddings input_type fix (#10818) (thanks @mcinteerj)

---------

Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Cron: scheduler reliability (timer drift, restart catch-up, lock contention, stale running markers). (#10776) Thanks @tyler6204.
|
||||
- Cron: store migration hardening (legacy field migration, parse error handling, explicit delivery mode persistence). (#10776) Thanks @tyler6204.
|
||||
- Memory: set Voyage embeddings `input_type` for improved retrieval. (#10818) Thanks @mcinteerj.
|
||||
- Telegram: auto-inject DM topic threadId in message tool + subagent announce. (#7235) Thanks @Lukavyi.
|
||||
- Security: require auth for Gateway canvas host and A2UI assets. (#9518) Thanks @coygeek.
|
||||
- Cron: fix scheduling and reminder delivery regressions; harden next-run recompute + timer re-arming + legacy schedule fields. (#9733, #9823, #9948, #9932) Thanks @tyler6204, @pycckuu, @j2h4u, @fujiwara-tofu-shop.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { ReadableStream } from "node:stream/web";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import type { VoyageBatchOutputLine, VoyageBatchRequest } from "./batch-voyage.js";
|
||||
import type { VoyageEmbeddingClient } from "./embeddings-voyage.js";
|
||||
|
||||
@@ -114,6 +114,10 @@ describe("runVoyageEmbeddingBatches", () => {
|
||||
const createBody = JSON.parse(fetchMock.mock.calls[1][1].body);
|
||||
expect(createBody.input_file_id).toBe("file-123");
|
||||
expect(createBody.completion_window).toBe("12h");
|
||||
expect(createBody.request_params).toEqual({
|
||||
model: "voyage-4-large",
|
||||
input_type: "document",
|
||||
});
|
||||
|
||||
// Verify Content Fetch
|
||||
expect(fetchMock.mock.calls[3][0]).toContain("/files/file-out-999/content");
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import { createInterface } from "node:readline";
|
||||
import { Readable } from "node:stream";
|
||||
|
||||
import { retryAsync } from "../infra/retry.js";
|
||||
import type { VoyageEmbeddingClient } from "./embeddings-voyage.js";
|
||||
import { retryAsync } from "../infra/retry.js";
|
||||
import { hashText, runWithConcurrency } from "./internal.js";
|
||||
|
||||
/**
|
||||
@@ -110,6 +109,7 @@ async function submitVoyageBatch(params: {
|
||||
completion_window: VOYAGE_BATCH_COMPLETION_WINDOW,
|
||||
request_params: {
|
||||
model: params.client.model,
|
||||
input_type: "document",
|
||||
},
|
||||
metadata: {
|
||||
source: "clawdbot-memory",
|
||||
|
||||
@@ -59,6 +59,7 @@ describe("voyage embedding provider", () => {
|
||||
expect(body).toEqual({
|
||||
model: "voyage-4-large",
|
||||
input: ["test query"],
|
||||
input_type: "query",
|
||||
});
|
||||
});
|
||||
|
||||
@@ -90,6 +91,43 @@ describe("voyage embedding provider", () => {
|
||||
expect(headers["X-Custom"]).toBe("123");
|
||||
});
|
||||
|
||||
it("passes input_type=document for embedBatch", async () => {
|
||||
const fetchMock = vi.fn(async () => ({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({
|
||||
data: [{ embedding: [0.1, 0.2] }, { embedding: [0.3, 0.4] }],
|
||||
}),
|
||||
})) as unknown as typeof fetch;
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
const { createVoyageEmbeddingProvider } = await import("./embeddings-voyage.js");
|
||||
const authModule = await import("../agents/model-auth.js");
|
||||
|
||||
vi.mocked(authModule.resolveApiKeyForProvider).mockResolvedValue({
|
||||
apiKey: "voyage-key-123",
|
||||
mode: "api-key",
|
||||
source: "test",
|
||||
});
|
||||
|
||||
const result = await createVoyageEmbeddingProvider({
|
||||
config: {} as never,
|
||||
provider: "voyage",
|
||||
model: "voyage-4-large",
|
||||
fallback: "none",
|
||||
});
|
||||
|
||||
await result.provider.embedBatch(["doc1", "doc2"]);
|
||||
|
||||
const [, init] = fetchMock.mock.calls[0] ?? [];
|
||||
const body = JSON.parse(init?.body as string);
|
||||
expect(body).toEqual({
|
||||
model: "voyage-4-large",
|
||||
input: ["doc1", "doc2"],
|
||||
input_type: "document",
|
||||
});
|
||||
});
|
||||
|
||||
it("normalizes model names", async () => {
|
||||
const { normalizeVoyageModel } = await import("./embeddings-voyage.js");
|
||||
expect(normalizeVoyageModel("voyage/voyage-large-2")).toBe("voyage-large-2");
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js";
|
||||
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js";
|
||||
import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js";
|
||||
|
||||
export type VoyageEmbeddingClient = {
|
||||
baseUrl: string;
|
||||
@@ -23,12 +23,18 @@ export async function createVoyageEmbeddingProvider(
|
||||
const client = await resolveVoyageEmbeddingClient(options);
|
||||
const url = `${client.baseUrl.replace(/\/$/, "")}/embeddings`;
|
||||
|
||||
const embed = async (input: string[]): Promise<number[][]> => {
|
||||
const embed = async (input: string[], input_type?: "query" | "document"): Promise<number[][]> => {
|
||||
if (input.length === 0) return [];
|
||||
const body: { model: string; input: string[]; input_type?: "query" | "document" } = {
|
||||
model: client.model,
|
||||
input,
|
||||
};
|
||||
if (input_type) body.input_type = input_type;
|
||||
|
||||
const res = await fetch(url, {
|
||||
method: "POST",
|
||||
headers: client.headers,
|
||||
body: JSON.stringify({ model: client.model, input }),
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
if (!res.ok) {
|
||||
const text = await res.text();
|
||||
@@ -46,10 +52,10 @@ export async function createVoyageEmbeddingProvider(
|
||||
id: "voyage",
|
||||
model: client.model,
|
||||
embedQuery: async (text) => {
|
||||
const [vec] = await embed([text]);
|
||||
const [vec] = await embed([text], "query");
|
||||
return vec ?? [];
|
||||
},
|
||||
embedBatch: embed,
|
||||
embedBatch: async (texts) => embed(texts, "document"),
|
||||
},
|
||||
client,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user