From c114e762b2cdfdecad499a555f4a821c567f2e89 Mon Sep 17 00:00:00 2001 From: Anatol Zakrividoroga <53095479+anatolzak@users.noreply.github.com> Date: Wed, 9 Apr 2025 21:31:17 +0300 Subject: [PATCH 1/4] add amazon bedrock binary embeddings --- .../src/bedrock-embedding-model.test.ts | 47 +++++++++++++++++++ .../src/bedrock-embedding-model.ts | 27 ++++++++++- .../src/bedrock-embedding-settings.ts | 6 +++ 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/packages/amazon-bedrock/src/bedrock-embedding-model.test.ts b/packages/amazon-bedrock/src/bedrock-embedding-model.test.ts index 9254219659d1..8a1ca9b2474a 100644 --- a/packages/amazon-bedrock/src/bedrock-embedding-model.test.ts +++ b/packages/amazon-bedrock/src/bedrock-embedding-model.test.ts @@ -7,6 +7,11 @@ const mockEmbeddings = [ [-0.08, 0.06, -0.03, 0.02, 0.03], ]; +const mockBinaryEmbeddings = [ + [0, 1, 0, 1, 0], + [1, 0, 1, 0, 1], +]; + const fakeFetchWithAuth = injectFetchHeaders({ 'x-amz-auth': 'test-auth' }); const testValues = ['sunny day at the beach', 'rainy day in the city']; @@ -158,4 +163,46 @@ describe('doEmbed', () => { expect(requestHeaders['signed-header']).toBe('signed-value'); expect(requestHeaders['authorization']).toBe('AWS4-HMAC-SHA256...'); }); + + it('should work with binary embeddings', async () => { + const modelWithBinaryEmbeddings = new BedrockEmbeddingModel( + 'amazon.titan-embed-text-v2:0', + { embeddingType: 'binary' }, + { + baseUrl: () => 'https://bedrock-runtime.us-east-1.amazonaws.com', + headers: mockConfigHeaders, + fetch: fakeFetchWithAuth, + }, + ); + + server.urls[embedUrl].response = { + type: 'binary', + headers: { + 'content-type': 'application/json', + }, + body: Buffer.from( + JSON.stringify({ + embeddingsByType: { + binary: mockBinaryEmbeddings[0], + }, + inputTextTokenCount: 8, + }), + ), + }; + + const { embeddings } = await modelWithBinaryEmbeddings.doEmbed({ + values: [testValues[0]], + }); + + expect(embeddings.length).toBe(1); + expect(embeddings[0]).toStrictEqual(mockBinaryEmbeddings[0]); + + const body = await server.calls[0].requestBody; + expect(body).toEqual({ + inputText: testValues[0], + dimensions: undefined, + normalize: undefined, + embeddingTypes: ['binary'], + }); + }); }); diff --git a/packages/amazon-bedrock/src/bedrock-embedding-model.ts b/packages/amazon-bedrock/src/bedrock-embedding-model.ts index c4e7b9e7da8f..7e5b996bc178 100644 --- a/packages/amazon-bedrock/src/bedrock-embedding-model.ts +++ b/packages/amazon-bedrock/src/bedrock-embedding-model.ts @@ -48,11 +48,16 @@ export class BedrockEmbeddingModel implements EmbeddingModelV1 { EmbeddingModelV1['doEmbed'] >[0]): Promise { const embedSingleText = async (inputText: string) => { + const isBinaryEmbedding = this.settings.embeddingType === 'binary'; + // https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_InvokeModel.html const args = { inputText, dimensions: this.settings.dimensions, normalize: this.settings.normalize, + ...(isBinaryEmbedding && { + embeddingTypes: ['binary'], + }), }; const url = this.getUrl(this.modelId); const { value: response } = await postJsonToApi({ @@ -66,14 +71,19 @@ export class BedrockEmbeddingModel implements EmbeddingModelV1 { errorToMessage: error => `${error.type}: ${error.message}`, }), successfulResponseHandler: createJsonResponseHandler( - BedrockEmbeddingResponseSchema, + z.union([ + BedrockBinaryEmbeddingResponseSchema, + BedrockEmbeddingResponseSchema, + ]), ), fetch: this.config.fetch, abortSignal, }); return { - embedding: response.embedding, + embedding: isBinaryEmbedding + ? (response as BedrockBinaryEmbeddingResponse).embeddingsByType.binary + : (response as BedrockEmbeddingResponse).embedding, inputTextTokenCount: response.inputTextTokenCount, }; }; @@ -97,3 +107,16 @@ const BedrockEmbeddingResponseSchema = z.object({ embedding: z.array(z.number()), inputTextTokenCount: z.number(), }); + +type BedrockEmbeddingResponse = z.infer; + +const BedrockBinaryEmbeddingResponseSchema = z.object({ + embeddingsByType: z.object({ + binary: z.array(z.number()), + }), + inputTextTokenCount: z.number(), +}); + +type BedrockBinaryEmbeddingResponse = z.infer< + typeof BedrockBinaryEmbeddingResponseSchema +>; diff --git a/packages/amazon-bedrock/src/bedrock-embedding-settings.ts b/packages/amazon-bedrock/src/bedrock-embedding-settings.ts index d77bc6fcb610..886170a8b9d2 100644 --- a/packages/amazon-bedrock/src/bedrock-embedding-settings.ts +++ b/packages/amazon-bedrock/src/bedrock-embedding-settings.ts @@ -17,4 +17,10 @@ Flag indicating whether or not to normalize the output embeddings. Defaults to t Only supported in amazon.titan-embed-text-v2:0. */ normalize?: boolean; + + /** +The type of embedding to return. Defaults to float. +Binary embeddings are only supported in amazon.titan-embed-text-v2:0. + */ + embeddingType?: 'float' | 'binary'; } From 6c18b581e91257652275b30ed991f6f7d1e439c1 Mon Sep 17 00:00:00 2001 From: Anatol Zakrividoroga <53095479+anatolzak@users.noreply.github.com> Date: Wed, 9 Apr 2025 21:31:33 +0300 Subject: [PATCH 2/4] add example for amazon bedrock binary embeddings --- examples/ai-core/src/embed/amazon-bedrock.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/examples/ai-core/src/embed/amazon-bedrock.ts b/examples/ai-core/src/embed/amazon-bedrock.ts index 11b7472630ac..8438d560fca2 100644 --- a/examples/ai-core/src/embed/amazon-bedrock.ts +++ b/examples/ai-core/src/embed/amazon-bedrock.ts @@ -10,6 +10,16 @@ async function main() { console.log(embedding); console.log(usage); + + const { embedding: binaryEmbedding, usage: binaryUsage } = await embed({ + model: bedrock.embedding('amazon.titan-embed-text-v2:0', { + embeddingType: 'binary', + }), + value: 'sunny day at the beach', + }); + + console.log(binaryEmbedding); + console.log(binaryUsage); } main().catch(console.error); From 7f4c7bd4fdac5a437551b8694fca08509027ac34 Mon Sep 17 00:00:00 2001 From: Anatol Zakrividoroga <53095479+anatolzak@users.noreply.github.com> Date: Wed, 9 Apr 2025 21:33:53 +0300 Subject: [PATCH 3/4] changeset --- .changeset/thick-wolves-bake.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/thick-wolves-bake.md diff --git a/.changeset/thick-wolves-bake.md b/.changeset/thick-wolves-bake.md new file mode 100644 index 000000000000..2e48d5816c0c --- /dev/null +++ b/.changeset/thick-wolves-bake.md @@ -0,0 +1,5 @@ +--- +'@ai-sdk/amazon-bedrock': patch +--- + +Allow generating binary embeddings using Amazon Titan Text Embeddings V2. From 5d31e342e514617aef677e087bf6ee5b71b4a8f5 Mon Sep 17 00:00:00 2001 From: Anatol Zakrividoroga <53095479+anatolzak@users.noreply.github.com> Date: Thu, 10 Apr 2025 11:20:04 +0300 Subject: [PATCH 4/4] update docs for amazon bedrock binary embeddings --- content/providers/01-ai-sdk-providers/08-amazon-bedrock.mdx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/content/providers/01-ai-sdk-providers/08-amazon-bedrock.mdx b/content/providers/01-ai-sdk-providers/08-amazon-bedrock.mdx index d32e5051d257..9d4e77e5e465 100644 --- a/content/providers/01-ai-sdk-providers/08-amazon-bedrock.mdx +++ b/content/providers/01-ai-sdk-providers/08-amazon-bedrock.mdx @@ -453,6 +453,10 @@ The following optional settings are available for Bedrock Titan embedding models Flag indicating whether or not to normalize the output embeddings. Defaults to true. +- **embeddingType** _'float' | 'binary'_ + + The type of embedding to return. Defaults to 'float'. Binary embeddings are only supported in amazon.titan-embed-text-v2:0. + ### Model Capabilities | Model | Default Dimensions | Custom Dimensions |