[Inference] fix feature extraction (embeddings) for sambanova (#1364)

hanouticelina · web-flow · commit 457119ebd1d6 · 2025-04-22T10:45:04.000+02:00
This PR fixes the widget of https://huggingface.co/intfloat/e5-mistral-7b-instruct. I didn’t want to introduce a breaking change in this PR, but we should consider adding support for the OpenAI [Embeddings API](https://platform.openai.com/docs/api-reference/embeddings). Other providers like [Fireworks AI](https://docs.fireworks.ai/guides/querying-embeddings-models) and [Together](https://docs.together.ai/docs/embeddings-overview) also host embedding models with OpenAI-compatible endpoints. AFAIK, TEI supports the OAI Embeddings API as well.
diff --git a/packages/inference/src/lib/getProviderHelper.ts b/packages/inference/src/lib/getProviderHelper.ts
@@ -119,6 +119,7 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
 	},
 	sambanova: {
 		conversational: new Sambanova.SambanovaConversationalTask(),
+		"feature-extraction": new Sambanova.SambanovaFeatureExtractionTask(),
 	},
 	together: {
 		"text-to-image": new Together.TogetherTextToImageTask(),
diff --git a/packages/inference/src/providers/sambanova.ts b/packages/inference/src/providers/sambanova.ts
@@ -14,10 +14,42 @@
  *
  * Thanks!
  */
-import { BaseConversationalTask } from "./providerHelper";
+import { InferenceOutputError } from "../lib/InferenceOutputError";
+
+import type { FeatureExtractionOutput } from "@huggingface/tasks";
+import type { BodyParams } from "../types";
+import type { FeatureExtractionTaskHelper } from "./providerHelper";
+import { BaseConversationalTask, TaskProviderHelper } from "./providerHelper";
 
 export class SambanovaConversationalTask extends BaseConversationalTask {
 	constructor() {
 		super("sambanova", "https://api.sambanova.ai");
 	}
 }
+
+export class SambanovaFeatureExtractionTask extends TaskProviderHelper implements FeatureExtractionTaskHelper {
+	constructor() {
+		super("sambanova", "https://api.sambanova.ai");
+	}
+
+	override makeRoute(): string {
+		return `/v1/embeddings`;
+	}
+
+	override async getResponse(response: FeatureExtractionOutput): Promise<FeatureExtractionOutput> {
+		if (typeof response === "object" && "data" in response && Array.isArray(response.data)) {
+			return response.data.map((item) => item.embedding);
+		}
+		throw new InferenceOutputError(
+			"Expected Sambanova feature-extraction (embeddings) response format to be {'data' : list of {'embedding' : number[]}}"
+		);
+	}
+
+	override preparePayload(params: BodyParams): Record<string, unknown> {
+		return {
+			model: params.model,
+			input: params.args.inputs,
+			...params.args,
+		};
+	}
+}
diff --git a/packages/inference/src/tasks/nlp/featureExtraction.ts b/packages/inference/src/tasks/nlp/featureExtraction.ts
@@ -3,7 +3,12 @@ import { getProviderHelper } from "../../lib/getProviderHelper";
 import type { BaseArgs, Options } from "../../types";
 import { innerRequest } from "../../utils/request";
 
-export type FeatureExtractionArgs = BaseArgs & FeatureExtractionInput;
+interface FeatureExtractionOAICompatInput {
+	encoding_format?: "float" | "base64";
+	dimensions?: number | null;
+}
+
+export type FeatureExtractionArgs = BaseArgs & FeatureExtractionInput & FeatureExtractionOAICompatInput;
 
 /**
  * Returned values are a multidimensional array of floats (dimension depending on if you sent a string or a list of string, and if the automatic reduction, usually mean_pooling for instance was applied for you or not. This should be explained on the model's README).
diff --git a/packages/inference/test/InferenceClient.spec.ts b/packages/inference/test/InferenceClient.spec.ts
@@ -6,15 +6,15 @@ import type { TextToImageArgs } from "../src";
 import {
 	chatCompletion,
 	chatCompletionStream,
+	HfInference,
 	InferenceClient,
 	textGeneration,
 	textToImage,
-	HfInference,
 } from "../src";
+import { isUrl } from "../src/lib/isUrl";
+import { HARDCODED_MODEL_INFERENCE_MAPPING } from "../src/providers/consts";
 import { readTestFile } from "./test-files";
 import "./vcr";
-import { HARDCODED_MODEL_INFERENCE_MAPPING } from "../src/providers/consts";
-import { isUrl } from "../src/lib/isUrl";
 
 const TIMEOUT = 60000 * 3;
 const env = import.meta.env;
@@ -1176,6 +1176,15 @@ describe.concurrent("InferenceClient", () => {
 				}
 				expect(out).toContain("2");
 			});
+			it("featureExtraction", async () => {
+				const res = await client.featureExtraction({
+					model: "intfloat/e5-mistral-7b-instruct",
+					provider: "sambanova",
+					inputs: "Today is a sunny day and I will get some ice cream.",
+				});
+				expect(res).toBeInstanceOf(Array);
+				expect(res[0]).toBeInstanceOf(Array);
+			});
 		},
 		TIMEOUT
 	);
diff --git a/packages/inference/test/tapes.json b/packages/inference/test/tapes.json