Feature sentence fix (#141)

radames · coyotte508 · web-flow · commit 1221acb9d48b · 2023-04-13T23:15:15.000+02:00
Co-authored-by: Eliott C <coyotte508@gmail.com>
diff --git a/packages/inference/README.md b/packages/inference/README.md
@@ -109,7 +109,7 @@ await hf.conversational({
   }
 })
 
-await hf.featureExtraction({
+await hf.sentenceSimilarity({
   model: 'sentence-transformers/paraphrase-xlm-r-multilingual-v1',
   inputs: {
     source_sentence: 'That is a happy person',
@@ -121,6 +121,11 @@ await hf.featureExtraction({
   }
 })
 
+await hf.featureExtraction({
+    model: "sentence-transformers/distilbert-base-nli-mean-tokens",
+    inputs: "That is a happy person",
+});
+
 // Audio
 
 await hf.automaticSpeechRecognition({
diff --git a/packages/inference/src/HfInference.ts b/packages/inference/src/HfInference.ts
@@ -449,8 +449,22 @@ export interface ConversationalReturn {
 	generated_text: string;
 	warnings: string[];
 }
-
 export type FeatureExtractionArgs = Args & {
+	/**
+	 *  The input is a string or a list of strings to get the features from.
+	 *
+	 *  inputs: "That is a happy person",
+	 *
+	 */
+	inputs: string | string[];
+};
+
+/**
+ * Returned values are a list of floats, or a list of lists of floats (depending on whether you sent a string or a list of strings, and on whether the automatic reduction, usually mean_pooling, was applied for you or not). This should be explained in the model's README.
+ */
+export type FeatureExtractionReturn = (number | number[])[];
+
+export type SentenceSimiliarityArgs = Args & {
 	/**
 	 * The inputs vary based on the model. For example when using sentence-transformers/paraphrase-xlm-r-multilingual-v1 the inputs will look like this:
 	 *
@@ -463,9 +477,9 @@ export type FeatureExtractionArgs = Args & {
 };
 
 /**
- * Returned values are a list of floats, or a list of list of floats (depending on if you sent a string or a list of string, and if the automatic reduction, usually mean_pooling for instance was applied for you or not. This should be explained on the model's README.
+ * Returned values are a list of floats
  */
-export type FeatureExtractionReturn = (number | number[])[];
+export type SentenceSimiliarityReturn = number[];
 
 export type ImageClassificationArgs = Args & {
 	/**
@@ -834,6 +848,44 @@ export class HfInference {
 	 */
 	public async featureExtraction(args: FeatureExtractionArgs, options?: Options): Promise<FeatureExtractionReturn> {
 		const res = await this.request<FeatureExtractionReturn>(args, options);
+		let isValidOutput = true;
+		// Check if output is an array
+		if (Array.isArray(res)) {
+			for (const e of res) {
+				// Check if output is an array of arrays or numbers
+				if (Array.isArray(e)) {
+					// if all elements are numbers, continue
+					isValidOutput = e.every((x) => typeof x === "number");
+					if (!isValidOutput) {
+						break;
+					}
+				} else if (typeof e !== "number") {
+					isValidOutput = false;
+					break;
+				}
+			}
+		} else {
+			isValidOutput = false;
+		}
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<Array<number> | number>");
+		}
+		return res;
+	}
+
+	/**
+	 * Calculate the semantic similarity between one text and a list of other sentences by comparing their embeddings.
+	 */
+	public async sentenceSimiliarity(
+		args: SentenceSimiliarityArgs,
+		options?: Options
+	): Promise<SentenceSimiliarityReturn> {
+		const res = await this.request<SentenceSimiliarityReturn>(args, options);
+
+		const isValidOutput = Array.isArray(res) && res.every((x) => typeof x === "number");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<number>");
+		}
 		return res;
 	}
 
diff --git a/packages/inference/test/HfInference.spec.ts b/packages/inference/test/HfInference.spec.ts
@@ -267,9 +267,9 @@ describe.concurrent(
 				warnings: ["Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation."],
 			});
 		});
-		it("featureExtraction", async () => {
+		it("SentenceSimiliarity", async () => {
 			expect(
-				await hf.featureExtraction({
+				await hf.sentenceSimiliarity({
 					model: "sentence-transformers/paraphrase-xlm-r-multilingual-v1",
 					inputs: {
 						source_sentence: "That is a happy person",
@@ -278,6 +278,13 @@ describe.concurrent(
 				})
 			).toEqual([expect.any(Number), expect.any(Number), expect.any(Number)]);
 		});
+		it("FeatureExtraction", async () => {
+			const response = await hf.featureExtraction({
+				model: "sentence-transformers/distilbert-base-nli-mean-tokens",
+				inputs: "That is a happy person",
+			});
+			expect(response).toEqual(expect.arrayContaining([expect.any(Number)]));
+		});
 		it("automaticSpeechRecognition", async () => {
 			expect(
 				await hf.automaticSpeechRecognition({
diff --git a/packages/inference/test/tapes.json b/packages/inference/test/tapes.json