Check if api inference output is correct (#125)

Mishig · web-flow · commit b5d8fb05b934 · 2023-04-04T10:06:42.000+02:00
This PR adds checks to api inference calls:
1. Checks if api inference outputs are as expected (for example,
TextClassification api output should be `array<{score: number, label:
string}>`)
2. If not, throws a `new TypeError`
diff --git a/packages/inference/src/HfInference.ts b/packages/inference/src/HfInference.ts
@@ -519,21 +519,52 @@ export class HfInference {
 	 * Tries to fill in a hole with a missing word (token to be precise). That’s the base task for BERT models.
 	 */
 	public async fillMask(args: FillMaskArgs, options?: Options): Promise<FillMaskReturn> {
-		return await this.request(args, options);
+		const res = await this.request<FillMaskReturn>(args, options);
+		const isValidOutput =
+			Array.isArray(res) &&
+			res.every(
+				(x) =>
+					typeof x.score === "number" &&
+					typeof x.sequence === "string" &&
+					typeof x.token === "number" &&
+					typeof x.token_str === "string"
+			);
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type Array<score: number, sequence:string, token:number, token_str:string>"
+			);
+		}
+		return res;
 	}
 
 	/**
 	 * This task is well known to summarize longer text into shorter text. Be careful, some models have a maximum length of input. That means that the summary cannot handle full books for instance. Be careful when choosing your model.
 	 */
 	public async summarization(args: SummarizationArgs, options?: Options): Promise<SummarizationReturn> {
-		return (await this.request<SummarizationReturn[]>(args, options))?.[0];
+		const res = await this.request<SummarizationReturn[]>(args, options);
+		const isValidOutput = Array.isArray(res) && res.every((x) => typeof x.summary_text === "string");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<summary_text: string>");
+		}
+		return res?.[0];
 	}
 
 	/**
 	 * Want to have a nice know-it-all bot that can answer any question?. Recommended model: deepset/roberta-base-squad2
 	 */
 	public async questionAnswer(args: QuestionAnswerArgs, options?: Options): Promise<QuestionAnswerReturn> {
-		return await this.request(args, options);
+		const res = await this.request<QuestionAnswerReturn>(args, options);
+		const isValidOutput =
+			typeof res.answer === "string" &&
+			typeof res.end === "number" &&
+			typeof res.score === "number" &&
+			typeof res.start === "number";
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type <answer: string, end: number, score: number, start: number>"
+			);
+		}
+		return res;
 	}
 
 	/**
@@ -543,21 +574,45 @@ export class HfInference {
 		args: TableQuestionAnswerArgs,
 		options?: Options
 	): Promise<TableQuestionAnswerReturn> {
-		return await this.request(args, options);
+		const res = await this.request<TableQuestionAnswerReturn>(args, options);
+		const isValidOutput =
+			typeof res.aggregator === "string" &&
+			typeof res.answer === "string" &&
+			Array.isArray(res.cells) &&
+			res.cells.every((x) => typeof x === "string") &&
+			Array.isArray(res.coordinates) &&
+			res.coordinates.every((coord) => Array.isArray(coord) && coord.every((x) => typeof x === "number"));
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type <aggregator: string, answer: string, cells: string[], coordinates: number[][]>"
+			);
+		}
+		return res;
 	}
 
 	/**
 	 * Usually used for sentiment-analysis this will output the likelihood of classes of an input. Recommended model: distilbert-base-uncased-finetuned-sst-2-english
 	 */
 	public async textClassification(args: TextClassificationArgs, options?: Options): Promise<TextClassificationReturn> {
-		return (await this.request<TextClassificationReturn[]>(args, options))?.[0];
+		const res = (await this.request<TextClassificationReturn[]>(args, options))?.[0];
+		const isValidOutput =
+			Array.isArray(res) && res.every((x) => typeof x.label === "string" && typeof x.score === "number");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<label: string, score: number>");
+		}
+		return res;
 	}
 
 	/**
 	 * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
 	 */
 	public async textGeneration(args: TextGenerationArgs, options?: Options): Promise<TextGenerationReturn> {
-		return (await this.request<TextGenerationReturn[]>(args, options))?.[0];
+		const res = await this.request<TextGenerationReturn[]>(args, options);
+		const isValidOutput = Array.isArray(res) && res.every((x) => typeof x.generated_text === "string");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<generated_text: string>");
+		}
+		return res?.[0];
 	}
 
 	/**
@@ -567,14 +622,35 @@ export class HfInference {
 		args: TokenClassificationArgs,
 		options?: Options
 	): Promise<TokenClassificationReturn> {
-		return toArray(await this.request(args, options));
+		const res = toArray(await this.request<TokenClassificationReturnValue | TokenClassificationReturn>(args, options));
+		const isValidOutput =
+			Array.isArray(res) &&
+			res.every(
+				(x) =>
+					typeof x.end === "number" &&
+					typeof x.entity_group === "string" &&
+					typeof x.score === "number" &&
+					typeof x.start === "number" &&
+					typeof x.word === "string"
+			);
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type Array<end: number, entity_group: string, score: number, start: number, word: string>"
+			);
+		}
+		return res;
 	}
 
 	/**
 	 * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
 	 */
 	public async translation(args: TranslationArgs, options?: Options): Promise<TranslationReturn> {
-		return (await this.request<TranslationReturn[]>(args, options))?.[0];
+		const res = await this.request<TranslationReturn[]>(args, options);
+		const isValidOutput = Array.isArray(res) && res.every((x) => typeof x.translation_text === "string");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<translation_text: string>");
+		}
+		return res?.[0];
 	}
 
 	/**
@@ -584,24 +660,55 @@ export class HfInference {
 		args: ZeroShotClassificationArgs,
 		options?: Options
 	): Promise<ZeroShotClassificationReturn> {
-		return toArray(
-			await this.request<ZeroShotClassificationReturnValue | ZeroShotClassificationReturnValue[]>(args, options)
+		const res = toArray(
+			await this.request<ZeroShotClassificationReturnValue | ZeroShotClassificationReturn>(args, options)
 		);
+		const isValidOutput =
+			Array.isArray(res) &&
+			res.every(
+				(x) =>
+					Array.isArray(x.labels) &&
+					x.labels.every((_label) => typeof _label === "string") &&
+					Array.isArray(x.scores) &&
+					x.scores.every((_score) => typeof _score === "number") &&
+					typeof x.sequence === "string"
+			);
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type Array<labels: string[], scores: number[], sequence: string>"
+			);
+		}
+		return res;
 	}
 
 	/**
 	 * This task corresponds to any chatbot like structure. Models tend to have shorter max_length, so please check with caution when using a given model if you need long range dependency or not. Recommended model: microsoft/DialoGPT-large.
 	 *
 	 */
 	public async conversational(args: ConversationalArgs, options?: Options): Promise<ConversationalReturn> {
-		return await this.request(args, options);
+		const res = await this.request<ConversationalReturn>(args, options);
+		const isValidOutput =
+			Array.isArray(res.conversation.generated_responses) &&
+			res.conversation.generated_responses.every((x) => typeof x === "string") &&
+			Array.isArray(res.conversation.past_user_inputs) &&
+			res.conversation.past_user_inputs.every((x) => typeof x === "string") &&
+			typeof res.generated_text === "string" &&
+			Array.isArray(res.warnings) &&
+			res.warnings.every((x) => typeof x === "string");
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type <conversation: {generated_responses: string[], past_user_inputs: string[]}, generated_text: string, warnings: string[]>"
+			);
+		}
+		return res;
 	}
 
 	/**
 	 * This task reads some text and outputs raw float values, that are usually consumed as part of a semantic database/semantic search.
 	 */
 	public async featureExtraction(args: FeatureExtractionArgs, options?: Options): Promise<FeatureExtractionReturn> {
-		return await this.request(args, options);
+		const res = await this.request<FeatureExtractionReturn>(args, options);
+		return res;
 	}
 
 	/**
@@ -612,10 +719,15 @@ export class HfInference {
 		args: AutomaticSpeechRecognitionArgs,
 		options?: Options
 	): Promise<AutomaticSpeechRecognitionReturn> {
-		return await this.request(args, {
+		const res = await this.request<AutomaticSpeechRecognitionReturn>(args, {
 			...options,
 			binary: true,
 		});
+		const isValidOutput = typeof res.text === "string";
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type <text: string>");
+		}
+		return res;
 	}
 
 	/**
@@ -626,10 +738,16 @@ export class HfInference {
 		args: AudioClassificationArgs,
 		options?: Options
 	): Promise<AudioClassificationReturn> {
-		return await this.request(args, {
+		const res = await this.request<AudioClassificationReturn>(args, {
 			...options,
 			binary: true,
 		});
+		const isValidOutput =
+			Array.isArray(res) && res.every((x) => typeof x.label === "string" && typeof x.score === "number");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<label: string, score: number>");
+		}
+		return res;
 	}
 
 	/**
@@ -640,43 +758,80 @@ export class HfInference {
 		args: ImageClassificationArgs,
 		options?: Options
 	): Promise<ImageClassificationReturn> {
-		return await this.request(args, {
+		const res = await this.request<ImageClassificationReturn>(args, {
 			...options,
 			binary: true,
 		});
+		const isValidOutput =
+			Array.isArray(res) && res.every((x) => typeof x.label === "string" && typeof x.score === "number");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<label: string, score: number>");
+		}
+		return res;
 	}
 
 	/**
 	 * This task reads some image input and outputs the likelihood of classes & bounding boxes of detected objects.
 	 * Recommended model: facebook/detr-resnet-50
 	 */
 	public async objectDetection(args: ObjectDetectionArgs, options?: Options): Promise<ObjectDetectionReturn> {
-		return await this.request(args, {
+		const res = await this.request<ObjectDetectionReturn>(args, {
 			...options,
 			binary: true,
 		});
+		const isValidOutput =
+			Array.isArray(res) &&
+			res.every(
+				(x) =>
+					typeof x.label === "string" &&
+					typeof x.score === "number" &&
+					typeof x.box.xmin === "number" &&
+					typeof x.box.ymin === "number" &&
+					typeof x.box.xmax === "number" &&
+					typeof x.box.ymax === "number"
+			);
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type Array<{label:string; score:number; box:{xmin:number; ymin:number; xmax:number; ymax:number}}>"
+			);
+		}
+		return res;
 	}
 
 	/**
 	 * This task reads some image input and outputs the likelihood of classes & bounding boxes of detected objects.
 	 * Recommended model: facebook/detr-resnet-50-panoptic
 	 */
 	public async imageSegmentation(args: ImageSegmentationArgs, options?: Options): Promise<ImageSegmentationReturn> {
-		return await this.request(args, {
+		const res = await this.request<ImageSegmentationReturn>(args, {
 			...options,
 			binary: true,
 		});
+		const isValidOutput =
+			Array.isArray(res) &&
+			res.every((x) => typeof x.label === "string" && typeof x.mask === "string" && typeof x.score === "number");
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type Array<label: string, mask: string, score: number>"
+			);
+		}
+		return res;
 	}
 
 	/**
 	 * This task reads some text input and outputs an image.
 	 * Recommended model: stabilityai/stable-diffusion-2
 	 */
 	public async textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn> {
-		return await this.request(args, {
+		const res = await this.request<TextToImageReturn>(args, {
 			...options,
 			blob: true,
 		});
+		const isValidOutput = res && res instanceof Blob;
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type object & of instance Blob");
+		}
+		return res;
 	}
 
 	public async request<T>(