[Tasks] update automatic speech recognition output specs (huggingface#1167)

hanouticelina · web-flow · commit c843fbe42683 · 2025-02-04T11:23:10.000+01:00
The chunk field should have been named `timestamp` instead of `timestamps` (see the transformers pipeline output [here](https://github.com/huggingface/transformers/blob/9d2056f12b66e64978f78a2dcb023f65b2be2108/src/transformers/pipelines/automatic_speech_recognition.py#L648)). This also aligns with the fal-ai output [specs](https://fal.ai/models/fal-ai/whisper/api#type-WhisperChunk) for the model that is currently supported.
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts
@@ -145,6 +145,6 @@ export interface AutomaticSpeechRecognitionOutputChunk {
 	/**
 	 * The start and end timestamps corresponding with the text
 	 */
-	timestamps: number[];
+	timestamp: number[];
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json b/packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json
@@ -20,7 +20,7 @@
 						"type": "string",
 						"description": "A chunk of text identified by the model"
 					},
-					"timestamps": {
+					"timestamp": {
 						"type": "array",
 						"description": "The start and end timestamps corresponding with the text",
 						"items": {
@@ -30,7 +30,7 @@
 						"maxLength": 2
 					}
 				},
-				"required": ["text", "timestamps"]
+				"required": ["text", "timestamp"]
 			}
 		}
 	},

Original file line number	Diff line number	Diff line change
`@@ -145,6 +145,6 @@ export interface AutomaticSpeechRecognitionOutputChunk {`
`145`	`145`	`/**`
`146`	`146`	`* The start and end timestamps corresponding with the text`
`147`	`147`	`*/`
`148`		`- timestamps: number[];`
	`148`	`+ timestamp: number[];`
`149`	`149`	`[property: string]: unknown;`
`150`	`150`	`}`
Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@`
`20`	`20`	`"type": "string",`
`21`	`21`	`"description": "A chunk of text identified by the model"`
`22`	`22`	`},`
`23`		`- "timestamps": {`
	`23`	`+ "timestamp": {`
`24`	`24`	`"type": "array",`
`25`	`25`	`"description": "The start and end timestamps corresponding with the text",`
`26`	`26`	`"items": {`
`@@ -30,7 +30,7 @@`
`30`	`30`	`"maxLength": 2`
`31`	`31`	`}`
`32`	`32`	`},`
`33`		`- "required": ["text", "timestamps"]`
	`33`	`+ "required": ["text", "timestamp"]`
`34`	`34`	`}`
`35`	`35`	`}`
`36`	`36`	`},`