Skip to content

Commit 7087d29

Browse files
Pierrci and SBrandeis authored
No more text2text (#1590)
Heads up this is pure vibe-coding _pre-LLM_, i.e. I'm not sure what I'm doing but I'm still doing it, manually (though I tried to take inspiration from #457) The goal is to address https://discuss.huggingface.co/t/no-0-models-returned-by-text2text-search-filter/161546 following huggingface-internal/moon-landing#14258 --------- Co-authored-by: SBrandeis <[email protected]>
1 parent 5fa8667 commit 7087d29

File tree

13 files changed

+12
-147
lines changed

13 files changed

+12
-147
lines changed

packages/inference/src/snippets/getInferenceSnippets.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ const HF_JS_METHODS: Partial<Record<WidgetType, string>> = {
122122
"table-question-answering": "tableQuestionAnswering",
123123
"text-classification": "textClassification",
124124
"text-generation": "textGeneration",
125-
"text2text-generation": "textGeneration",
126125
"token-classification": "tokenClassification",
127126
"text-to-speech": "textToSpeech",
128127
translation: "translation",
@@ -361,7 +360,6 @@ const snippets: Partial<
361360
"text-to-image": snippetGenerator("textToImage"),
362361
"text-to-speech": snippetGenerator("textToSpeech"),
363362
"text-to-video": snippetGenerator("textToVideo"),
364-
"text2text-generation": snippetGenerator("basic"),
365363
"token-classification": snippetGenerator("basic"),
366364
translation: snippetGenerator("basic"),
367365
"zero-shot-classification": snippetGenerator("zeroShotClassification"),

packages/inference/test/InferenceClient.spec.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1429,7 +1429,7 @@ describe.skip("InferenceClient", () => {
14291429
providerId: "mistralai/Devstral-Small-2505",
14301430
hfModelId: "mistralai/Devstral-Small-2505",
14311431
status: "live",
1432-
task: "text2text-generation",
1432+
task: "text-generation",
14331433
},
14341434
};
14351435

@@ -1479,7 +1479,7 @@ describe.skip("InferenceClient", () => {
14791479
expect(res[0]).toEqual(expect.arrayContaining([expect.any(Number)]));
14801480
});
14811481

1482-
it("text2textGeneration", async () => {
1482+
it("textGeneration", async () => {
14831483
const res = await client.textGeneration({
14841484
model: "mistralai/Devstral-Small-2505",
14851485
provider: "nebius",

packages/tasks/src/library-to-tasks.ts

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,7 @@ export const LIBRARY_TASK_MAPPING: Partial<Record<ModelLibraryKey, PipelineType[
3535
sklearn: ["tabular-classification", "tabular-regression", "text-classification"],
3636
spacy: ["token-classification", "text-classification", "sentence-similarity"],
3737
"span-marker": ["token-classification"],
38-
speechbrain: [
39-
"audio-classification",
40-
"audio-to-audio",
41-
"automatic-speech-recognition",
42-
"text-to-speech",
43-
"text2text-generation",
44-
],
38+
speechbrain: ["audio-classification", "audio-to-audio", "automatic-speech-recognition", "text-to-speech"],
4539
stanza: ["token-classification"],
4640
timm: ["image-classification", "image-feature-extraction"],
4741
transformers: [
@@ -62,7 +56,6 @@ export const LIBRARY_TASK_MAPPING: Partial<Record<ModelLibraryKey, PipelineType[
6256
"question-answering",
6357
"summarization",
6458
"table-question-answering",
65-
"text2text-generation",
6659
"text-classification",
6760
"text-generation",
6861
"text-to-audio",

packages/tasks/src/pipelines.ts

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -237,12 +237,6 @@ export const PIPELINE_DATA = {
237237
type: "language-modeling",
238238
name: "Language Modeling",
239239
},
240-
],
241-
modality: "nlp",
242-
},
243-
"text2text-generation": {
244-
name: "Text2Text Generation",
245-
subtasks: [
246240
{
247241
type: "text-simplification",
248242
name: "Text simplification",
@@ -271,6 +265,10 @@ export const PIPELINE_DATA = {
271265
type: "closed-book-qa",
272266
name: "Closed Book QA",
273267
},
268+
{
269+
type: "text2text-generation",
270+
name: "Text2Text Generation",
271+
},
274272
],
275273
modality: "nlp",
276274
},

packages/tasks/src/snippets/inputs.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,6 @@ const inputsTextGeneration = (model: ModelDataMinimal): string | ChatCompletionI
6666
return `"Can you please let us know more details about your "`;
6767
};
6868

69-
const inputsText2TextGeneration = () => `"The answer to the universe is"`;
70-
7169
const inputsFillMask = (model: ModelDataMinimal) => `"The answer to the universe is ${model.mask_token}."`;
7270

7371
const inputsSentenceSimilarity = () =>
@@ -147,7 +145,6 @@ const modelInputSnippets: {
147145
"text-to-video": inputsTextToVideo,
148146
"text-to-speech": inputsTextToSpeech,
149147
"text-to-audio": inputsTextToAudio,
150-
"text2text-generation": inputsText2TextGeneration,
151148
"token-classification": inputsTokenClassification,
152149
translation: inputsTranslation,
153150
"zero-shot-classification": inputsZeroShotClassification,

packages/tasks/src/tasks/index.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,6 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
158158
"text-to-speech": ["espnet", "tensorflowtts", "transformers", "transformers.js"],
159159
"text-to-audio": ["transformers", "transformers.js"],
160160
"text-to-video": ["diffusers"],
161-
"text2text-generation": ["transformers", "transformers.js"],
162161
"time-series-forecasting": [],
163162
"token-classification": [
164163
"adapter-transformers",
@@ -245,7 +244,6 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
245244
"text-to-speech": getData("text-to-speech", textToSpeech),
246245
"text-to-audio": undefined,
247246
"text-to-video": getData("text-to-video", textToVideo),
248-
"text2text-generation": undefined,
249247
"time-series-forecasting": undefined,
250248
"token-classification": getData("token-classification", tokenClassification),
251249
translation: getData("translation", translation),

packages/tasks/src/tasks/placeholder/data.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ const taskData: TaskDataCustom = {
1414
widgetModels: [],
1515
youtubeId: undefined,
1616
/// If this is a subtask, link to the most general task ID
17-
/// (eg, text2text-generation is the canonical ID of translation)
17+
/// (eg, text-generation is the canonical ID of text-simplification)
1818
canonicalId: undefined,
1919
};
2020

packages/tasks/src/tasks/summarization/data.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import type { TaskDataCustom } from "../index.js";
22

33
const taskData: TaskDataCustom = {
4-
canonicalId: "text2text-generation",
4+
canonicalId: "text-generation",
55
datasets: [
66
{
77
description:

packages/tasks/src/tasks/text-generation/about.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
This task covers guides on both [text-generation](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads) and [text-to-text generation](https://huggingface.co/models?pipeline_tag=text2text-generation&sort=downloads) models. Popular large language models that are used for chats or following instructions are also covered in this task. You can find the list of selected open-source large language models [here](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard), ranked by their performance scores.
1+
This task covers guides on both [text-generation](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads) and [text-to-text generation](https://huggingface.co/models?other=text2text-generation&sort=downloads) models. Popular large language models that are used for chats or following instructions are also covered in this task. You can find the list of selected open-source large language models [here](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard), ranked by their performance scores.
22

33
## Use Cases
44

@@ -58,7 +58,7 @@ generator("Hello, I'm a language model", max_length = 30, num_return_sequences=3
5858
## {'generated_text': "Hello, I'm a language modeler. I write and maintain software in Python. I love to code, and that includes coding things that require writing"}, ...
5959
```
6060

61-
[Text-to-Text generation models](https://huggingface.co/models?pipeline_tag=text2text-generation&sort=downloads) have a separate pipeline called `text2text-generation`. This pipeline takes an input containing the sentence including the task and returns the output of the accomplished task.
61+
[Text-to-Text generation models](https://huggingface.co/models?other=text2text-generation&sort=downloads) have a separate pipeline called `text2text-generation`. This pipeline takes an input containing the sentence including the task and returns the output of the accomplished task.
6262

6363
```python
6464
from transformers import pipeline

packages/tasks/src/tasks/text2text-generation/inference.ts

Lines changed: 0 additions & 51 deletions
This file was deleted.

0 commit comments

Comments (0)