Add text to video snippets (#1240)

kefranabg · web-flow · commit 0193e6b3ee4c · 2025-03-03T09:55:34.000+01:00
There are probably some missing snippets; I added these based on our docs. Feel free to update if something's missing. Needed for huggingface-internal/moon-landing#12722
diff --git a/packages/tasks-gen/scripts/generate-snippets-fixtures.ts b/packages/tasks-gen/scripts/generate-snippets-fixtures.ts
@@ -90,6 +90,17 @@ const TEST_CASES: {
 		providers: ["hf-inference", "fal-ai"],
 		languages: ["sh", "js", "py"],
 	},
+	{
+		testName: "text-to-video",
+		model: {
+			id: "tencent/HunyuanVideo",
+			pipeline_tag: "text-to-video",
+			tags: [],
+			inference: "",
+		},
+		providers: ["replicate", "fal-ai"],
+		languages: ["js", "py"],
+	},
 	{
 		testName: "text-classification",
 		model: {
diff --git a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.fal-ai.js b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.fal-ai.js
@@ -0,0 +1,11 @@
+import { HfInference } from "@huggingface/inference";
+
+const client = new HfInference("api_token");
+
+const video = await client.textToVideo({
+	model: "tencent/HunyuanVideo",
+	provider: "fal-ai",
+	inputs: "A young man walking on the street",
+	parameters: { num_inference_steps: 5 },
+});
+// Use the generated video (it's a Blob)
diff --git a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.replicate.js b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.replicate.js
@@ -0,0 +1,11 @@
+import { HfInference } from "@huggingface/inference";
+
+const client = new HfInference("api_token");
+
+const video = await client.textToVideo({
+	model: "tencent/HunyuanVideo",
+	provider: "replicate",
+	inputs: "A young man walking on the street",
+	parameters: { num_inference_steps: 5 },
+});
+// Use the generated video (it's a Blob)
diff --git a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.fal-ai.py b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.fal-ai.py
@@ -0,0 +1,11 @@
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+	provider="fal-ai",
+	api_key="api_token"
+)
+
+video = client.text_to_video(
+	"A young man walking on the street",
+	model="tencent/HunyuanVideo"
+)
diff --git a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.replicate.py b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.replicate.py
@@ -0,0 +1,11 @@
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+	provider="replicate",
+	api_key="api_token"
+)
+
+video = client.text_to_video(
+	"A young man walking on the street",
+	model="tencent/HunyuanVideo"
+)
diff --git a/packages/tasks/src/snippets/inputs.ts b/packages/tasks/src/snippets/inputs.ts
@@ -96,6 +96,8 @@ const inputsAudioClassification = () => `"sample1.flac"`;
 
 const inputsTextToImage = () => `"Astronaut riding a horse"`;
 
+const inputsTextToVideo = () => `"A young man walking on the street"`; // example prompt, returned already wrapped in double quotes
+
 const inputsTextToSpeech = () => `"The answer to the universe is 42"`;
 
 const inputsTextToAudio = () => `"liquid drum and bass, atmospheric synths, airy sounds"`;
@@ -130,6 +132,7 @@ const modelInputSnippets: {
 	"text-generation": inputsTextGeneration,
 	"image-text-to-text": inputsTextGeneration,
 	"text-to-image": inputsTextToImage,
+	"text-to-video": inputsTextToVideo,
 	"text-to-speech": inputsTextToSpeech,
 	"text-to-audio": inputsTextToAudio,
 	"text2text-generation": inputsText2TextGeneration,
diff --git a/packages/tasks/src/snippets/js.ts b/packages/tasks/src/snippets/js.ts
@@ -275,6 +275,33 @@ query({"inputs": ${getModelInputSnippet(model)}}).then((response) => {
 	];
 };
 
+export const snippetTextToVideo = (
+	model: ModelDataMinimal,
+	accessToken: string,
+	provider: SnippetInferenceProvider
+): InferenceSnippet[] => {
+	return ["fal-ai", "replicate"].includes(provider) // a huggingface.js snippet is produced only for these two providers
+		? [
+				{
+					client: "huggingface.js",
+					content: `\
+import { HfInference } from "@huggingface/inference";
+
+const client = new HfInference("${accessToken || `{API_TOKEN}`}");
+
+const video = await client.textToVideo({
+	model: "${model.id}",
+	provider: "${provider}",
+	inputs: ${getModelInputSnippet(model)},
+	parameters: { num_inference_steps: 5 },
+});
+// Use the generated video (it's a Blob)
+`,
+				},
+		  ]
+		: []; // any other provider yields no snippet at all
+};
+
 export const snippetTextToAudio = (
 	model: ModelDataMinimal,
 	accessToken: string,
@@ -420,6 +447,7 @@ export const jsSnippets: Partial<
 	"sentence-similarity": snippetBasic,
 	"automatic-speech-recognition": snippetAutomaticSpeechRecognition,
 	"text-to-image": snippetTextToImage,
+	"text-to-video": snippetTextToVideo,
 	"text-to-speech": snippetTextToAudio,
 	"text-to-audio": snippetTextToAudio,
 	"audio-to-audio": snippetFile,
diff --git a/packages/tasks/src/snippets/python.ts b/packages/tasks/src/snippets/python.ts
@@ -308,6 +308,27 @@ image = Image.open(io.BytesIO(image_bytes))`,
 	];
 };
 
+export const snippetTextToVideo = (
+	model: ModelDataMinimal,
+	accessToken: string,
+	provider: SnippetInferenceProvider
+): InferenceSnippet[] => {
+	return ["fal-ai", "replicate"].includes(provider) // a huggingface_hub snippet is produced only for these two providers
+		? [
+				{
+					client: "huggingface_hub",
+					content: `\
+${snippetImportInferenceClient(accessToken, provider)}
+
+video = client.text_to_video(
+	${getModelInputSnippet(model)},
+	model="${model.id}"
+)`,
+				},
+		  ]
+		: []; // any other provider yields no snippet at all
+};
+
 export const snippetTabular = (model: ModelDataMinimal): InferenceSnippet[] => {
 	return [
 		{
@@ -412,6 +433,7 @@ export const pythonSnippets: Partial<
 	"sentence-similarity": snippetBasic,
 	"automatic-speech-recognition": snippetFile,
 	"text-to-image": snippetTextToImage,
+	"text-to-video": snippetTextToVideo,
 	"text-to-speech": snippetTextToAudio,
 	"text-to-audio": snippetTextToAudio,
 	"audio-to-audio": snippetFile,
diff --git a/packages/tasks/src/tasks/text-to-video/data.ts b/packages/tasks/src/tasks/text-to-video/data.ts
@@ -99,7 +99,7 @@ const taskData: TaskDataCustom = {
 	],
 	summary:
 		"Text-to-video models can be used in any application that requires generating consistent sequence of images from text. ",
-	widgetModels: [],
+	widgetModels: ["tencent/HunyuanVideo"],
 	youtubeId: undefined,
 };