From 985e5fdd590982680d615386951c4d29658efa29 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Wed, 4 Jun 2025 18:21:35 +0200 Subject: [PATCH] [Inference Snippet] Add a directRequest option (false by default) --- .../src/snippets/getInferenceSnippets.ts | 64 +++++++++++++------ .../scripts/generate-snippets-fixtures.ts | 12 ++++ .../js/openai/0.together.js | 4 +- .../python/openai/0.together.py | 4 +- .../python/requests/0.together.py | 4 +- .../sh/curl/0.together.sh | 4 +- .../js/openai/0.together.js | 4 +- .../python/openai/0.together.py | 4 +- .../python/requests/0.together.py | 4 +- .../sh/curl/0.together.sh | 4 +- .../js/openai/0.fireworks-ai.js | 4 +- .../python/openai/0.fireworks-ai.py | 4 +- .../python/requests/0.fireworks-ai.py | 4 +- .../sh/curl/0.fireworks-ai.sh | 4 +- .../js/openai/0.fireworks-ai.js | 4 +- .../python/openai/0.fireworks-ai.py | 4 +- .../python/requests/0.fireworks-ai.py | 4 +- .../sh/curl/0.fireworks-ai.sh | 4 +- .../js/huggingface.js/0.together.js | 16 +++++ .../js/openai/0.together.js | 18 ++++++ .../python/huggingface_hub/0.together.py | 19 ++++++ .../python/openai/0.together.py | 19 ++++++ .../python/requests/0.together.py | 23 +++++++ .../sh/curl/0.together.sh | 13 ++++ .../text-to-image--lora/js/fetch/0.fal-ai.js | 4 +- .../text-to-image/js/fetch/0.fal-ai.js | 4 +- .../text-to-speech/js/fetch/0.fal-ai.js | 4 +- .../python/requests/0.fal-ai.py | 4 +- 28 files changed, 203 insertions(+), 61 deletions(-) create mode 100644 packages/tasks-gen/snippets-fixtures/explicit-direct-request/js/huggingface.js/0.together.js create mode 100644 packages/tasks-gen/snippets-fixtures/explicit-direct-request/js/openai/0.together.js create mode 100644 packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/huggingface_hub/0.together.py create mode 100644 packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/openai/0.together.py create mode 100644 
packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/requests/0.together.py create mode 100644 packages/tasks-gen/snippets-fixtures/explicit-direct-request/sh/curl/0.together.sh diff --git a/packages/inference/src/snippets/getInferenceSnippets.ts b/packages/inference/src/snippets/getInferenceSnippets.ts index 767d6cc40f..19d8de3afe 100644 --- a/packages/inference/src/snippets/getInferenceSnippets.ts +++ b/packages/inference/src/snippets/getInferenceSnippets.ts @@ -14,10 +14,12 @@ import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions.j import type { InferenceProviderOrPolicy, InferenceTask, RequestArgs } from "../types.js"; import { templates } from "./templates.exported.js"; -export type InferenceSnippetOptions = { streaming?: boolean; billTo?: string; accessToken?: string } & Record< - string, - unknown ->; +export type InferenceSnippetOptions = { + streaming?: boolean; + billTo?: string; + accessToken?: string; + directRequest?: boolean; +} & Record; const PYTHON_CLIENTS = ["huggingface_hub", "fal_client", "requests", "openai"] as const; const JS_CLIENTS = ["fetch", "huggingface.js", "openai"] as const; @@ -124,7 +126,10 @@ const HF_JS_METHODS: Partial> = { translation: "translation", }; -const ACCESS_TOKEN_PLACEHOLDER = ""; // Placeholder to replace with env variable in snippets +// Placeholders to replace with env variable in snippets +// little hack to support both direct requests and routing => routed requests should start with "hf_" +const ACCESS_TOKEN_ROUTING_PLACEHOLDER = "hf_token_placeholder"; +const ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER = "not_hf_token_placeholder"; // Snippet generators const snippetGenerator = (templateName: string, inputPreparationFn?: InputPreparationFn) => { @@ -153,7 +158,11 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar console.error(`Failed to get provider helper for ${provider} (${task})`, e); return []; } - const accessTokenOrPlaceholder = 
opts?.accessToken ?? ACCESS_TOKEN_PLACEHOLDER; + + const placeholder = opts?.directRequest + ? ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER + : ACCESS_TOKEN_ROUTING_PLACEHOLDER; + const accessTokenOrPlaceholder = opts?.accessToken ?? placeholder; /// Prepare inputs + make request const inputs = inputPreparationFn ? inputPreparationFn(model, opts) : { inputs: getModelInputSnippet(model) }; @@ -255,8 +264,8 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar } /// Replace access token placeholder - if (snippet.includes(ACCESS_TOKEN_PLACEHOLDER)) { - snippet = replaceAccessTokenPlaceholder(snippet, language, provider); + if (snippet.includes(placeholder)) { + snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider); } /// Snippet is ready! @@ -431,6 +440,8 @@ function removeSuffix(str: string, suffix: string) { } function replaceAccessTokenPlaceholder( + directRequest: boolean | undefined, + placeholder: string, snippet: string, language: InferenceSnippetLanguage, provider: InferenceProviderOrPolicy @@ -439,46 +450,57 @@ function replaceAccessTokenPlaceholder( // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable. // Determine if HF_TOKEN or specific provider token should be used - const accessTokenEnvVar = - !snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN - snippet.includes("https://router.huggingface.co") || // explicit routed request => use $HF_TOKEN - provider == "hf-inference" // hf-inference provider => use $HF_TOKEN - ? "HF_TOKEN" - : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. 
"REPLICATE_API_KEY" + const useHfToken = + provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN + (!directRequest && // if explicit directRequest => use provider-specific token + (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN + snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN + + const accessTokenEnvVar = useHfToken + ? "HF_TOKEN" // e.g. routed request or hf-inference + : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY" // Replace the placeholder with the env variable if (language === "sh") { snippet = snippet.replace( - `'Authorization: Bearer ${ACCESS_TOKEN_PLACEHOLDER}'`, + `'Authorization: Bearer ${placeholder}'`, `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN" ); } else if (language === "python") { snippet = "import os\n" + snippet; snippet = snippet.replace( - `"${ACCESS_TOKEN_PLACEHOLDER}"`, + `"${placeholder}"`, `os.environ["${accessTokenEnvVar}"]` // e.g. os.environ["HF_TOKEN") ); snippet = snippet.replace( - `"Bearer ${ACCESS_TOKEN_PLACEHOLDER}"`, + `"Bearer ${placeholder}"`, `f"Bearer {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Bearer {os.environ['HF_TOKEN']}" ); snippet = snippet.replace( - `"Key ${ACCESS_TOKEN_PLACEHOLDER}"`, + `"Key ${placeholder}"`, `f"Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Key {os.environ['FAL_AI_API_KEY']}" ); + snippet = snippet.replace( + `"X-Key ${placeholder}"`, + `f"X-Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"X-Key {os.environ['BLACK_FOREST_LABS_API_KEY']}" + ); } else if (language === "js") { snippet = snippet.replace( - `"${ACCESS_TOKEN_PLACEHOLDER}"`, + `"${placeholder}"`, `process.env.${accessTokenEnvVar}` // e.g. 
process.env.HF_TOKEN ); snippet = snippet.replace( - `Authorization: "Bearer ${ACCESS_TOKEN_PLACEHOLDER}",`, + `Authorization: "Bearer ${placeholder}",`, `Authorization: \`Bearer $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Bearer ${process.env.HF_TOKEN}`, ); snippet = snippet.replace( - `Authorization: "Key ${ACCESS_TOKEN_PLACEHOLDER}",`, + `Authorization: "Key ${placeholder}",`, `Authorization: \`Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Key ${process.env.FAL_AI_API_KEY}`, ); + snippet = snippet.replace( + `Authorization: "X-Key ${placeholder}",`, + `Authorization: \`X-Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `X-Key ${process.env.BLACK_FOREST_LABS_API_KEY}`, + ); } return snippet; } diff --git a/packages/tasks-gen/scripts/generate-snippets-fixtures.ts b/packages/tasks-gen/scripts/generate-snippets-fixtures.ts index c18e2fdd00..c69629631b 100644 --- a/packages/tasks-gen/scripts/generate-snippets-fixtures.ts +++ b/packages/tasks-gen/scripts/generate-snippets-fixtures.ts @@ -252,6 +252,18 @@ const TEST_CASES: { providers: ["hf-inference"], opts: { accessToken: "hf_xxx" }, }, + { + testName: "explicit-direct-request", + task: "conversational", + model: { + id: "meta-llama/Llama-3.1-8B-Instruct", + pipeline_tag: "text-generation", + tags: ["conversational"], + inference: "", + }, + providers: ["together"], + opts: { directRequest: true }, + }, { testName: "text-to-speech", task: "text-to-speech", diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.together.js b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.together.js index 0b88f47cf1..215f976d89 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.together.js +++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.together.js @@ -1,8 +1,8 @@ import { OpenAI } from "openai"; const client = new OpenAI({ - 
baseURL: "https://api.together.xyz/v1", - apiKey: process.env.TOGETHER_API_KEY, + baseURL: "https://router.huggingface.co/together/v1", + apiKey: process.env.HF_TOKEN, }); const chatCompletion = await client.chat.completions.create({ diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.together.py b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.together.py index 6b3b71305f..46ef833863 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.together.py +++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.together.py @@ -2,8 +2,8 @@ from openai import OpenAI client = OpenAI( - base_url="https://api.together.xyz/v1", - api_key=os.environ["TOGETHER_API_KEY"], + base_url="https://router.huggingface.co/together/v1", + api_key=os.environ["HF_TOKEN"], ) completion = client.chat.completions.create( diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.together.py b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.together.py index 8583441882..5c480be35b 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.together.py +++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.together.py @@ -1,9 +1,9 @@ import os import requests -API_URL = "https://api.together.xyz/v1/chat/completions" +API_URL = "https://router.huggingface.co/together/v1/chat/completions" headers = { - "Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}", + "Authorization": f"Bearer {os.environ['HF_TOKEN']}", } def query(payload): diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.together.sh b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.together.sh index 268f721daa..c3a7de35b3 100644 --- 
a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.together.sh +++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.together.sh @@ -1,5 +1,5 @@ -curl https://api.together.xyz/v1/chat/completions \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ +curl https://router.huggingface.co/together/v1/chat/completions \ + -H "Authorization: Bearer $HF_TOKEN" \ -H 'Content-Type: application/json' \ -d '{ "messages": [ diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.together.js b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.together.js index 453992a01c..dec3786687 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.together.js +++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.together.js @@ -1,8 +1,8 @@ import { OpenAI } from "openai"; const client = new OpenAI({ - baseURL: "https://api.together.xyz/v1", - apiKey: process.env.TOGETHER_API_KEY, + baseURL: "https://router.huggingface.co/together/v1", + apiKey: process.env.HF_TOKEN, }); const stream = await client.chat.completions.create({ diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.together.py b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.together.py index d199f8d88e..ffb7e65a7d 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.together.py +++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.together.py @@ -2,8 +2,8 @@ from openai import OpenAI client = OpenAI( - base_url="https://api.together.xyz/v1", - api_key=os.environ["TOGETHER_API_KEY"], + base_url="https://router.huggingface.co/together/v1", + api_key=os.environ["HF_TOKEN"], ) stream = client.chat.completions.create( diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.together.py 
b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.together.py index f3cf193cd3..123a5039a0 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.together.py +++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.together.py @@ -2,9 +2,9 @@ import json import requests -API_URL = "https://api.together.xyz/v1/chat/completions" +API_URL = "https://router.huggingface.co/together/v1/chat/completions" headers = { - "Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}", + "Authorization": f"Bearer {os.environ['HF_TOKEN']}", } def query(payload): diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.together.sh b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.together.sh index 708320a076..825995dbc1 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.together.sh +++ b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.together.sh @@ -1,5 +1,5 @@ -curl https://api.together.xyz/v1/chat/completions \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ +curl https://router.huggingface.co/together/v1/chat/completions \ + -H "Authorization: Bearer $HF_TOKEN" \ -H 'Content-Type: application/json' \ -d '{ "messages": [ diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.fireworks-ai.js b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.fireworks-ai.js index 540065e2f6..d436c7c7e2 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.fireworks-ai.js +++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.fireworks-ai.js @@ -1,8 +1,8 @@ import { OpenAI } from "openai"; const client = new OpenAI({ - baseURL: "https://api.fireworks.ai/inference/v1", - apiKey: process.env.FIREWORKS_AI_API_KEY, + baseURL: 
"https://router.huggingface.co/fireworks-ai/inference/v1", + apiKey: process.env.HF_TOKEN, }); const chatCompletion = await client.chat.completions.create({ diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.fireworks-ai.py b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.fireworks-ai.py index 24fac1a5e8..b1e00266be 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.fireworks-ai.py +++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.fireworks-ai.py @@ -2,8 +2,8 @@ from openai import OpenAI client = OpenAI( - base_url="https://api.fireworks.ai/inference/v1", - api_key=os.environ["FIREWORKS_AI_API_KEY"], + base_url="https://router.huggingface.co/fireworks-ai/inference/v1", + api_key=os.environ["HF_TOKEN"], ) completion = client.chat.completions.create( diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.fireworks-ai.py b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.fireworks-ai.py index f54678402b..6134467bf6 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.fireworks-ai.py +++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.fireworks-ai.py @@ -1,9 +1,9 @@ import os import requests -API_URL = "https://api.fireworks.ai/inference/v1/chat/completions" +API_URL = "https://router.huggingface.co/fireworks-ai/inference/v1/chat/completions" headers = { - "Authorization": f"Bearer {os.environ['FIREWORKS_AI_API_KEY']}", + "Authorization": f"Bearer {os.environ['HF_TOKEN']}", } def query(payload): diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/sh/curl/0.fireworks-ai.sh b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/sh/curl/0.fireworks-ai.sh index 4d9bcbb84c..f75d1a67f7 100644 --- 
a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/sh/curl/0.fireworks-ai.sh +++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/sh/curl/0.fireworks-ai.sh @@ -1,5 +1,5 @@ -curl https://api.fireworks.ai/inference/v1/chat/completions \ - -H "Authorization: Bearer $FIREWORKS_AI_API_KEY" \ +curl https://router.huggingface.co/fireworks-ai/inference/v1/chat/completions \ + -H "Authorization: Bearer $HF_TOKEN" \ -H 'Content-Type: application/json' \ -d '{ "messages": [ diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/js/openai/0.fireworks-ai.js b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/js/openai/0.fireworks-ai.js index 3a5eabdff4..619ccb8450 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/js/openai/0.fireworks-ai.js +++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/js/openai/0.fireworks-ai.js @@ -1,8 +1,8 @@ import { OpenAI } from "openai"; const client = new OpenAI({ - baseURL: "https://api.fireworks.ai/inference/v1", - apiKey: process.env.FIREWORKS_AI_API_KEY, + baseURL: "https://router.huggingface.co/fireworks-ai/inference/v1", + apiKey: process.env.HF_TOKEN, }); const stream = await client.chat.completions.create({ diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/python/openai/0.fireworks-ai.py b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/python/openai/0.fireworks-ai.py index d767b844c0..b509151034 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/python/openai/0.fireworks-ai.py +++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/python/openai/0.fireworks-ai.py @@ -2,8 +2,8 @@ from openai import OpenAI client = OpenAI( - base_url="https://api.fireworks.ai/inference/v1", - api_key=os.environ["FIREWORKS_AI_API_KEY"], + base_url="https://router.huggingface.co/fireworks-ai/inference/v1", + api_key=os.environ["HF_TOKEN"], ) stream = 
client.chat.completions.create( diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/python/requests/0.fireworks-ai.py b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/python/requests/0.fireworks-ai.py index eaf0d5a9dd..b200e704f7 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/python/requests/0.fireworks-ai.py +++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/python/requests/0.fireworks-ai.py @@ -2,9 +2,9 @@ import json import requests -API_URL = "https://api.fireworks.ai/inference/v1/chat/completions" +API_URL = "https://router.huggingface.co/fireworks-ai/inference/v1/chat/completions" headers = { - "Authorization": f"Bearer {os.environ['FIREWORKS_AI_API_KEY']}", + "Authorization": f"Bearer {os.environ['HF_TOKEN']}", } def query(payload): diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/sh/curl/0.fireworks-ai.sh b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/sh/curl/0.fireworks-ai.sh index ea8afe017d..17b2b6726f 100644 --- a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/sh/curl/0.fireworks-ai.sh +++ b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/sh/curl/0.fireworks-ai.sh @@ -1,5 +1,5 @@ -curl https://api.fireworks.ai/inference/v1/chat/completions \ - -H "Authorization: Bearer $FIREWORKS_AI_API_KEY" \ +curl https://router.huggingface.co/fireworks-ai/inference/v1/chat/completions \ + -H "Authorization: Bearer $HF_TOKEN" \ -H 'Content-Type: application/json' \ -d '{ "messages": [ diff --git a/packages/tasks-gen/snippets-fixtures/explicit-direct-request/js/huggingface.js/0.together.js b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/js/huggingface.js/0.together.js new file mode 100644 index 0000000000..021b73fec3 --- /dev/null +++ b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/js/huggingface.js/0.together.js @@ -0,0 +1,16 @@ +import { InferenceClient } from 
"@huggingface/inference"; + +const client = new InferenceClient(process.env.TOGETHER_API_KEY); + +const chatCompletion = await client.chatCompletion({ + provider: "together", + model: "meta-llama/Llama-3.1-8B-Instruct", + messages: [ + { + role: "user", + content: "What is the capital of France?", + }, + ], +}); + +console.log(chatCompletion.choices[0].message); \ No newline at end of file diff --git a/packages/tasks-gen/snippets-fixtures/explicit-direct-request/js/openai/0.together.js b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/js/openai/0.together.js new file mode 100644 index 0000000000..0b88f47cf1 --- /dev/null +++ b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/js/openai/0.together.js @@ -0,0 +1,18 @@ +import { OpenAI } from "openai"; + +const client = new OpenAI({ + baseURL: "https://api.together.xyz/v1", + apiKey: process.env.TOGETHER_API_KEY, +}); + +const chatCompletion = await client.chat.completions.create({ + model: "", + messages: [ + { + role: "user", + content: "What is the capital of France?", + }, + ], +}); + +console.log(chatCompletion.choices[0].message); \ No newline at end of file diff --git a/packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/huggingface_hub/0.together.py b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/huggingface_hub/0.together.py new file mode 100644 index 0000000000..fce67c5ded --- /dev/null +++ b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/huggingface_hub/0.together.py @@ -0,0 +1,19 @@ +import os +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="together", + api_key=os.environ["TOGETHER_API_KEY"], +) + +completion = client.chat.completions.create( + model="meta-llama/Llama-3.1-8B-Instruct", + messages=[ + { + "role": "user", + "content": "What is the capital of France?" 
+ } + ], +) + +print(completion.choices[0].message) \ No newline at end of file diff --git a/packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/openai/0.together.py b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/openai/0.together.py new file mode 100644 index 0000000000..6b3b71305f --- /dev/null +++ b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/openai/0.together.py @@ -0,0 +1,19 @@ +import os +from openai import OpenAI + +client = OpenAI( + base_url="https://api.together.xyz/v1", + api_key=os.environ["TOGETHER_API_KEY"], +) + +completion = client.chat.completions.create( + model="", + messages=[ + { + "role": "user", + "content": "What is the capital of France?" + } + ], +) + +print(completion.choices[0].message) \ No newline at end of file diff --git a/packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/requests/0.together.py b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/requests/0.together.py new file mode 100644 index 0000000000..8583441882 --- /dev/null +++ b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/python/requests/0.together.py @@ -0,0 +1,23 @@ +import os +import requests + +API_URL = "https://api.together.xyz/v1/chat/completions" +headers = { + "Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}", +} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +response = query({ + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" 
+ } + ], + "model": "" +}) + +print(response["choices"][0]["message"]) \ No newline at end of file diff --git a/packages/tasks-gen/snippets-fixtures/explicit-direct-request/sh/curl/0.together.sh b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/sh/curl/0.together.sh new file mode 100644 index 0000000000..268f721daa --- /dev/null +++ b/packages/tasks-gen/snippets-fixtures/explicit-direct-request/sh/curl/0.together.sh @@ -0,0 +1,13 @@ +curl https://api.together.xyz/v1/chat/completions \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "model": "", + "stream": false + }' \ No newline at end of file diff --git a/packages/tasks-gen/snippets-fixtures/text-to-image--lora/js/fetch/0.fal-ai.js b/packages/tasks-gen/snippets-fixtures/text-to-image--lora/js/fetch/0.fal-ai.js index a2d25b25de..8043329ba3 100644 --- a/packages/tasks-gen/snippets-fixtures/text-to-image--lora/js/fetch/0.fal-ai.js +++ b/packages/tasks-gen/snippets-fixtures/text-to-image--lora/js/fetch/0.fal-ai.js @@ -1,9 +1,9 @@ async function query(data) { const response = await fetch( - "https://fal.run/", + "https://router.huggingface.co/fal-ai/", { headers: { - Authorization: `Key ${process.env.FAL_AI_API_KEY}`, + Authorization: `Bearer ${process.env.HF_TOKEN}`, "Content-Type": "application/json", }, method: "POST", diff --git a/packages/tasks-gen/snippets-fixtures/text-to-image/js/fetch/0.fal-ai.js b/packages/tasks-gen/snippets-fixtures/text-to-image/js/fetch/0.fal-ai.js index 1b8d65c886..77eb6010d7 100644 --- a/packages/tasks-gen/snippets-fixtures/text-to-image/js/fetch/0.fal-ai.js +++ b/packages/tasks-gen/snippets-fixtures/text-to-image/js/fetch/0.fal-ai.js @@ -1,9 +1,9 @@ async function query(data) { const response = await fetch( - "https://fal.run/", + "https://router.huggingface.co/fal-ai/", { headers: { - Authorization: `Key 
${process.env.FAL_AI_API_KEY}`, + Authorization: `Bearer ${process.env.HF_TOKEN}`, "Content-Type": "application/json", }, method: "POST", diff --git a/packages/tasks-gen/snippets-fixtures/text-to-speech/js/fetch/0.fal-ai.js b/packages/tasks-gen/snippets-fixtures/text-to-speech/js/fetch/0.fal-ai.js index e6ae2e3bab..dcc378b886 100644 --- a/packages/tasks-gen/snippets-fixtures/text-to-speech/js/fetch/0.fal-ai.js +++ b/packages/tasks-gen/snippets-fixtures/text-to-speech/js/fetch/0.fal-ai.js @@ -1,9 +1,9 @@ async function query(data) { const response = await fetch( - "https://fal.run/", + "https://router.huggingface.co/fal-ai/", { headers: { - Authorization: `Key ${process.env.FAL_AI_API_KEY}`, + Authorization: `Bearer ${process.env.HF_TOKEN}`, "Content-Type": "application/json", }, method: "POST", diff --git a/packages/tasks-gen/snippets-fixtures/text-to-speech/python/requests/0.fal-ai.py b/packages/tasks-gen/snippets-fixtures/text-to-speech/python/requests/0.fal-ai.py index 25078707da..0cc2f67e7b 100644 --- a/packages/tasks-gen/snippets-fixtures/text-to-speech/python/requests/0.fal-ai.py +++ b/packages/tasks-gen/snippets-fixtures/text-to-speech/python/requests/0.fal-ai.py @@ -1,9 +1,9 @@ import os import requests -API_URL = "https://fal.run/" +API_URL = "https://router.huggingface.co/fal-ai/" headers = { - "Authorization": f"Key {os.environ['FAL_AI_API_KEY']}", + "Authorization": f"Bearer {os.environ['HF_TOKEN']}", } def query(payload):