
Commit 5629b86

Together.ai implem
1 parent a413824 commit 5629b86

File tree

5 files changed: +97 -3 lines changed


packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 6 additions & 0 deletions
@@ -1,4 +1,5 @@
 import { SAMBANOVA_API_BASE_URL, SAMBANOVA_MODEL_IDS } from "../providers/sambanova";
+import { TOGETHER_API_BASE_URL, TOGETHER_MODEL_IDS } from "../providers/together";
 import { INFERENCE_PROVIDERS, type InferenceTask, type Options, type RequestArgs } from "../types";
 import { omit } from "../utils/omit";
 import { HF_HUB_URL } from "./getDefaultTask";
@@ -66,6 +67,9 @@ export async function makeRequestOptions(
 		case "sambanova":
 			model = SAMBANOVA_MODEL_IDS[model];
 			break;
+		case "together":
+			model = TOGETHER_MODEL_IDS[model]?.id ?? model;
+			break;
 		default:
 			break;
 	}
@@ -113,6 +117,8 @@ export async function makeRequestOptions(
 	switch (provider) {
 		case "sambanova":
 			return SAMBANOVA_API_BASE_URL;
+		case "together":
+			return TOGETHER_API_BASE_URL;
 		default:
 			break;
 	}
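For reference, the lookup added above degrades gracefully: a Hugging Face model ID listed in the table is rewritten to its Together.ai counterpart, and an unknown ID passes through unchanged thanks to the `?? model` fallback. A minimal sketch of that behavior (the `resolveTogetherModel` helper is illustrative, not part of this commit):

import { TOGETHER_MODEL_IDS } from "../providers/together";

// Illustrative helper mirroring the new "together" case: mapped IDs resolve
// to their Together.ai counterpart; anything else falls through unchanged.
function resolveTogetherModel(model: string): string {
	return TOGETHER_MODEL_IDS[model]?.id ?? model;
}

resolveTogetherModel("meta-llama/Llama-3.3-70B-Instruct"); // "meta-llama/Llama-3.3-70B-Instruct-Turbo"
resolveTogetherModel("some-org/unmapped-model"); // "some-org/unmapped-model"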

packages/inference/src/providers/sambanova.ts

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ export const SAMBANOVA_API_BASE_URL = "https://api.sambanova.ai";
  * or keep it up-to-date.
  *
  * As a fallback, if the above is not possible, ask Sambanova to
- * provide the mapping as an API.
+ * provide the mapping as a fetchable API.
  */
 type SambanovaId = string;

packages/inference/src/providers/together.ts

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
+import type { ModelId } from "../types";
+
+export const TOGETHER_API_BASE_URL = "https://api.together.xyz";
+
+/**
+ * Same comment as in sambanova.ts
+ */
+type TogetherId = string;
+
+/**
+ * https://docs.together.ai/reference/models-1
+ */
+export const TOGETHER_MODEL_IDS: Record<
+	ModelId,
+	{ id: TogetherId; type: "chat" | "embedding" | "image" | "language" | "moderation" }
+> = {
+	"BAAI/bge-base-en-v1.5": { id: "BAAI/bge-base-en-v1.5", type: "embedding" },
+	"black-forest-labs/FLUX.1-Canny-dev": { id: "black-forest-labs/FLUX.1-canny", type: "image" },
+	"black-forest-labs/FLUX.1-Depth-dev": { id: "black-forest-labs/FLUX.1-depth", type: "image" },
+	"black-forest-labs/FLUX.1-dev": { id: "black-forest-labs/FLUX.1-dev", type: "image" },
+	"black-forest-labs/FLUX.1-Redux-dev": { id: "black-forest-labs/FLUX.1-redux", type: "image" },
+	"black-forest-labs/FLUX.1-schnell": { id: "black-forest-labs/FLUX.1-pro", type: "image" },
+	"databricks/dbrx-instruct": { id: "databricks/dbrx-instruct", type: "chat" },
+	"deepseek-ai/deepseek-llm-67b-chat": { id: "deepseek-ai/deepseek-llm-67b-chat", type: "chat" },
+	"google/gemma-2-9b-it": { id: "google/gemma-2-9b-it", type: "chat" },
+	"google/gemma-2b-it": { id: "google/gemma-2-27b-it", type: "chat" },
+	"llava-hf/llava-v1.6-mistral-7b-hf": { id: "llava-hf/llava-v1.6-mistral-7b-hf", type: "chat" },
+	"meta-llama/Llama-2-13b-chat-hf": { id: "meta-llama/Llama-2-13b-chat-hf", type: "chat" },
+	"meta-llama/Llama-2-70b-hf": { id: "meta-llama/Llama-2-70b-hf", type: "language" },
+	"meta-llama/Llama-2-7b-chat-hf": { id: "meta-llama/Llama-2-7b-chat-hf", type: "chat" },
+	"meta-llama/Llama-3.2-11B-Vision-Instruct": { id: "meta-llama/Llama-Vision-Free", type: "chat" },
+	"meta-llama/Llama-3.2-3B-Instruct": { id: "meta-llama/Llama-3.2-3B-Instruct-Turbo", type: "chat" },
+	"meta-llama/Llama-3.2-90B-Vision-Instruct": { id: "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo", type: "chat" },
+	"meta-llama/Llama-3.3-70B-Instruct": { id: "meta-llama/Llama-3.3-70B-Instruct-Turbo", type: "chat" },
+	"meta-llama/Llama-Guard-3-11B-Vision": { id: "meta-llama/Llama-Guard-3-11B-Vision-Turbo", type: "moderation" },
+	"meta-llama/LlamaGuard-7b": { id: "Meta-Llama/Llama-Guard-7b", type: "moderation" },
+	"meta-llama/Meta-Llama-3-70B-Instruct": { id: "meta-llama/Llama-3-70b-chat-hf", type: "chat" },
+	"meta-llama/Meta-Llama-3-8B": { id: "meta-llama/Meta-Llama-3-8B", type: "language" },
+	"meta-llama/Meta-Llama-3-8B-Instruct": { id: "togethercomputer/Llama-3-8b-chat-hf-int4", type: "chat" },
+	"meta-llama/Meta-Llama-3.1-405B-Instruct": { id: "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", type: "chat" },
+	"meta-llama/Meta-Llama-3.1-70B-Instruct": { id: "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", type: "chat" },
+	"meta-llama/Meta-Llama-3.1-8B-Instruct": { id: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-128K", type: "chat" },
+	"microsoft/WizardLM-2-8x22B": { id: "microsoft/WizardLM-2-8x22B", type: "chat" },
+	"mistralai/Mistral-7B-Instruct-v0.3": { id: "mistralai/Mistral-7B-Instruct-v0.3", type: "chat" },
+	"mistralai/Mixtral-8x22B-Instruct-v0.1": { id: "mistralai/Mixtral-8x22B-Instruct-v0.1", type: "chat" },
+	"mistralai/Mixtral-8x7B-Instruct-v0.1": { id: "mistralai/Mixtral-8x7B-Instruct-v0.1", type: "chat" },
+	"mistralai/Mixtral-8x7B-v0.1": { id: "mistralai/Mixtral-8x7B-v0.1", type: "language" },
+	"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": { id: "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", type: "chat" },
+	"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": { id: "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", type: "chat" },
+	"Qwen/Qwen2-72B-Instruct": { id: "Qwen/Qwen2-72B-Instruct", type: "chat" },
+	"Qwen/Qwen2.5-72B-Instruct": { id: "Qwen/Qwen2.5-72B-Instruct-Turbo", type: "chat" },
+	"Qwen/Qwen2.5-7B-Instruct": { id: "Qwen/Qwen2.5-7B-Instruct-Turbo", type: "chat" },
+	"Qwen/Qwen2.5-Coder-32B-Instruct": { id: "Qwen/Qwen2.5-Coder-32B-Instruct", type: "chat" },
+	"Qwen/QwQ-32B-Preview": { id: "Qwen/QwQ-32B-Preview", type: "chat" },
+	"scb10x/llama-3-typhoon-v1.5-8b-instruct": { id: "scb10x/scb10x-llama3-typhoon-v1-5-8b-instruct", type: "chat" },
+	"scb10x/llama-3-typhoon-v1.5x-70b-instruct-awq": { id: "scb10x/scb10x-llama3-typhoon-v1-5x-4f316", type: "chat" },
+	"stabilityai/stable-diffusion-xl-base-1.0": { id: "stabilityai/stable-diffusion-xl-base-1.0", type: "image" },
+	"togethercomputer/m2-bert-80M-32k-retrieval": { id: "togethercomputer/m2-bert-80M-32k-retrieval", type: "embedding" },
+	"togethercomputer/m2-bert-80M-8k-retrieval": { id: "togethercomputer/m2-bert-80M-8k-retrieval", type: "embedding" },
+};
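Because the table keys are Hugging Face model IDs and each value carries both the Together.ai ID and a task type, it can be queried like any plain record. A small sketch of consuming it (assumed usage, not part of the commit):

import { TOGETHER_MODEL_IDS } from "./together";

// Collect every Hugging Face ID that maps to a Together.ai image model.
const imageModels = Object.entries(TOGETHER_MODEL_IDS)
	.filter(([, mapping]) => mapping.type === "image")
	.map(([hfId, mapping]) => `${hfId} -> ${mapping.id}`);

console.log(imageModels); // FLUX.1 variants and stable-diffusion-xl-base-1.0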

packages/inference/src/tasks/nlp/chatCompletion.ts

Lines changed: 2 additions & 1 deletion
@@ -22,7 +22,8 @@ export async function chatCompletion(
 		typeof res?.created === "number" &&
 		typeof res?.id === "string" &&
 		typeof res?.model === "string" &&
-		typeof res?.system_fingerprint === "string" &&
+		/// Together.ai does not output a system_fingerprint
+		(res.system_fingerprint === undefined || typeof res.system_fingerprint === "string") &&
 		typeof res?.usage === "object";
 
 	if (!isValidOutput) {
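The relaxed predicate accepts a response that omits `system_fingerprint` while still rejecting one where the field is present with the wrong type. A standalone sketch of just that clause (hypothetical helper name, not part of the commit):

// Hypothetical standalone version of the relaxed clause above.
function hasValidSystemFingerprint(res: { system_fingerprint?: unknown }): boolean {
	return res.system_fingerprint === undefined || typeof res.system_fingerprint === "string";
}

hasValidSystemFingerprint({}); // true — Together.ai responses omit the field
hasValidSystemFingerprint({ system_fingerprint: "fp_44709d6fcb" }); // true
hasValidSystemFingerprint({ system_fingerprint: 42 }); // false — wrong type still fails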

packages/inference/test/HfInference.spec.ts

Lines changed: 28 additions & 1 deletion
@@ -786,7 +786,34 @@ describe.concurrent(
 				out += chunk.choices[0].delta.content;
 			}
 		}
-		console.warn(out);
+		expect(out).toContain("2");
+	});
+
+	it("chatCompletion together", async () => {
+		const hf = new HfInference(env.TOGETHER_KEY);
+		const res = await hf.chatCompletion({
+			model: "meta-llama/Llama-3.3-70B-Instruct",
+			provider: "together",
+			messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
+		});
+		if (res.choices && res.choices.length > 0) {
+			const completion = res.choices[0].message?.content;
+			expect(completion).toContain("two");
+		}
+	});
+	it("chatCompletion together stream", async () => {
+		const hf = new HfInference(env.TOGETHER_KEY);
+		const stream = hf.chatCompletionStream({
+			model: "meta-llama/Llama-3.3-70B-Instruct",
+			provider: "together",
+			messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }],
+		}) as AsyncGenerator<ChatCompletionStreamOutput>;
+		let out = "";
+		for await (const chunk of stream) {
+			if (chunk.choices && chunk.choices.length > 0) {
+				out += chunk.choices[0].delta.content;
+			}
+		}
 		expect(out).toContain("2");
 	});
 },
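Outside the test suite, calling the new provider looks like any other HfInference call; a minimal sketch mirroring the tests above (assumes a valid Together.ai key in the TOGETHER_KEY environment variable):

import { HfInference } from "@huggingface/inference";

// Sketch mirroring the new test: route a chat completion through Together.ai.
const hf = new HfInference(process.env.TOGETHER_KEY);
const res = await hf.chatCompletion({
	model: "meta-llama/Llama-3.3-70B-Instruct",
	provider: "together",
	messages: [{ role: "user", content: "One plus one equals" }],
});
console.log(res.choices[0]?.message?.content);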
