Skip to content

Commit e1bf67a

Browse files
committed
Use new Cohere OpenAI-compatible API
1 parent eef7b1e commit e1bf67a

File tree

4 files changed

+8
-141
lines changed

4 files changed

+8
-141
lines changed

packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ let tasks: Record<string, { models: { id: string }[] }> | null = null;
2828
*/
2929
const providerConfigs: Record<InferenceProvider, ProviderConfig> = {
3030
"black-forest-labs": BLACK_FOREST_LABS_CONFIG,
31-
"cohere": COHERE_CONFIG,
31+
cohere: COHERE_CONFIG,
3232
"fal-ai": FAL_AI_CONFIG,
3333
"fireworks-ai": FIREWORKS_AI_CONFIG,
3434
"hf-inference": HF_INFERENCE_CONFIG,
Lines changed: 1 addition & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,8 @@
11
import { InferenceOutputError } from "../../lib/InferenceOutputError";
2-
import type { CohereTextGenerationOutputFinishReason, CohereMessage, CohereLogprob } from "../../providers/cohere";
32
import type { BaseArgs, Options } from "../../types";
43
import { request } from "../custom/request";
54
import type { ChatCompletionInput, ChatCompletionOutput } from "@huggingface/tasks";
65

7-
interface CohereChatCompletionOutput {
8-
id: string;
9-
finish_reason: CohereTextGenerationOutputFinishReason;
10-
message: CohereMessage;
11-
usage: {
12-
billed_units: {
13-
input_tokens: number;
14-
output_tokens: number;
15-
};
16-
tokens: {
17-
input_tokens: number;
18-
output_tokens: number;
19-
};
20-
};
21-
logprobs?: CohereLogprob[]; // Optional field for log probabilities
22-
}
23-
24-
function convertCohereToChatCompletionOutput(res: CohereChatCompletionOutput): ChatCompletionOutput {
25-
// Create a ChatCompletionOutput object from the CohereChatCompletionOutput
26-
return {
27-
id: res.id,
28-
created: Date.now(),
29-
model: "cohere-model",
30-
system_fingerprint: "cohere-fingerprint",
31-
usage: {
32-
completion_tokens: res.usage.tokens.output_tokens,
33-
prompt_tokens: res.usage.tokens.input_tokens,
34-
total_tokens: res.usage.tokens.input_tokens + res.usage.tokens.output_tokens,
35-
},
36-
choices: [
37-
{
38-
finish_reason: res.finish_reason,
39-
index: 0,
40-
message: {
41-
role: res.message.role,
42-
content: res.message.content.map((c) => c.text).join(" "),
43-
tool_calls: res.message.tool_calls?.map((toolCall) => ({
44-
function: {
45-
arguments: toolCall.function.arguments,
46-
description: toolCall.function.description,
47-
name: toolCall.function.name,
48-
},
49-
id: toolCall.id,
50-
type: toolCall.type,
51-
})),
52-
},
53-
logprobs: res.logprobs
54-
? {
55-
content: res.logprobs.map((logprob) => ({
56-
logprob: logprob.logprob,
57-
token: logprob.token,
58-
top_logprobs: logprob.top_logprobs.map((topLogprob) => ({
59-
logprob: topLogprob.logprob,
60-
token: topLogprob.token,
61-
})),
62-
})),
63-
}
64-
: undefined,
65-
},
66-
],
67-
};
68-
}
69-
706
/**
717
* Use the chat completion endpoint to generate a response to a prompt, using the OpenAI message completion API (non-streaming)
728
*/
@@ -95,4 +31,5 @@ export async function chatCompletion(
9531
if (!isValidOutput) {
9632
throw new InferenceOutputError("Expected ChatCompletionOutput");
9733
}
34+
return res;
9835
}

packages/inference/src/tasks/nlp/chatCompletionStream.ts

Lines changed: 0 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,7 @@
1-
import type { CohereLogprob, CohereMessageDelta, CohereTextGenerationOutputFinishReason } from "../../providers/cohere";
21
import type { BaseArgs, Options } from "../../types";
32
import { streamingRequest } from "../custom/streamingRequest";
43
import type { ChatCompletionInput, ChatCompletionStreamOutput } from "@huggingface/tasks";
54

6-
interface CohereChatCompletionStreamOutput {
7-
id: string;
8-
finish_reason?: CohereTextGenerationOutputFinishReason;
9-
delta: {
10-
message: CohereMessageDelta;
11-
};
12-
usage?: {
13-
billed_units: {
14-
input_tokens: number;
15-
output_tokens: number;
16-
};
17-
tokens: {
18-
input_tokens: number;
19-
output_tokens: number;
20-
};
21-
};
22-
logprobs?: CohereLogprob[];
23-
}
24-
25-
function convertCohereToChatCompletionStreamOutput(res: CohereChatCompletionStreamOutput): ChatCompletionStreamOutput {
26-
return {
27-
id: res.id,
28-
created: Date.now(), // Assuming the current timestamp as created time
29-
model: "cohere-model", // Assuming a placeholder model name
30-
system_fingerprint: "cohere-fingerprint", // Assuming a placeholder fingerprint
31-
usage: res.usage
32-
? {
33-
completion_tokens: res.usage.tokens.output_tokens,
34-
prompt_tokens: res.usage.tokens.input_tokens,
35-
total_tokens: res.usage.tokens.input_tokens + res.usage.tokens.output_tokens,
36-
}
37-
: undefined,
38-
choices: [
39-
{
40-
delta: {
41-
role: res.delta?.message?.role,
42-
content: res.delta?.message?.content?.text,
43-
tool_calls: res.delta?.message?.tool_calls
44-
? {
45-
function: {
46-
arguments: JSON.stringify(res.delta?.message?.tool_calls[0]?.function.arguments), // Convert arguments to string
47-
description: res.delta?.message?.tool_calls[0]?.function.description,
48-
name: res.delta?.message?.tool_calls[0]?.function.name,
49-
},
50-
id: res.delta?.message?.tool_calls[0]?.id,
51-
index: 0, // Assuming a single tool call with index 0
52-
type: res.delta?.message?.tool_calls[0]?.type,
53-
}
54-
: undefined,
55-
},
56-
finish_reason: res.finish_reason,
57-
index: 0, // Assuming a single choice with index 0
58-
logprobs: res.logprobs
59-
? {
60-
content: res.logprobs.map((logprob) => ({
61-
logprob: logprob.logprob,
62-
token: logprob.token,
63-
top_logprobs: logprob.top_logprobs.map((topLogprob) => ({
64-
logprob: topLogprob.logprob,
65-
token: topLogprob.token,
66-
})),
67-
})),
68-
}
69-
: undefined,
70-
},
71-
],
72-
};
73-
}
74-
755
/**
766
* Use to continue text from a prompt. Same as `textGeneration`, but returns a generator that can be read one token at a time
777
*/

packages/inference/test/tapes.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7387,8 +7387,8 @@
73877387
}
73887388
}
73897389
},
7390-
"772e481d98640490fca3aab8e7fed5b771ea213f2b76b2f8858ce7bc90acb16b": {
7391-
"url": "https://api.cohere.com/v2/chat",
7390+
"cb34d07934bd210fd64da207415c49fc6e2870d3564164a2a5d541f713227fbf": {
7391+
"url": "https://api.cohere.com/compatibility/v1/chat/completions",
73927392
"init": {
73937393
"headers": {
73947394
"Content-Type": "application/json"
@@ -7397,7 +7397,7 @@
73977397
"body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Say 'this is a test'\"}],\"stream\":true,\"model\":\"command-r7b-12-2024\"}"
73987398
},
73997399
"response": {
7400-
"body": "event: message-start\ndata: {\"id\":\"b9c2d3f2-4532-473d-a8a1-9236d159ab26\",\"type\":\"message-start\",\"delta\":{\"message\":{\"role\":\"assistant\",\"content\":[],\"tool_plan\":\"\",\"tool_calls\":[],\"citations\":[]}}}\n\nevent: content-start\ndata: {\"type\":\"content-start\",\"index\":0,\"delta\":{\"message\":{\"content\":{\"type\":\"text\",\"text\":\"\"}}}}\n\nevent: content-delta\ndata: {\"type\":\"content-delta\",\"index\":0,\"delta\":{\"message\":{\"content\":{\"text\":\"This\"}}}}\n\nevent: content-delta\ndata: {\"type\":\"content-delta\",\"index\":0,\"delta\":{\"message\":{\"content\":{\"text\":\" is\"}}}}\n\nevent: content-delta\ndata: {\"type\":\"content-delta\",\"index\":0,\"delta\":{\"message\":{\"content\":{\"text\":\" a\"}}}}\n\nevent: content-delta\ndata: {\"type\":\"content-delta\",\"index\":0,\"delta\":{\"message\":{\"content\":{\"text\":\" test\"}}}}\n\nevent: content-delta\ndata: {\"type\":\"content-delta\",\"index\":0,\"delta\":{\"message\":{\"content\":{\"text\":\".\"}}}}\n\nevent: content-end\ndata: {\"type\":\"content-end\",\"index\":0}\n\nevent: message-end\ndata: {\"type\":\"message-end\",\"delta\":{\"finish_reason\":\"COMPLETE\",\"usage\":{\"billed_units\":{\"input_tokens\":7,\"output_tokens\":5},\"tokens\":{\"input_tokens\":502,\"output_tokens\":7}}}}\n\ndata: [DONE]\n\n",
7400+
"body": "data: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\"\",\"role\":\"assistant\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\"This\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\" is\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\" a\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\" test\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\".\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":\"stop\",\"delta\":{}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\",\"usage\":{\"prompt_tokens\":7,\"completion_tokens\":5,\"total_tokens\":12}}\n\ndata: [DONE]\n\n",
74017401
"status": 200,
74027402
"statusText": "OK",
74037403
"headers": {
@@ -7413,8 +7413,8 @@
74137413
}
74147414
}
74157415
},
7416-
"545bf4e8393bc07dedb7c66d13846ff8264a49e909117c3c93ae35e30e705cbb": {
7417-
"url": "https://api.cohere.com/v2/chat",
7416+
"8c6ffbc794573c463ed5666e3b560e5966cd975c2893c901c18adb696ba54a6a": {
7417+
"url": "https://api.cohere.com/compatibility/v1/chat/completions",
74187418
"init": {
74197419
"headers": {
74207420
"Content-Type": "application/json"
@@ -7423,7 +7423,7 @@
74237423
"body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Complete this sentence with words, one plus one is equal \"}],\"model\":\"command-r7b-12-2024\"}"
74247424
},
74257425
"response": {
7426-
"body": "{\"id\":\"cd9a6a0f-5e4c-411f-9604-fd4bdffc3052\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"One plus one is equal to two.\"}]},\"finish_reason\":\"COMPLETE\",\"usage\":{\"billed_units\":{\"input_tokens\":11,\"output_tokens\":8},\"tokens\":{\"input_tokens\":507,\"output_tokens\":10}}}",
7426+
"body": "{\"id\":\"f8bf661b-c600-44e5-8412-df37c9dcd985\",\"choices\":[{\"index\":0,\"finish_reason\":\"stop\",\"message\":{\"role\":\"assistant\",\"content\":\"One plus one is equal to two.\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion\",\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":8,\"total_tokens\":19}}",
74277427
"status": 200,
74287428
"statusText": "OK",
74297429
"headers": {

0 commit comments

Comments
 (0)