diff --git a/README.md b/README.md index e46700c7bb..0b22295d85 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ await uploadFile({ } }); -// Use HF Inference API, or external Inference Providers! +// Use all supported Inference Providers! await inference.chatCompletion({ model: "meta-llama/Llama-3.1-8B-Instruct", @@ -55,7 +55,7 @@ await inference.textToImage({ This is a collection of JS libraries to interact with the Hugging Face API, with TS types included. -- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and all supported Inference Providers to make calls to 100,000+ Machine Learning models +- [@huggingface/inference](packages/inference/README.md): Use all supported (serverless) Inference Providers or switch to Inference Endpoints (dedicated) to make calls to 100,000+ Machine Learning models - [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files - [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface - [@huggingface/gguf](packages/gguf/README.md): A GGUF parser that works on remotely hosted files. @@ -128,10 +128,10 @@ import { InferenceClient } from "@huggingface/inference"; const HF_TOKEN = "hf_..."; -const inference = new InferenceClient(HF_TOKEN); +const client = new InferenceClient(HF_TOKEN); // Chat completion API -const out = await inference.chatCompletion({ +const out = await client.chatCompletion({ model: "meta-llama/Llama-3.1-8B-Instruct", messages: [{ role: "user", content: "Hello, nice to meet you!" 
}], max_tokens: 512 @@ -139,7 +139,7 @@ const out = await inference.chatCompletion({ console.log(out.choices[0].message); // Streaming chat completion API -for await (const chunk of inference.chatCompletionStream({ +for await (const chunk of client.chatCompletionStream({ model: "meta-llama/Llama-3.1-8B-Instruct", messages: [{ role: "user", content: "Hello, nice to meet you!" }], max_tokens: 512 @@ -148,14 +148,14 @@ for await (const chunk of inference.chatCompletionStream({ } /// Using a third-party provider: -await inference.chatCompletion({ +await client.chatCompletion({ model: "meta-llama/Llama-3.1-8B-Instruct", messages: [{ role: "user", content: "Hello, nice to meet you!" }], max_tokens: 512, provider: "sambanova", // or together, fal-ai, replicate, cohere … }) -await inference.textToImage({ +await client.textToImage({ model: "black-forest-labs/FLUX.1-dev", inputs: "a picture of a green bird", provider: "fal-ai", @@ -164,7 +164,7 @@ await inference.textToImage({ // You can also omit "model" to use the recommended model for the task -await inference.translation({ +await client.translation({ inputs: "My name is Wolfgang and I live in Amsterdam", parameters: { src_lang: "en", @@ -173,17 +173,17 @@ await inference.translation({ }); // pass multimodal files or URLs as inputs -await inference.imageToText({ +await client.imageToText({ model: 'nlpconnect/vit-gpt2-image-captioning', data: await (await fetch('https://picsum.photos/300/300')).blob(), }) // Using your own dedicated inference endpoint: https://hf.co/docs/inference-endpoints/ -const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2'); -const { generated_text } = await gpt2.textGeneration({ inputs: 'The answer to the universe is' }); +const gpt2Client = client.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2'); +const { generated_text } = await gpt2Client.textGeneration({ inputs: 'The answer to the universe is' }); // Chat Completion -const 
llamaEndpoint = inference.endpoint( +const llamaEndpoint = client.endpoint( "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct" ); const out = await llamaEndpoint.chatCompletion({ diff --git a/packages/inference/test/InferenceClient.spec.ts b/packages/inference/test/InferenceClient.spec.ts index 73d526f283..4ce55adb90 100644 --- a/packages/inference/test/InferenceClient.spec.ts +++ b/packages/inference/test/InferenceClient.spec.ts @@ -376,7 +376,7 @@ describe.concurrent("InferenceClient", () => { ); }); - it("textGeneration - gpt2", async () => { + it.skip("textGeneration - gpt2", async () => { expect( await hf.textGeneration({ model: "gpt2", @@ -387,7 +387,7 @@ describe.concurrent("InferenceClient", () => { }); }); - it("textGeneration - openai-community/gpt2", async () => { + it.skip("textGeneration - openai-community/gpt2", async () => { expect( await hf.textGeneration({ model: "openai-community/gpt2", diff --git a/packages/tasks-gen/scripts/inference-tgi-import.ts b/packages/tasks-gen/scripts/inference-tgi-import.ts index 27060fd82e..80342028e0 100644 --- a/packages/tasks-gen/scripts/inference-tgi-import.ts +++ b/packages/tasks-gen/scripts/inference-tgi-import.ts @@ -34,6 +34,13 @@ function toCamelCase(str: string, joiner = "") { .join(joiner); } +const OVERRIDES_TYPES_RENAME_PROPERTIES: Record<string, Record<string, string>> = { + ChatCompletionInputFunctionDefinition: { arguments: "parameters" }, +}; +const OVERRIDES_TYPES_OVERRIDE_PROPERTY_TYPE: Record<string, Record<string, unknown>> = { + ChatCompletionOutputFunctionDefinition: { arguments: { type: "string" } }, +}; + async function _extractAndAdapt(task: string, mainComponentName: string, type: "input" | "output" | "stream_output") { console.debug(`✨ Importing`, task, type); @@ -57,6 +64,17 @@ async function _extractAndAdapt(task: string, mainComponentName: string, type: " _scan(item); } } else if (data && typeof data === "object") { + /// This next section can be removed when we don't use TGI as source of types. 
+ if (typeof data.title === "string" && data.title in OVERRIDES_TYPES_RENAME_PROPERTIES) { + const [[oldName, newName]] = Object.entries(OVERRIDES_TYPES_RENAME_PROPERTIES[data.title]); + data.required = JSON.parse(JSON.stringify(data.required).replaceAll(oldName, newName)); + data.properties = JSON.parse(JSON.stringify(data.properties).replaceAll(oldName, newName)); + } + if (typeof data.title === "string" && data.title in OVERRIDES_TYPES_OVERRIDE_PROPERTY_TYPE) { + const [[prop, newType]] = Object.entries(OVERRIDES_TYPES_OVERRIDE_PROPERTY_TYPE[data.title]); + (data.properties as Record<string, unknown>)[prop] = newType; + } + /// End of overrides section for (const key of Object.keys(data)) { if (key === "$ref" && typeof data[key] === "string") { // Verify reference exists diff --git a/packages/tasks/src/tasks/chat-completion/inference.ts b/packages/tasks/src/tasks/chat-completion/inference.ts index f9ba525b2d..1dc674bf64 100644 --- a/packages/tasks/src/tasks/chat-completion/inference.ts +++ b/packages/tasks/src/tasks/chat-completion/inference.ts @@ -130,9 +130,9 @@ export interface ChatCompletionInputToolCall { [property: string]: unknown; } export interface ChatCompletionInputFunctionDefinition { - arguments: unknown; description?: string; name: string; + parameters: unknown; [property: string]: unknown; } export interface ChatCompletionInputGrammarType { @@ -235,7 +235,7 @@ export interface ChatCompletionOutputToolCall { [property: string]: unknown; } export interface ChatCompletionOutputFunctionDefinition { - arguments: unknown; + arguments: string; description?: string; name: string; [property: string]: unknown; diff --git a/packages/tasks/src/tasks/chat-completion/spec/input.json b/packages/tasks/src/tasks/chat-completion/spec/input.json index 53c7a8d4bb..821972faed 100644 --- a/packages/tasks/src/tasks/chat-completion/spec/input.json +++ b/packages/tasks/src/tasks/chat-completion/spec/input.json @@ -275,9 +275,9 @@ }, "ChatCompletionInputFunctionDefinition": { "type": 
"object", - "required": ["name", "arguments"], + "required": ["name", "parameters"], "properties": { - "arguments": {}, + "parameters": {}, "description": { "type": "string", "nullable": true diff --git a/packages/tasks/src/tasks/chat-completion/spec/output.json b/packages/tasks/src/tasks/chat-completion/spec/output.json index 559f8c2c3d..759d657bad 100644 --- a/packages/tasks/src/tasks/chat-completion/spec/output.json +++ b/packages/tasks/src/tasks/chat-completion/spec/output.json @@ -173,7 +173,9 @@ "type": "object", "required": ["name", "arguments"], "properties": { - "arguments": {}, + "arguments": { + "type": "string" + }, "description": { "type": "string", "nullable": true