diff --git a/docs/inference-providers/_toctree.yml b/docs/inference-providers/_toctree.yml index 0c097a586..744247abc 100644 --- a/docs/inference-providers/_toctree.yml +++ b/docs/inference-providers/_toctree.yml @@ -23,6 +23,8 @@ title: Featherless AI - local: providers/fireworks-ai title: Fireworks + - local: providers/groq + title: Groq - local: providers/hyperbolic title: Hyperbolic - local: providers/hf-inference diff --git a/docs/inference-providers/index.md b/docs/inference-providers/index.md index 8a01ae89d..cd8188b34 100644 --- a/docs/inference-providers/index.md +++ b/docs/inference-providers/index.md @@ -20,6 +20,7 @@ Here is the complete list of partners integrated with Inference Providers, and t | [Fal AI](./providers/fal-ai) | | | | ✅ | ✅ | | [Featherless AI](./providers/featherless-ai) | ✅ | | | | | | [Fireworks](./providers/fireworks-ai) | ✅ | ✅ | | | | +| [Groq](./providers/groq) | ✅ | | | | | | [HF Inference](./providers/hf-inference) | ✅ | ✅ | ✅ | ✅ | | | [Hyperbolic](./providers/hyperbolic) | ✅ | ✅ | | | | | [Nebius](./providers/nebius) | ✅ | ✅ | ✅ | ✅ | | diff --git a/docs/inference-providers/providers/cohere.md b/docs/inference-providers/providers/cohere.md index 28ee4c974..8edb2601a 100644 --- a/docs/inference-providers/providers/cohere.md +++ b/docs/inference-providers/providers/cohere.md @@ -56,6 +56,6 @@ Find out more about Chat Completion (VLM) [here](../tasks/chat-completion). diff --git a/docs/inference-providers/providers/groq.md b/docs/inference-providers/providers/groq.md new file mode 100644 index 000000000..c8c660557 --- /dev/null +++ b/docs/inference-providers/providers/groq.md @@ -0,0 +1,69 @@ + + +# Groq + +
+ + + + +
+ +
+ + + + +
+ +Groq is fast AI inference. Their groundbreaking LPU technology delivers record-setting performance and efficiency for GenAI models. With custom chips specifically designed for AI inference workloads and a deterministic, software-first approach, Groq eliminates the bottlenecks of conventional hardware to enable real-time AI applications with predictable latency and exceptional throughput so developers can build fast. + +For latest pricing, visit our [pricing page](https://groq.com/pricing/). + +## Resources + - **Website**: https://groq.com/ + - **Documentation**: https://console.groq.com/docs + - **Community Forum**: https://community.groq.com/ + - **X**: [@GroqInc](https://x.com/GroqInc) + - **LinkedIn**: [Groq](https://www.linkedin.com/company/groq/) + - **YouTube**: [Groq](https://www.youtube.com/@GroqInc) + +## Supported tasks + + +### Chat Completion (LLM) + +Find out more about Chat Completion (LLM) [here](../tasks/chat-completion). + + + + +### Chat Completion (VLM) + +Find out more about Chat Completion (VLM) [here](../tasks/chat-completion). + + + diff --git a/docs/inference-providers/providers/hf-inference.md b/docs/inference-providers/providers/hf-inference.md index 0fc37ece0..ee240d0b6 100644 --- a/docs/inference-providers/providers/hf-inference.md +++ b/docs/inference-providers/providers/hf-inference.md @@ -38,163 +38,146 @@ If you are interested in deploying models to a dedicated and autoscaling infrast ## Supported tasks - ### Automatic Speech Recognition Find out more about Automatic Speech Recognition [here](../tasks/automatic_speech_recognition). - ### Chat Completion (LLM) Find out more about Chat Completion (LLM) [here](../tasks/chat-completion). - ### Chat Completion (VLM) Find out more about Chat Completion (VLM) [here](../tasks/chat-completion). - ### Feature Extraction Find out more about Feature Extraction [here](../tasks/feature_extraction). - ### Fill Mask Find out more about Fill Mask [here](../tasks/fill_mask). - ### Image Classification Find out more about Image Classification [here](../tasks/image_classification). - ### Image Segmentation Find out more about Image Segmentation [here](../tasks/image_segmentation). - ### Object Detection Find out more about Object Detection [here](../tasks/object_detection). - ### Question Answering Find out more about Question Answering [here](../tasks/question_answering). - ### Summarization Find out more about Summarization [here](../tasks/summarization). - ### Table Question Answering Find out more about Table Question Answering [here](../tasks/table_question_answering). - ### Text Classification Find out more about Text Classification [here](../tasks/text_classification). - ### Text Generation Find out more about Text Generation [here](../tasks/text_generation). - ### Text To Image Find out more about Text To Image [here](../tasks/text_to_image). - ### Token Classification Find out more about Token Classification [here](../tasks/token_classification). - ### Translation Find out more about Translation [here](../tasks/translation). - diff --git a/docs/inference-providers/providers/nscale.md b/docs/inference-providers/providers/nscale.md index 5da7783ec..8aad21029 100644 --- a/docs/inference-providers/providers/nscale.md +++ b/docs/inference-providers/providers/nscale.md @@ -46,7 +46,7 @@ Find out more about Chat Completion (LLM) [here](../tasks/chat-completion). 
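The provider pages touched above (Groq, HF Inference, Nscale) all document the same chat-completion task. As an illustrative aside, here is a minimal sketch of what the generated snippets boil down to when routing a chat completion through the new `groq` provider. It assumes a v3-era `@huggingface/inference` client whose `chatCompletion` accepts a `provider` option, a token with "Inference Providers" permission in `HF_TOKEN`, and an example model id; it is not the literal snippet emitted by the docs generator.

```ts
import { HfInference } from "@huggingface/inference";

// Sketch: route a chat completion through the Groq provider via Inference Providers.
// Assumes @huggingface/inference v3+ (newer releases expose the same call on InferenceClient).
const client = new HfInference(process.env.HF_TOKEN);

const completion = await client.chatCompletion({
  model: "meta-llama/Meta-Llama-3.1-8B-Instruct", // example model, not necessarily one served by Groq
  provider: "groq",
  messages: [{ role: "user", content: "What is the capital of France?" }],
  max_tokens: 256,
});

console.log(completion.choices[0].message.content);
```
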
diff --git a/docs/inference-providers/providers/replicate.md b/docs/inference-providers/providers/replicate.md index 657cee3f5..c1cd26b93 100644 --- a/docs/inference-providers/providers/replicate.md +++ b/docs/inference-providers/providers/replicate.md @@ -54,6 +54,6 @@ Find out more about Text To Video [here](../tasks/text_to_video). diff --git a/docs/inference-providers/providers/together.md b/docs/inference-providers/providers/together.md index 8ccce4cfa..426de4744 100644 --- a/docs/inference-providers/providers/together.md +++ b/docs/inference-providers/providers/together.md @@ -44,7 +44,7 @@ Find out more about Chat Completion (LLM) [here](../tasks/chat-completion). @@ -64,7 +64,7 @@ Find out more about Text Generation [here](../tasks/text_generation). diff --git a/docs/inference-providers/tasks/chat-completion.md b/docs/inference-providers/tasks/chat-completion.md index 8a4db9151..4f20bfefc 100644 --- a/docs/inference-providers/tasks/chat-completion.md +++ b/docs/inference-providers/tasks/chat-completion.md @@ -25,6 +25,7 @@ This is a subtask of [`text-generation`](https://huggingface.co/docs/inference-p - [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models. - [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. - [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft. +- [Qwen/Qwen2.5-7B-Instruct-1M](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-1M): Strong conversational model that supports very long instructions. - [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code. - [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning based open large language model. @@ -60,7 +61,7 @@ The API supports: @@ -70,7 +71,7 @@ conversational /> diff --git a/docs/inference-providers/tasks/text-classification.md b/docs/inference-providers/tasks/text-classification.md index 5e30ee191..63f6551d7 100644 --- a/docs/inference-providers/tasks/text-classification.md +++ b/docs/inference-providers/tasks/text-classification.md @@ -31,36 +31,30 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - - - ### API specification #### Request -| Headers | | | -| :--- | :--- | :--- | +| Headers | | | +| :---------------- | :------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with "Inference Providers" permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens/new?ownUserPermissions=inference.serverless.write&tokenType=fineGrained). | - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The text to classify | -| **parameters** | _object_ | | -| **        function_to_apply** | _enum_ | Possible values: sigmoid, softmax, none. | -| **        top_k** | _integer_ | When specified, limits the output to the top K most probable classes. 
| - +| Payload | | | +| :-------------------------------------------------------------------- | :-------- | :-------------------------------------------------------------------- | +| **inputs\*** | _string_ | The text to classify | +| **parameters** | _object_ | | +| **        function_to_apply** | _enum_ | Possible values: sigmoid, softmax, none. | +| **        top_k** | _integer_ | When specified, limits the output to the top K most probable classes. | #### Response -| Body | | -| :--- | :--- | :--- | -| **(array)** | _object[]_ | Output is an array of objects. | -| **        label** | _string_ | The predicted class label. | -| **        score** | _number_ | The corresponding probability. | - +| Body | | +| :-------------------------------------------------------- | :--------- | :----------------------------- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        label** | _string_ | The predicted class label. | +| **        score** | _number_ | The corresponding probability. | diff --git a/docs/inference-providers/tasks/text-generation.md b/docs/inference-providers/tasks/text-generation.md index 88c24b7b9..224f27bb9 100644 --- a/docs/inference-providers/tasks/text-generation.md +++ b/docs/inference-providers/tasks/text-generation.md @@ -37,127 +37,120 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - - - ### API specification #### Request -| Headers | | | -| :--- | :--- | :--- | +| Headers | | | +| :---------------- | :------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with "Inference Providers" permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens/new?ownUserPermissions=inference.serverless.write&tokenType=fineGrained). | - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | | -| **parameters** | _object_ | | -| **        adapter_id** | _string_ | Lora adapter id | -| **        best_of** | _integer_ | Generate best_of sequences and return the one if the highest token logprobs. | -| **        decoder_input_details** | _boolean_ | Whether to return decoder input token logprobs and ids. | -| **        details** | _boolean_ | Whether to return generation details. | -| **        do_sample** | _boolean_ | Activate logits sampling. | -| **        frequency_penalty** | _number_ | The parameter for frequency penalty. 1.0 means no penalty Penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. | -| **        grammar** | _unknown_ | One of the following: | -| **                 (#1)** | _object_ | | -| **                        type*** | _enum_ | Possible values: json. | -| **                        value*** | _unknown_ | A string that represents a [JSON Schema](https://json-schema.org/). JSON Schema is a declarative language that allows to annotate JSON documents with types and descriptions. | -| **                 (#2)** | _object_ | | -| **                        type*** | _enum_ | Possible values: regex. 
| -| **                        value*** | _string_ | | -| **                 (#3)** | _object_ | | -| **                        type*** | _enum_ | Possible values: json_schema. | -| **                        value*** | _object_ | | -| **                                name** | _string_ | Optional name identifier for the schema | -| **                                schema*** | _unknown_ | The actual JSON schema definition | -| **        max_new_tokens** | _integer_ | Maximum number of tokens to generate. | -| **        repetition_penalty** | _number_ | The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. | -| **        return_full_text** | _boolean_ | Whether to prepend the prompt to the generated text | -| **        seed** | _integer_ | Random sampling seed. | -| **        stop** | _string[]_ | Stop generating tokens if a member of `stop` is generated. | -| **        temperature** | _number_ | The value used to module the logits distribution. | -| **        top_k** | _integer_ | The number of highest probability vocabulary tokens to keep for top-k-filtering. | -| **        top_n_tokens** | _integer_ | The number of highest probability vocabulary tokens to keep for top-n-filtering. | -| **        top_p** | _number_ | Top-p value for nucleus sampling. | -| **        truncate** | _integer_ | Truncate inputs tokens to the given size. | -| **        typical_p** | _number_ | Typical Decoding mass See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information. | -| **        watermark** | _boolean_ | Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226). | -| **stream** | _boolean_ | | - +| Payload | | | +| :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **inputs\*** | _string_ | | +| **parameters** | _object_ | | +| **        adapter_id** | _string_ | Lora adapter id | +| **        best_of** | _integer_ | Generate best_of sequences and return the one if the highest token logprobs. | +| **        decoder_input_details** | _boolean_ | Whether to return decoder input token logprobs and ids. | +| **        details** | _boolean_ | Whether to return generation details. | +| **        do_sample** | _boolean_ | Activate logits sampling. | +| **        frequency_penalty** | _number_ | The parameter for frequency penalty. 1.0 means no penalty Penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. | +| **        grammar** | _unknown_ | One of the following: | +| **                 (#1)** | _object_ | | +| **                        type\*** | _enum_ | Possible values: json. | +| **                        value\*** | _unknown_ | A string that represents a [JSON Schema](https://json-schema.org/). JSON Schema is a declarative language that allows to annotate JSON documents with types and descriptions. | +| **                 (#2)** | _object_ | | +| **                        type\*** | _enum_ | Possible values: regex. 
| +| **                        value\*** | _string_ | | +| **                 (#3)** | _object_ | | +| **                        type\*** | _enum_ | Possible values: json_schema. | +| **                        value\*** | _object_ | | +| **                                name** | _string_ | Optional name identifier for the schema | +| **                                schema\*** | _unknown_ | The actual JSON schema definition | +| **        max_new_tokens** | _integer_ | Maximum number of tokens to generate. | +| **        repetition_penalty** | _number_ | The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. | +| **        return_full_text** | _boolean_ | Whether to prepend the prompt to the generated text | +| **        seed** | _integer_ | Random sampling seed. | +| **        stop** | _string[]_ | Stop generating tokens if a member of `stop` is generated. | +| **        temperature** | _number_ | The value used to module the logits distribution. | +| **        top_k** | _integer_ | The number of highest probability vocabulary tokens to keep for top-k-filtering. | +| **        top_n_tokens** | _integer_ | The number of highest probability vocabulary tokens to keep for top-n-filtering. | +| **        top_p** | _number_ | Top-p value for nucleus sampling. | +| **        truncate** | _integer_ | Truncate inputs tokens to the given size. | +| **        typical_p** | _number_ | Typical Decoding mass See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information. | +| **        watermark** | _boolean_ | Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226). | +| **stream** | _boolean_ | | #### Response Output type depends on the `stream` input parameter. If `stream` is `false` (default), the response will be a JSON object with the following fields: -| Body | | -| :--- | :--- | :--- | -| **details** | _object_ | | -| **        best_of_sequences** | _object[]_ | | -| **                finish_reason** | _enum_ | Possible values: length, eos_token, stop_sequence. | -| **                generated_text** | _string_ | | -| **                generated_tokens** | _integer_ | | -| **                prefill** | _object[]_ | | -| **                        id** | _integer_ | | -| **                        logprob** | _number_ | | -| **                        text** | _string_ | | -| **                seed** | _integer_ | | -| **                tokens** | _object[]_ | | -| **                        id** | _integer_ | | -| **                        logprob** | _number_ | | -| **                        special** | _boolean_ | | -| **                        text** | _string_ | | -| **                top_tokens** | _array[]_ | | -| **                        id** | _integer_ | | -| **                        logprob** | _number_ | | -| **                        special** | _boolean_ | | -| **                        text** | _string_ | | -| **        finish_reason** | _enum_ | Possible values: length, eos_token, stop_sequence. 
| -| **        generated_tokens** | _integer_ | | -| **        prefill** | _object[]_ | | -| **                id** | _integer_ | | -| **                logprob** | _number_ | | -| **                text** | _string_ | | -| **        seed** | _integer_ | | -| **        tokens** | _object[]_ | | -| **                id** | _integer_ | | -| **                logprob** | _number_ | | -| **                special** | _boolean_ | | -| **                text** | _string_ | | -| **        top_tokens** | _array[]_ | | -| **                id** | _integer_ | | -| **                logprob** | _number_ | | -| **                special** | _boolean_ | | -| **                text** | _string_ | | -| **generated_text** | _string_ | | - +| Body | | +| :---------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------- | :------------------------------------------------- | +| **details** | _object_ | | +| **        best_of_sequences** | _object[]_ | | +| **                finish_reason** | _enum_ | Possible values: length, eos_token, stop_sequence. | +| **                generated_text** | _string_ | | +| **                generated_tokens** | _integer_ | | +| **                prefill** | _object[]_ | | +| **                        id** | _integer_ | | +| **                        logprob** | _number_ | | +| **                        text** | _string_ | | +| **                seed** | _integer_ | | +| **                tokens** | _object[]_ | | +| **                        id** | _integer_ | | +| **                        logprob** | _number_ | | +| **                        special** | _boolean_ | | +| **                        text** | _string_ | | +| **                top_tokens** | _array[]_ | | +| **                        id** | _integer_ | | +| **                        logprob** | _number_ | | +| **                        special** | _boolean_ | | +| **                        text** | _string_ | | +| **        finish_reason** | _enum_ | Possible values: length, eos_token, stop_sequence. | +| **        generated_tokens** | _integer_ | | +| **        prefill** | _object[]_ | | +| **                id** | _integer_ | | +| **                logprob** | _number_ | | +| **                text** | _string_ | | +| **        seed** | _integer_ | | +| **        tokens** | _object[]_ | | +| **                id** | _integer_ | | +| **                logprob** | _number_ | | +| **                special** | _boolean_ | | +| **                text** | _string_ | | +| **        top_tokens** | _array[]_ | | +| **                id** | _integer_ | | +| **                logprob** | _number_ | | +| **                special** | _boolean_ | | +| **                text** | _string_ | | +| **generated_text** | _string_ | | If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). For more information about streaming, check out [this guide](https://huggingface.co/docs/text-generation-inference/conceptual/streaming). -| Body | | -| :--- | :--- | :--- | -| **details** | _object_ | | -| **        finish_reason** | _enum_ | Possible values: length, eos_token, stop_sequence. 
| -| **        generated_tokens** | _integer_ | | -| **        input_length** | _integer_ | | -| **        seed** | _integer_ | | -| **generated_text** | _string_ | | -| **index** | _integer_ | | -| **token** | _object_ | | -| **        id** | _integer_ | | -| **        logprob** | _number_ | | -| **        special** | _boolean_ | | -| **        text** | _string_ | | -| **top_tokens** | _object[]_ | | -| **        id** | _integer_ | | -| **        logprob** | _number_ | | -| **        special** | _boolean_ | | -| **        text** | _string_ | | - +| Body | | +| :------------------------------------------------------------------- | :--------- | :------------------------------------------------- | +| **details** | _object_ | | +| **        finish_reason** | _enum_ | Possible values: length, eos_token, stop_sequence. | +| **        generated_tokens** | _integer_ | | +| **        input_length** | _integer_ | | +| **        seed** | _integer_ | | +| **generated_text** | _string_ | | +| **index** | _integer_ | | +| **token** | _object_ | | +| **        id** | _integer_ | | +| **        logprob** | _number_ | | +| **        special** | _boolean_ | | +| **        text** | _string_ | | +| **top_tokens** | _object[]_ | | +| **        id** | _integer_ | | +| **        logprob** | _number_ | | +| **        special** | _boolean_ | | +| **        text** | _string_ | | diff --git a/docs/inference-providers/tasks/text-to-video.md b/docs/inference-providers/tasks/text-to-video.md index 733b16be4..5a29e371d 100644 --- a/docs/inference-providers/tasks/text-to-video.md +++ b/docs/inference-providers/tasks/text-to-video.md @@ -35,7 +35,7 @@ Explore all available models and find the one that suits you best [here](https:/ diff --git a/scripts/inference-providers/scripts/generate.ts b/scripts/inference-providers/scripts/generate.ts index a55b1fa95..90ed17025 100644 --- a/scripts/inference-providers/scripts/generate.ts +++ b/scripts/inference-providers/scripts/generate.ts @@ -38,6 +38,7 @@ const PROVIDERS_HUB_ORGS: Record = { "fal-ai": "fal", "featherless-ai": "featherless-ai", "fireworks-ai": "fireworks-ai", + groq: "groq", "hf-inference": "hf-inference", hyperbolic: "Hyperbolic", nebius: "nebius", @@ -54,6 +55,7 @@ const PROVIDERS_URLS: Record = { "fal-ai": "https://fal.ai/", "featherless-ai": "https://featherless.ai/", "fireworks-ai": "https://fireworks.ai/", + groq: "https://groq.com/", "hf-inference": "https://huggingface.co/", hyperbolic: "https://hyperbolic.xyz/", nebius: "https://nebius.com/", @@ -128,7 +130,7 @@ await Promise.all( } } } - }) + }), ); //////////////////////// @@ -144,7 +146,7 @@ const TASKS_DOCS_DIR = path.join(DOCS_DIR, "inference-providers", "tasks"); const PROVIDERS_DOCS_DIR = path.join( DOCS_DIR, "inference-providers", - "providers" + "providers", ); const NBSP = " "; // non-breaking space @@ -152,12 +154,12 @@ const TABLE_INDENT = NBSP.repeat(8); function readTemplate( templateName: string, - namespace: string + namespace: string, ): Promise { const templatePath = path.join( TEMPLATE_DIR, namespace, - `${templateName}.handlebars` + `${templateName}.handlebars`, ); console.log(` 🔍 Reading ${templateName}.handlebars`); return fs.readFile(templatePath, { encoding: "utf-8" }); @@ -171,13 +173,13 @@ function writeTaskDoc(templateName: string, content: string): Promise { return fs .mkdir(TASKS_DOCS_DIR, { recursive: true }) .then(() => - fs.writeFile(taskDocPath, contentWithHeader, { encoding: "utf-8" }) + fs.writeFile(taskDocPath, contentWithHeader, { encoding: "utf-8" }), ); } function 
writeProviderDoc( templateName: string, - content: string + content: string, ): Promise { const providerDocPath = path.join(PROVIDERS_DOCS_DIR, `${templateName}.md`); console.log(` 💾 Saving to ${providerDocPath}`); @@ -186,7 +188,7 @@ function writeProviderDoc( return fs .mkdir(TASKS_DOCS_DIR, { recursive: true }) .then(() => - fs.writeFile(providerDocPath, contentWithHeader, { encoding: "utf-8" }) + fs.writeFile(providerDocPath, contentWithHeader, { encoding: "utf-8" }), ); } @@ -206,13 +208,13 @@ const TASKS_DATA = (await authFetchJson(TASKS_API_URL)) as any; type SpecNameType = "input" | "output" | "stream_output"; const SPECS_URL_TEMPLATE = Handlebars.compile( - `https://raw.githubusercontent.com/huggingface/huggingface.js/${SPECS_REVISION}/packages/tasks/src/tasks/{{task}}/spec/{{name}}.json` + `https://raw.githubusercontent.com/huggingface/huggingface.js/${SPECS_REVISION}/packages/tasks/src/tasks/{{task}}/spec/{{name}}.json`, ); const COMMON_DEFINITIONS_URL = `https://raw.githubusercontent.com/huggingface/huggingface.js/${SPECS_REVISION}/packages/tasks/src/tasks/common-definitions.json`; async function fetchOneSpec( task: PipelineType, - name: SpecNameType + name: SpecNameType, ): Promise { const url = SPECS_URL_TEMPLATE({ task, name }); console.log(` 🕸️ Fetching ${task} ${name} specs`); @@ -220,7 +222,7 @@ async function fetchOneSpec( } async function fetchSpecs( - task: PipelineType + task: PipelineType, ): Promise< Record<"input" | "output" | "stream_output", JsonObject | undefined> > { @@ -258,7 +260,7 @@ function processPayloadSchema(schema: any): JsonObject[] { key: string, value: any, required: boolean, - parentPrefix: string + parentPrefix: string, ): void { const isRequired = required; let type = value.type || "unknown"; @@ -322,9 +324,9 @@ function processPayloadSchema(schema: any): JsonObject[] { nestedKey, nestedValue, nestedRequired, - parentPrefix + TABLE_INDENT + parentPrefix + TABLE_INDENT, ); - } + }, ); } else if (isArray) { // Process array items @@ -342,7 +344,7 @@ function processPayloadSchema(schema: any): JsonObject[] { `${NBSP}(#${index + 1})`, subSchema, false, - parentPrefix + TABLE_INDENT + parentPrefix + TABLE_INDENT, ); }); } @@ -384,32 +386,32 @@ For more details about the \`{{task}}\` task, check out its [dedicated page](htt `); const TIP_LIST_MODELS_LINK_TEMPLATE = Handlebars.compile( - `Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag={{task}}&sort=trending).` + `Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag={{task}}&sort=trending).`, ); const SPECS_HEADERS = await readTemplate("specs-headers", "common"); const PAGE_HEADER = Handlebars.compile( - await readTemplate("page-header", "common") + await readTemplate("page-header", "common"), ); const PROVIDER_PAGE_HEADER = Handlebars.compile( - await readTemplate("provider-header", "common") + await readTemplate("provider-header", "common"), ); const SNIPPETS_TEMPLATE = Handlebars.compile( - await readTemplate("snippets-template", "common") + await readTemplate("snippets-template", "common"), ); const SPECS_PAYLOAD_TEMPLATE = Handlebars.compile( - await readTemplate("specs-payload", "common") + await readTemplate("specs-payload", "common"), ); const SPECS_OUTPUT_TEMPLATE = Handlebars.compile( - await readTemplate("specs-output", "common") + await readTemplate("specs-output", "common"), ); const PROVIDER_TASKS_TEMPLATE = Handlebars.compile( - 
await readTemplate("provider-tasks", "common") + await readTemplate("provider-tasks", "common"), ); const PROVIDER_LOGO_TEMPLATE = Handlebars.compile( - await readTemplate("provider-logo", "common") + await readTemplate("provider-logo", "common"), ); const FOLLOW_US_BUTTON_TEMPLATE = Handlebars.compile( - await readTemplate("follow-us-button", "common") + await readTemplate("follow-us-button", "common"), ); //////////////////// @@ -479,15 +481,15 @@ await Promise.all( const modelData = await authFetchJson(url); model.inference = modelData.inference; model.tags = modelData.tags; - } - ) + }, + ), ); - }) + }), ); async function fetchWarmModels( task: PipelineType, - conversational: boolean = false + conversational: boolean = false, ): Promise< { modelId: string; @@ -509,7 +511,7 @@ async function fetchWarmModels( await Promise.all( providers.map(async (provider) => { console.log( - ` ⚡ Fetching most popular warm model for ${task} from ${provider}` + ` ⚡ Fetching most popular warm model for ${task} from ${provider}`, ); const url = `https://huggingface.co/api/models?pipeline_tag=${task}&inference_provider=${provider}&sort=likes30d&expand[]=inferenceProviderMapping&expand[]=tags&limit=5`; const modelsData = (await authFetchJson(url)) as { @@ -520,7 +522,7 @@ async function fetchWarmModels( }[]; if (modelsData.length === 0) { console.warn( - ` ⚠️ No warm model found for ${task} from ${provider}` + ` ⚠️ No warm model found for ${task} from ${provider}`, ); return; } @@ -538,7 +540,7 @@ async function fetchWarmModels( return; } const providerData = providerMapping.filter( - (mapping) => mapping.provider === provider + (mapping) => mapping.provider === provider, )[0]; return { modelId: topModelData.id, @@ -547,7 +549,7 @@ async function fetchWarmModels( providerTask: providerData.task, tags: topModelData.tags, }; - }) + }), ) ).filter((model) => model !== undefined); } @@ -556,13 +558,13 @@ async function fetchWarmModels( await Promise.all( TASKS.map(async (task) => { DATA.perProviderWarmModels[task] = await fetchWarmModels(task); - }) + }), ); // Filter recommended models (i.e. 
recommended + warm) TASKS.forEach((task) => { DATA.recommendedModels[task] = TASKS_DATA[task].models.filter( - (model: { inference: string }) => model.inference === "warm" + (model: { inference: string }) => model.inference === "warm", ); }); @@ -573,7 +575,7 @@ function buildProviderMapping( providerModelId: string; providerTask: string; tags: string[]; - }[] + }[], ): Record { return models.reduce( (acc, item) => { @@ -583,14 +585,14 @@ function buildProviderMapping( }; return acc; }, - {} as Record + {} as Record, ); } // Generate snippets TASKS.forEach((task) => { const providersMapping = buildProviderMapping( - DATA.perProviderWarmModels[task] + DATA.perProviderWarmModels[task], ); DATA.snippets[task] = SNIPPETS_TEMPLATE({ @@ -621,7 +623,7 @@ await Promise.all( }) : undefined, }; - }) + }), ); // Render tips @@ -637,7 +639,7 @@ TASKS.forEach((task) => { async function fetchChatCompletion() { // Conversational text-generation console.log( - " ⚡ Prepare data for chat-completion (conversational text-generation)" + " ⚡ Prepare data for chat-completion (conversational text-generation)", ); DATA.recommendedModels["chat-completion"] = DATA.recommendedModels[ "text-generation" @@ -645,10 +647,10 @@ async function fetchChatCompletion() { DATA.perProviderWarmModels["chat-completion"] = await fetchWarmModels( "text-generation", - true + true, ); const providersMappingChatCompletion = buildProviderMapping( - DATA.perProviderWarmModels["chat-completion"] + DATA.perProviderWarmModels["chat-completion"], ); DATA.snippets["chat-completion"] = SNIPPETS_TEMPLATE({ task: "text-generation", @@ -661,18 +663,18 @@ async function fetchChatCompletion() { // Conversational image-text-to-text console.log( - " ⚡ Prepare data for chat-completion (conversational image-text-to-text)" + " ⚡ Prepare data for chat-completion (conversational image-text-to-text)", ); DATA.recommendedModels["conversational-image-text-to-text"] = DATA.recommendedModels["image-text-to-text"].filter((model) => - model.tags?.includes("conversational") + model.tags?.includes("conversational"), ); DATA.perProviderWarmModels["image-text-to-text"] = await fetchWarmModels( "image-text-to-text", - true + true, ); const providersMappingImageTextToText = buildProviderMapping( - DATA.perProviderWarmModels["image-text-to-text"] + DATA.perProviderWarmModels["image-text-to-text"], ); DATA.snippets["conversational-image-text-to-text"] = SNIPPETS_TEMPLATE({ @@ -711,7 +713,7 @@ Object.entries(DATA.perProviderWarmModels).forEach(([task, models]) => { PER_PROVIDER_TASKS[model.provider] = []; } let conversational = ["chat-completion", "image-text-to-text"].includes( - task + task, ); let title = conversational ? 
task == "image-text-to-text" @@ -744,7 +746,7 @@ Object.entries(DATA.perProviderWarmModels).forEach(([task, models]) => { // sort tasks by title Object.entries(PER_PROVIDER_TASKS).forEach(([provider, tasks]) => { PER_PROVIDER_TASKS[provider] = tasks.sort((a, b) => - a.title.localeCompare(b.title) + a.title.localeCompare(b.title), ); }); @@ -755,11 +757,11 @@ Object.entries(PER_PROVIDER_TASKS).forEach(([provider, tasks]) => { async function renderTemplate( templateName: string, namespace: string, - data: JsonObject + data: JsonObject, ): Promise { console.log(`🎨 Rendering ${templateName} (${namespace})`); const template = Handlebars.compile( - await readTemplate(templateName, namespace) + await readTemplate(templateName, namespace), ); return template(data); } @@ -772,7 +774,7 @@ await Promise.all( // @ts-ignore const rendered = await renderTemplate(task, "task", DATA); await writeTaskDoc(task, rendered); - }) + }), ); await Promise.all( @@ -790,7 +792,7 @@ await Promise.all( }), }); await writeProviderDoc(provider, rendered); - }) + }), ); console.log("✅ All done!"); diff --git a/scripts/inference-providers/templates/providers/groq.handlebars b/scripts/inference-providers/templates/providers/groq.handlebars new file mode 100644 index 000000000..d25d4c4c7 --- /dev/null +++ b/scripts/inference-providers/templates/providers/groq.handlebars @@ -0,0 +1,19 @@ +# Groq + +{{{logoSection}}} + +{{{followUsSection}}} + +Groq is fast AI inference. Their groundbreaking LPU technology delivers record-setting performance and efficiency for GenAI models. With custom chips specifically designed for AI inference workloads and a deterministic, software-first approach, Groq eliminates the bottlenecks of conventional hardware to enable real-time AI applications with predictable latency and exceptional throughput so developers can build fast. + +For latest pricing, visit our [pricing page](https://groq.com/pricing/). + +## Resources + - **Website**: https://groq.com/ + - **Documentation**: https://console.groq.com/docs + - **Community Forum**: https://community.groq.com/ + - **X**: [@GroqInc](https://x.com/GroqInc) + - **LinkedIn**: [Groq](https://www.linkedin.com/company/groq/) + - **YouTube**: [Groq](https://www.youtube.com/@GroqInc) + +{{{tasksSection}}} \ No newline at end of file