diff --git a/docs/api-inference/tasks/automatic-speech-recognition.md b/docs/api-inference/tasks/automatic-speech-recognition.md index 7d7a2cc0a..af43524c5 100644 --- a/docs/api-inference/tasks/automatic-speech-recognition.md +++ b/docs/api-inference/tasks/automatic-speech-recognition.md @@ -30,7 +30,6 @@ For more details about the `automatic-speech-recognition` task, check out its [d ### Recommended models - [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3): A powerful ASR model by OpenAI. -- [facebook/seamless-m4t-v2-large](https://huggingface.co/facebook/seamless-m4t-v2-large): An end-to-end model that performs ASR and Speech Translation by MetaAI. - [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1): Powerful speaker diarization model. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=automatic-speech-recognition&sort=trending). @@ -117,9 +116,9 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ | **                epsilon_cutoff** | _number_ | If set to float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details. | | **                eta_cutoff** | _number_ | Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. 
See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details. | | **                max_length** | _integer_ | The maximum length (in tokens) of the generated text, including the input. | -| **                max_new_tokens** | _integer_ | The maximum number of tokens to generate. Takes precedence over maxLength. | +| **                max_new_tokens** | _integer_ | The maximum number of tokens to generate. Takes precedence over max_length. | | **                min_length** | _integer_ | The minimum length (in tokens) of the generated text, including the input. | -| **                min_new_tokens** | _integer_ | The minimum number of tokens to generate. Takes precedence over maxLength. | +| **                min_new_tokens** | _integer_ | The minimum number of tokens to generate. Takes precedence over min_length. | | **                do_sample** | _boolean_ | Whether to use sampling instead of greedy decoding when generating new tokens. | | **                early_stopping** | _enum_ | Possible values: never, true, false. | | **                num_beams** | _integer_ | Number of beams to use for beam search. | diff --git a/docs/api-inference/tasks/chat-completion.md b/docs/api-inference/tasks/chat-completion.md index 249318eda..367be55db 100644 --- a/docs/api-inference/tasks/chat-completion.md +++ b/docs/api-inference/tasks/chat-completion.md @@ -109,25 +109,39 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ | **logprobs** | _boolean_ | Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message. | | **max_tokens** | _integer_ | The maximum number of tokens that can be generated in the chat completion. | | **messages*** | _object[]_ | A list of messages comprising the conversation so far. 
| -| **        content** | _string_ | | +| **        content*** | _unknown_ | One of the following: | +| **                 (#1)** | _string_ | | +| **                 (#2)** | _object[]_ | | +| **                         (#1)** | _object_ | | +| **                                text*** | _string_ | | +| **                                type*** | _enum_ | Possible values: text. | +| **                         (#2)** | _object_ | | +| **                                image_url*** | _object_ | | +| **                                        url*** | _string_ | | +| **                                type*** | _enum_ | Possible values: image_url. | | **        name** | _string_ | | | **        role*** | _string_ | | -| **        tool_calls** | _object[]_ | | -| **                function*** | _object_ | | -| **                        arguments*** | _unknown_ | | -| **                        description** | _string_ | | -| **                        name*** | _string_ | | -| **                id*** | _integer_ | | -| **                type*** | _string_ | | | **presence_penalty** | _number_ | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics | +| **response_format** | _unknown_ | One of the following: | +| **         (#1)** | _object_ | | +| **                type*** | _enum_ | Possible values: json. | +| **                value*** | _unknown_ | A string that represents a [JSON Schema](https://json-schema.org/). JSON Schema is a declarative language that allows to annotate JSON documents with types and descriptions. | +| **         (#2)** | _object_ | | +| **                type*** | _enum_ | Possible values: regex. | +| **                value*** | _string_ | | | **seed** | _integer_ | | | **stop** | _string[]_ | Up to 4 sequences where the API will stop generating further tokens. 
| | **stream** | _boolean_ | | +| **stream_options** | _object_ | | +| **        include_usage*** | _boolean_ | If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value. | | **temperature** | _number_ | What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. | | **tool_choice** | _unknown_ | One of the following: | -| **         (#1)** | | | -| **                FunctionName*** | _string_ | | -| **         (#2)** | | Possible values: OneOf. | +| **         (#1)** | _object_ | | +| **         (#2)** | _string_ | | +| **         (#3)** | _object_ | | +| **                function*** | _object_ | | +| **                        name*** | _string_ | | +| **         (#4)** | _object_ | | | **tool_prompt** | _string_ | A prompt to be appended before the tools | | **tools** | _object[]_ | A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. 
| | **        function*** | _object_ | | @@ -166,21 +180,22 @@ If `stream` is `false` (default), the response will be a JSON object with the fo | **                        top_logprobs** | _object[]_ | | | **                                logprob** | _number_ | | | **                                token** | _string_ | | -| **        message** | _object_ | | -| **                content** | _string_ | | -| **                name** | _string_ | | -| **                role** | _string_ | | -| **                tool_calls** | _object[]_ | | -| **                        function** | _object_ | | -| **                                arguments** | _unknown_ | | -| **                                description** | _string_ | | -| **                                name** | _string_ | | -| **                        id** | _integer_ | | -| **                        type** | _string_ | | +| **        message** | _unknown_ | One of the following: | +| **                 (#1)** | _object_ | | +| **                        content** | _string_ | | +| **                        role** | _string_ | | +| **                 (#2)** | _object_ | | +| **                        role** | _string_ | | +| **                        tool_calls** | _object[]_ | | +| **                                function** | _object_ | | +| **                                        arguments** | _unknown_ | | +| **                                        description** | _string_ | | +| **                                        name** | _string_ | | +| **                                id** | _string_ | | +| **                                type** | _string_ | | | **created** | _integer_ | | | **id** | _string_ | | | **model** | _string_ | | -| **object** | _string_ | | | **system_fingerprint** | _string_ | | | **usage** | _object_ | | | **        completion_tokens** | _integer_ | | @@ -194,16 +209,19 @@ For more information about streaming, check out [this guide](https://huggingface | Body | | | :--- | 
:--- | :--- | | **choices** | _object[]_ | | -| **        delta** | _object_ | | -| **                content** | _string_ | | -| **                role** | _string_ | | -| **                tool_calls** | _object_ | | -| **                        function** | _object_ | | -| **                                arguments** | _string_ | | -| **                                name** | _string_ | | -| **                        id** | _string_ | | -| **                        index** | _integer_ | | -| **                        type** | _string_ | | +| **        delta** | _unknown_ | One of the following: | +| **                 (#1)** | _object_ | | +| **                        content** | _string_ | | +| **                        role** | _string_ | | +| **                 (#2)** | _object_ | | +| **                        role** | _string_ | | +| **                        tool_calls** | _object_ | | +| **                                function** | _object_ | | +| **                                        arguments** | _string_ | | +| **                                        name** | _string_ | | +| **                                id** | _string_ | | +| **                                index** | _integer_ | | +| **                                type** | _string_ | | | **        finish_reason** | _string_ | | | **        index** | _integer_ | | | **        logprobs** | _object_ | | @@ -216,7 +234,10 @@ For more information about streaming, check out [this guide](https://huggingface | **created** | _integer_ | | | **id** | _string_ | | | **model** | _string_ | | -| **object** | _string_ | | | **system_fingerprint** | _string_ | | +| **usage** | _object_ | | +| **        completion_tokens** | _integer_ | | +| **        prompt_tokens** | _integer_ | | +| **        total_tokens** | _integer_ | | diff --git a/docs/api-inference/tasks/text-classification.md b/docs/api-inference/tasks/text-classification.md index 2ddea6833..d014a40f2 100644 --- 
a/docs/api-inference/tasks/text-classification.md +++ b/docs/api-inference/tasks/text-classification.md @@ -28,7 +28,6 @@ For more details about the `text-classification` task, check out its [dedicated - [ProsusAI/finbert](https://huggingface.co/ProsusAI/finbert): A sentiment analysis model specialized in financial sentiment. - [cardiffnlp/twitter-roberta-base-sentiment-latest](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest): A sentiment analysis model specialized in analyzing tweets. - [papluca/xlm-roberta-base-language-detection](https://huggingface.co/papluca/xlm-roberta-base-language-detection): A model that can classify languages. -- [meta-llama/Prompt-Guard-86M](https://huggingface.co/meta-llama/Prompt-Guard-86M): A model that can classify text generation attacks. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-classification&sort=trending). diff --git a/docs/api-inference/tasks/text-generation.md b/docs/api-inference/tasks/text-generation.md index 22ee84e1a..e69e96dc0 100644 --- a/docs/api-inference/tasks/text-generation.md +++ b/docs/api-inference/tasks/text-generation.md @@ -108,30 +108,31 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ | :--- | :--- | :--- | | **inputs*** | _string_ | | | **parameters** | _object_ | | -| **        best_of** | _integer_ | | -| **        decoder_input_details** | _boolean_ | | -| **        details** | _boolean_ | | -| **        do_sample** | _boolean_ | | -| **        frequency_penalty** | _number_ | | +| **        adapter_id** | _string_ | LoRA adapter ID. | +| **        best_of** | _integer_ | Generate best_of sequences and return the one with the highest token logprobs. | +| **        decoder_input_details** | _boolean_ | Whether to return decoder input token logprobs and ids. | +| **        details** | _boolean_ | Whether to return generation details. 
| +| **        do_sample** | _boolean_ | Activate logits sampling. | +| **        frequency_penalty** | _number_ | The parameter for frequency penalty. 1.0 means no penalty. Penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. | | **        grammar** | _unknown_ | One of the following: | -| **                 (#1)** | | | +| **                 (#1)** | _object_ | | | **                        type*** | _enum_ | Possible values: json. | | **                        value*** | _unknown_ | A string that represents a [JSON Schema](https://json-schema.org/). JSON Schema is a declarative language that allows to annotate JSON documents with types and descriptions. | -| **                 (#2)** | | | +| **                 (#2)** | _object_ | | | **                        type*** | _enum_ | Possible values: regex. | | **                        value*** | _string_ | | -| **        max_new_tokens** | _integer_ | | -| **        repetition_penalty** | _number_ | | -| **        return_full_text** | _boolean_ | | -| **        seed** | _integer_ | | -| **        stop** | _string[]_ | | -| **        temperature** | _number_ | | -| **        top_k** | _integer_ | | -| **        top_n_tokens** | _integer_ | | -| **        top_p** | _number_ | | -| **        truncate** | _integer_ | | -| **        typical_p** | _number_ | | -| **        watermark** | _boolean_ | | +| **        max_new_tokens** | _integer_ | Maximum number of tokens to generate. | +| **        repetition_penalty** | _number_ | The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. | +| **        return_full_text** | _boolean_ | Whether to prepend the prompt to the generated text. | +| **        seed** | _integer_ | Random sampling seed. | +| **        stop** | _string[]_ | Stop generating tokens if a member of `stop` is generated. 
| +| **        temperature** | _number_ | The value used to modulate the logits distribution. | +| **        top_k** | _integer_ | The number of highest probability vocabulary tokens to keep for top-k-filtering. | +| **        top_n_tokens** | _integer_ | The number of highest probability vocabulary tokens to keep for top-n-filtering. | +| **        top_p** | _number_ | Top-p value for nucleus sampling. | +| **        truncate** | _integer_ | Truncate input tokens to the given size. | +| **        typical_p** | _number_ | Typical Decoding mass. See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information. | +| **        watermark** | _boolean_ | Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226). | | **stream** | _boolean_ | | @@ -200,6 +201,7 @@ For more information about streaming, check out [this guide](https://huggingface | **details** | _object_ | | | **        finish_reason** | _enum_ | Possible values: length, eos_token, stop_sequence. | | **        generated_tokens** | _integer_ | | +| **        input_length** | _integer_ | | | **        seed** | _integer_ | | | **generated_text** | _string_ | | | **index** | _integer_ | | diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index 286594bbb..49657e1ed 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -232,7 +232,7 @@ function processPayloadSchema(schema: any): JsonObject[] { if (key.includes("(#")) { // If it's a combination, no need to re-specify the type except if it's to // specify a constant value. - type = value.const ? `'${value.const}'` : ""; + type = value.const ? 
`'${value.const}'` : type; } const row = { name: `${parentPrefix}${key}`, @@ -271,7 +271,7 @@ function processPayloadSchema(schema: any): JsonObject[] { processSchemaNode( `${NBSP}(#${index + 1})`, subSchema, - isRequired, + false, parentPrefix + TABLE_INDENT, ); });