
Commit b6771b3

fix chat-completion and image-text-to-text docs
1 parent 7cf2959

6 files changed (+213, -138 lines)

docs/api-inference/tasks/chat-completion.md

Lines changed: 96 additions & 7 deletions
@@ -14,20 +14,20 @@ For more details, check out:
 
 ## Chat Completion
 
-Generate a response given a list of messages.
-This is a subtask of [`text-generation`](./text_generation) designed to generate responses in a conversational context.
-
-
+Generate a response given a list of messages in a conversational context, supporting both conversational Language Models (LLMs) and conversational Vision-Language Models (VLMs).
+This is a subtask of [`text-generation`](./text_generation) and [`image-text-to-text`](./image_text_to_text).
 
 ### Recommended models
 
+#### Conversational Large Language Models (LLMs)
 - [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions.
 - [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions.
 - [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct): Small yet powerful text generation model.
 - [HuggingFaceH4/starchat2-15b-v0.1](https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1): Strong coding assistant model.
 - [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407): Very strong open-source large language model.
 
-
+#### Conversational Vision-Language Models (VLMs)
+- [microsoft/Phi-3.5-vision-instruct](https://huggingface.co/microsoft/Phi-3.5-vision-instruct): Strong image-text-to-text model.
 
 ### Using the API
 
@@ -37,6 +37,8 @@ The API supports:
 * Using grammars, constraints, and tools.
 * Streaming the output
 
+#### Code snippet example for conversational LLMs
+
 
 <inferencesnippet>
 
@@ -70,7 +72,7 @@ for message in client.chat_completion(
     print(message.choices[0].delta.content, end="")
 ```
 
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
+To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.conversational_text-generation).
 </python>
 
 <js>
@@ -88,7 +90,94 @@ for await (const chunk of inference.chatCompletionStream({
 }
 ```
 
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion).
+To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#conversationaltext-generation).
+</js>
+
+</inferencesnippet>
+
+
+
+#### Code snippet example for conversational VLMs
+
+
+<inferencesnippet>
+
+<curl>
+```bash
+curl 'https://api-inference.huggingface.co/models/microsoft/Phi-3.5-vision-instruct/v1/chat/completions' \
+-H "Authorization: Bearer hf_***" \
+-H 'Content-Type: application/json' \
+-d '{
+    "model": "microsoft/Phi-3.5-vision-instruct",
+    "messages": [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"}},
+                {"type": "text", "text": "Describe this image in one sentence."}
+            ]
+        }
+    ],
+    "max_tokens": 500,
+    "stream": false
+}'
+
+```
+</curl>
+
+<python>
+```py
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(api_key="hf_***")
+
+image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+
+for message in client.chat_completion(
+    model="microsoft/Phi-3.5-vision-instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": image_url}},
+                {"type": "text", "text": "Describe this image in one sentence."},
+            ],
+        }
+    ],
+    max_tokens=500,
+    stream=True,
+):
+    print(message.choices[0].delta.content, end="")
+```
+
+To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.conversational_image-text-to-text).
+</python>
+
+<js>
+```js
+import { HfInference } from "@huggingface/inference";
+
+const inference = new HfInference("hf_***");
+const imageUrl = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg";
+
+for await (const chunk of inference.chatCompletionStream({
+    model: "microsoft/Phi-3.5-vision-instruct",
+    messages: [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": imageUrl}},
+                {"type": "text", "text": "Describe this image in one sentence."},
+            ],
+        }
+    ],
+    max_tokens: 500,
+})) {
+    process.stdout.write(chunk.choices[0]?.delta?.content || "");
+}
+```
+
+To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#conversationalimage-text-to-text).
 </js>
 
 </inferencesnippet>
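The VLM snippets above all stream the response. With the same `huggingface_hub` client, a non-streaming call returns one complete message object instead of delta chunks; a minimal sketch, keeping the placeholder token `hf_***` and the model from the diff:

```py
from huggingface_hub import InferenceClient

client = InferenceClient(api_key="hf_***")  # placeholder token, as in the diff

# Same request as the added VLM snippet, but with stream=False the client
# returns a single completion object rather than a stream of deltas.
response = client.chat_completion(
    model="microsoft/Phi-3.5-vision-instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"}},
                {"type": "text", "text": "Describe this image in one sentence."},
            ],
        }
    ],
    max_tokens=500,
    stream=False,
)

# The full generated text lives on the message, not on a delta.
print(response.choices[0].message.content)
```

With `stream=True`, as in the diff, the same content arrives incrementally on `choices[0].delta.content`.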

docs/api-inference/tasks/image-text-to-text.md

Lines changed: 1 addition & 16 deletions
@@ -109,21 +109,6 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/
 
 ### API specification
 
-#### Request
-
-
-
-Some options can be configured by passing headers to the Inference API. Here are the available headers:
-
-| Headers | | |
-| :--- | :--- | :--- |
-| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). |
-| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching). |
-| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility). |
-
-For more information about Inference API headers, check out the parameters [guide](../parameters).
-
-#### Response
-
+For the API specification of conversational image-text-to-text models, please refer to the [Chat Completion API documentation](https://huggingface.co/docs/api-inference/tasks/chat-completion#api-specification).
 
 
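The headers documented in the removed table (`authorization`, `x-use-cache`, `x-wait-for-model`) still apply to raw HTTP calls; the commit only moves their documentation to the parameters guide. A minimal sketch of passing them with `requests`, assuming a placeholder model URL, token, and payload:

```py
import requests

# Placeholder endpoint and token; any Inference API model URL works the same way.
API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3.5-vision-instruct"
headers = {
    "Authorization": "Bearer hf_***",   # personal user access token
    "x-use-cache": "false",             # bypass the shared cache layer
    "x-wait-for-model": "true",         # wait for a cold model instead of getting a 503
}

response = requests.post(API_URL, headers=headers, json={"inputs": "..."})
print(response.status_code)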
docs/api-inference/tasks/text-generation.md

Lines changed: 35 additions & 31 deletions
@@ -42,50 +42,54 @@ This is only a subset of the supported models. Find the model that suits you bes
 
 <curl>
 ```bash
-curl 'https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions' \
--H "Authorization: Bearer hf_***" \
--H 'Content-Type: application/json' \
--d '{
-    "model": "google/gemma-2-2b-it",
-    "messages": [{"role": "user", "content": "What is the capital of France?"}],
-    "max_tokens": 500,
-    "stream": false
-}'
-
+curl https://api-inference.huggingface.co/models/google/gemma-2-2b-it \
+    -X POST \
+    -d '{"inputs": "Can you please let us know more details about your "}' \
+    -H 'Content-Type: application/json' \
+    -H "Authorization: Bearer hf_***"
 ```
 </curl>
 
 <python>
 ```py
-from huggingface_hub import InferenceClient
-
-client = InferenceClient(api_key="hf_***")
-
-for message in client.chat_completion(
-    model="google/gemma-2-2b-it",
-    messages=[{"role": "user", "content": "What is the capital of France?"}],
-    max_tokens=500,
-    stream=True,
-):
-    print(message.choices[0].delta.content, end="")
+import requests
+
+API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-2b-it"
+headers = {"Authorization": "Bearer hf_***"}
+
+def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "inputs": "Can you please let us know more details about your ",
+})
 ```
 
 To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation).
 </python>
 
 <js>
 ```js
-import { HfInference } from "@huggingface/inference";
-
-const inference = new HfInference("hf_***");
-
-for await (const chunk of inference.chatCompletionStream({
-    model: "google/gemma-2-2b-it",
-    messages: [{ role: "user", content: "What is the capital of France?" }],
-    max_tokens: 500,
-})) {
-    process.stdout.write(chunk.choices[0]?.delta?.content || "");
+async function query(data) {
+    const response = await fetch(
+        "https://api-inference.huggingface.co/models/google/gemma-2-2b-it",
+        {
+            headers: {
+                Authorization: "Bearer hf_***",
+                "Content-Type": "application/json",
+            },
+            method: "POST",
+            body: JSON.stringify(data),
+        }
+    );
+    const result = await response.json();
+    return result;
 }
+
+query({"inputs": "Can you please let us know more details about your "}).then((response) => {
+    console.log(JSON.stringify(response));
+});
 ```
 
 To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#textgeneration).
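The new snippets send a bare `inputs` payload. The same text-generation endpoint also accepts a `parameters` object for generation settings such as `max_new_tokens`; a minimal sketch extending the Python snippet above, with illustrative values that are not part of the commit:

```py
import requests

API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-2b-it"
headers = {"Authorization": "Bearer hf_***"}  # placeholder token, as in the diff

payload = {
    "inputs": "Can you please let us know more details about your ",
    "parameters": {                # illustrative values, not from the commit
        "max_new_tokens": 150,     # cap the length of the generated continuation
        "temperature": 0.7,        # sample instead of greedy decoding
    },
}

response = requests.post(API_URL, headers=headers, json=payload)
print(response.json())
```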
