@@ -24,13 +24,12 @@ This is a subtask of [`text-generation`](https://huggingface.co/docs/api-inferen
 - [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions.
 - [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions.
 - [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct): Small yet powerful text generation model.
-- [HuggingFaceH4/starchat2-15b-v0.1](https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1): Strong coding assistant model.
-- [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407): Very strong open-source large language model.
+- [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct): Strong text generation model to follow instructions.
2928
3029#### Conversational Vision-Language Models (VLMs)
3130
 - [meta-llama/Llama-3.2-11B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct): Powerful vision language model with great visual understanding and reasoning capabilities.
-- [microsoft/Phi-3.5-vision-instruct](https://huggingface.co/microsoft/Phi-3.5-vision-instruct): Strong image-text-to-text model.
+- [Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct): Strong image-text-to-text model.
3433
3534### API Playground
3635
@@ -65,46 +64,133 @@ The API supports:
6564curl ' https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions' \
6665-H " Authorization: Bearer hf_***" \
6766-H ' Content-Type: application/json' \
68- -d ' {
69- "model": "google/gemma-2-2b-it",
70- "messages": [{"role": "user", "content": "What is the capital of France?"}],
71- "max_tokens": 500,
72- "stream": false
67+ --data ' {
68+ "model": "google/gemma-2-2b-it",
69+ "messages": [
70+ {
71+ "role": "user",
72+ "content": "What is the capital of France?"
73+ }
74+ ],
75+ "max_tokens": 500,
76+ "stream": true
7377}'
74-
7578```
7679</curl >
7780
7881<python >
82+ With huggingface_hub client:
7983``` py
8084from huggingface_hub import InferenceClient
8185
8286client = InferenceClient(api_key = " hf_***" )
8387
84- for message in client.chat_completion(
85- model = " google/gemma-2-2b-it" ,
86- messages = [{" role" : " user" , " content" : " What is the capital of France?" }],
88+ messages = [
89+ {
90+ " role" : " user" ,
91+ " content" : " What is the capital of France?"
92+ }
93+ ]
94+
95+ stream = client.chat.completions.create(
96+ model = " google/gemma-2-2b-it" ,
97+ messages = messages,
8798 max_tokens = 500 ,
88- stream = True ,
89- ):
90- print (message.choices[0 ].delta.content, end = " " )
99+ stream = True
100+ )
101+
102+ for chunk in stream:
103+ print (chunk.choices[0 ].delta.content, end = " " )
104+ ```
105+
106+ With openai client:
107+ ``` py
108+ from openai import OpenAI
109+
110+ client = OpenAI(
111+ base_url = " https://api-inference.huggingface.co/v1/" ,
112+ api_key = " hf_***"
113+ )
114+
115+ messages = [
116+ {
117+ " role" : " user" ,
118+ " content" : " What is the capital of France?"
119+ }
120+ ]
121+
122+ stream = client.chat.completions.create(
123+ model = " google/gemma-2-2b-it" ,
124+ messages = messages,
125+ max_tokens = 500 ,
126+ stream = True
127+ )
128+
129+ for chunk in stream:
130+ print (chunk.choices[0 ].delta.content, end = " " )
91131```
92132
 To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
94134</python >
95135
96136<js >
137+ With huggingface_hub client:
97138``` js
98- import { HfInference } from " @huggingface/inference" ;
139+ import { HfInference } from " @huggingface/inference"
99140
100- const inference = new HfInference (" hf_***" );
141+ const client = new HfInference (" hf_***" )
101142
102- for await (const chunk of inference .chatCompletionStream ({
143+ let out = " " ;
144+
145+ const stream = client .chatCompletionStream ({
103146 model: " google/gemma-2-2b-it" ,
104- messages: [{ role: " user" , content: " What is the capital of France?" }],
147+ messages: [
148+ {
149+ role: " user" ,
150+ content: " What is the capital of France?"
151+ }
152+ ],
153+ max_tokens: 500
154+ });
155+
156+ for await (const chunk of stream ) {
157+ if (chunk .choices && chunk .choices .length > 0 ) {
158+ const newContent = chunk .choices [0 ].delta .content ;
159+ out += newContent;
160+ console .log (newContent);
161+ }
162+ }
163+ ```
164+
165+ With openai client:
166+ ``` js
167+ import { OpenAI } from " openai"
168+
169+ const client = new OpenAI ({
170+ baseURL: " https://api-inference.huggingface.co/v1/" ,
171+ apiKey: " hf_***"
172+ })
173+
174+ let out = " " ;
175+
176+ const stream = await client .chat .completions .create ({
177+ model: " google/gemma-2-2b-it" ,
178+ messages: [
179+ {
180+ role: " user" ,
181+ content: " What is the capital of France?"
182+ }
183+ ],
105184 max_tokens: 500 ,
106- })) {
107- process .stdout .write (chunk .choices [0 ]? .delta ? .content || " " );
185+ stream: true ,
186+ });
187+
188+ for await (const chunk of stream ) {
189+ if (chunk .choices && chunk .choices .length > 0 ) {
190+ const newContent = chunk .choices [0 ].delta .content ;
191+ out += newContent;
192+ console .log (newContent);
193+ }
108194}
109195```
110196
@@ -125,73 +211,188 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/
125211curl ' https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions' \
126212-H " Authorization: Bearer hf_***" \
127213-H ' Content-Type: application/json' \
128- - d ' {
129- "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
130- "messages": [
214+ --data ' {
215+ "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
216+ "messages": [
131217 {
132218 "role": "user",
133219 "content": [
134- {"type": "image_url", "image_url": {"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"}},
135- {"type": "text", "text": "Describe this image in one sentence."}
220+ {
221+ "type": "text",
222+ "text": "Describe this image in one sentence."
223+ },
224+ {
225+ "type": "image_url",
226+ "image_url": {
227+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
228+ }
229+ }
136230 ]
137231 }
138232 ],
139- "max_tokens": 500,
140- "stream": false
233+ "max_tokens": 500,
234+ "stream": true
141235}'
142-
143236```
144237</curl >
145238
146239<python >
240+ With huggingface_hub client:
147241``` py
148242from huggingface_hub import InferenceClient
149243
150244client = InferenceClient(api_key = " hf_***" )
151245
152- image_url = " https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
246+ messages = [
247+ {
248+ " role" : " user" ,
249+ " content" : [
250+ {
251+ " type" : " text" ,
252+ " text" : " Describe this image in one sentence."
253+ },
254+ {
255+ " type" : " image_url" ,
256+ " image_url" : {
257+ " url" : " https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
258+ }
259+ }
260+ ]
261+ }
262+ ]
263+
264+ stream = client.chat.completions.create(
265+ model = " meta-llama/Llama-3.2-11B-Vision-Instruct" ,
266+ messages = messages,
267+ max_tokens = 500 ,
268+ stream = True
269+ )
153270
154- for message in client .chat_completion (
155- model= " meta-llama/Llama-3.2-11B-Vision-Instruct" ,
156- messages= [
157- {
158- " role" : " user" ,
159- " content" : [
160- {" type" : " image_url" , " image_url" : {" url" : image_url}},
161- {" type" : " text" , " text" : " Describe this image in one sentence." },
162- ],
163- }
164- ],
271+ for chunk in stream:
272+ print (chunk.choices[0 ].delta.content, end = " " )
273+ ```
274+
275+ With openai client:
276+ ``` py
277+ from openai import OpenAI
278+
279+ client = OpenAI(
280+ base_url = " https://api-inference.huggingface.co/v1/" ,
281+ api_key = " hf_***"
282+ )
283+
284+ messages = [
285+ {
286+ " role" : " user" ,
287+ " content" : [
288+ {
289+ " type" : " text" ,
290+ " text" : " Describe this image in one sentence."
291+ },
292+ {
293+ " type" : " image_url" ,
294+ " image_url" : {
295+ " url" : " https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
296+ }
297+ }
298+ ]
299+ }
300+ ]
301+
302+ stream = client.chat.completions.create(
303+ model = " meta-llama/Llama-3.2-11B-Vision-Instruct" ,
304+ messages = messages,
165305 max_tokens = 500 ,
166- stream= True,
167- ):
168- print (message .choices [0 ].delta .content , end= " " )
306+ stream = True
307+ )
308+
309+ for chunk in stream:
310+ print (chunk.choices[0 ].delta.content, end = " " )
169311```
170312
 To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
172314</python >
173315
174316<js >
317+ With huggingface_hub client:
175318``` js
176- import { HfInference } from " @huggingface/inference" ;
319+ import { HfInference } from " @huggingface/inference"
177320
178- const inference = new HfInference (" hf_***" );
179- const imageUrl = " https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" ;
321+ const client = new HfInference (" hf_***" )
180322
181- for await (const chunk of inference .chatCompletionStream ({
323+ let out = " " ;
324+
325+ const stream = client .chatCompletionStream ({
182326 model: " meta-llama/Llama-3.2-11B-Vision-Instruct" ,
183327 messages: [
184328 {
185- " role" : " user" ,
186- " content" : [
187- {" type" : " image_url" , " image_url" : {" url" : imageUrl}},
188- {" type" : " text" , " text" : " Describe this image in one sentence." },
189- ],
329+ role: " user" ,
330+ content: [
331+ {
332+ type: " text" ,
333+ text: " Describe this image in one sentence."
334+ },
335+ {
336+ type: " image_url" ,
337+ image_url: {
338+ url: " https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
339+ }
340+ }
341+ ]
342+ }
343+ ],
344+ max_tokens: 500
345+ });
346+
347+ for await (const chunk of stream ) {
348+ if (chunk .choices && chunk .choices .length > 0 ) {
349+ const newContent = chunk .choices [0 ].delta .content ;
350+ out += newContent;
351+ console .log (newContent);
352+ }
353+ }
354+ ```
355+
356+ With openai client:
357+ ``` js
358+ import { OpenAI } from " openai"
359+
360+ const client = new OpenAI ({
361+ baseURL: " https://api-inference.huggingface.co/v1/" ,
362+ apiKey: " hf_***"
363+ })
364+
365+ let out = " " ;
366+
367+ const stream = await client .chat .completions .create ({
368+ model: " meta-llama/Llama-3.2-11B-Vision-Instruct" ,
369+ messages: [
370+ {
371+ role: " user" ,
372+ content: [
373+ {
374+ type: " text" ,
375+ text: " Describe this image in one sentence."
376+ },
377+ {
378+ type: " image_url" ,
379+ image_url: {
380+ url: " https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
381+ }
382+ }
383+ ]
190384 }
191385 ],
192386 max_tokens: 500 ,
193- })) {
194- process .stdout .write (chunk .choices [0 ]? .delta ? .content || " " );
387+ stream: true ,
388+ });
389+
390+ for await (const chunk of stream ) {
391+ if (chunk .choices && chunk .choices .length > 0 ) {
392+ const newContent = chunk .choices [0 ].delta .content ;
393+ out += newContent;
394+ console .log (newContent);
395+ }
195396}
196397```
197398
0 commit comments