Commit 75724b1

[Inference snippets]: no need to showcase max_tokens (huggingface#1401)
_Originally by @julien-c / @gary149 on Slack:_ > "but in the snippet, to be honest, I would remove max_tokens completely" => let's remove `max_tokens` entirely from the inference snippets
1 parent 7706a0f commit 75724b1


55 files changed (+1 −55 lines)

packages/inference/src/snippets/getInferenceSnippets.ts

Lines changed: 1 addition & 1 deletion
@@ -272,7 +272,7 @@ const prepareConversationalInput = (
 	return {
 		messages: opts?.messages ?? getModelInputSnippet(model),
 		...(opts?.temperature ? { temperature: opts?.temperature } : undefined),
-		max_tokens: opts?.max_tokens ?? 512,
+		...(opts?.max_tokens ? { max_tokens: opts?.max_tokens } : undefined),
 		...(opts?.top_p ? { top_p: opts?.top_p } : undefined),
 	};
 };
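The change above replaces an always-present `max_tokens: opts?.max_tokens ?? 512` with the same conditional-spread idiom already used for `temperature` and `top_p`, so the key only appears in the generated snippet when the caller set it. A minimal standalone sketch of that idiom (the `buildPayload` helper and `SnippetOpts` type are illustrative names, not part of the actual package):

```typescript
// Illustrative types/helper, not from @huggingface/inference itself.
interface SnippetOpts {
  temperature?: number;
  max_tokens?: number;
  top_p?: number;
}

function buildPayload(messages: object[], opts?: SnippetOpts): Record<string, unknown> {
  return {
    messages,
    // Spreading `undefined` is a no-op, so each key is emitted only when
    // its option is set (note: truthiness also drops explicit 0, matching
    // the source's `opts?.x ? … : undefined` pattern).
    ...(opts?.temperature ? { temperature: opts.temperature } : undefined),
    ...(opts?.max_tokens ? { max_tokens: opts.max_tokens } : undefined),
    ...(opts?.top_p ? { top_p: opts.top_p } : undefined),
  };
}

const msgs = [{ role: "user", content: "What is the capital of France?" }];
console.log(buildPayload(msgs));                      // only `messages`
console.log(buildPayload(msgs, { max_tokens: 256 })); // `max_tokens` included
```

Omitting the key entirely (rather than defaulting to 512) lets each provider apply its own default, which is the point of the commit.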

packages/tasks-gen/snippets-fixtures/bill-to-param/js/huggingface.js/0.hf-inference.js

Lines changed: 0 additions & 1 deletion
@@ -11,7 +11,6 @@ const chatCompletion = await client.chatCompletion({
 			content: "What is the capital of France?",
 		},
 	],
-	max_tokens: 512,
 }, {
 	billTo: "huggingface",
 });

packages/tasks-gen/snippets-fixtures/bill-to-param/js/openai/0.hf-inference.js

Lines changed: 0 additions & 1 deletion
@@ -16,7 +16,6 @@ const chatCompletion = await client.chat.completions.create({
 			content: "What is the capital of France?",
 		},
 	],
-	max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/bill-to-param/python/huggingface_hub/0.hf-inference.py

Lines changed: 0 additions & 1 deletion
@@ -14,7 +14,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/bill-to-param/python/openai/0.hf-inference.py

Lines changed: 0 additions & 1 deletion
@@ -16,7 +16,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/bill-to-param/python/requests/0.hf-inference.py

Lines changed: 0 additions & 1 deletion
@@ -17,7 +17,6 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct"
 })

packages/tasks-gen/snippets-fixtures/bill-to-param/sh/curl/0.hf-inference.sh

Lines changed: 0 additions & 1 deletion
@@ -9,7 +9,6 @@ curl https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-I
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "stream": false
 }'

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/huggingface.js/0.hf-inference.js

Lines changed: 0 additions & 1 deletion
@@ -11,7 +11,6 @@ const chatCompletion = await client.chatCompletion({
 			content: "What is the capital of France?",
 		},
 	],
-	max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/huggingface.js/0.together.js

Lines changed: 0 additions & 1 deletion
@@ -11,7 +11,6 @@ const chatCompletion = await client.chatCompletion({
 			content: "What is the capital of France?",
 		},
 	],
-	max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.hf-inference.js

Lines changed: 0 additions & 1 deletion
@@ -13,7 +13,6 @@ const chatCompletion = await client.chat.completions.create({
 			content: "What is the capital of France?",
 		},
 	],
-	max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);
