
Commit e12c995

Ki-Seki authored and RobinPicard committed
fix: Add SamplingParams to model response examples
Updated usage examples to include SamplingParams for response generation and streaming.
1 parent 9f909bd commit e12c995

File tree

1 file changed: +4 -4 lines changed


docs/features/models/vllm_offline.md

Lines changed: 4 additions & 4 deletions
@@ -64,7 +64,7 @@ For instance:
 
 ```python
 import outlines
-from vllm import LLM
+from vllm import LLM, SamplingParams
 from outlines.inputs import Chat
 
 # Create the model
@@ -79,7 +79,7 @@ prompt = Chat([
 ])
 
 # Call the model to generate a response
-response = model(prompt, max_tokens=50)
+response = model(prompt, sampling_params=SamplingParams(max_tokens=50))
 print(response) # 'Riga'
 ```
 
@@ -91,15 +91,15 @@ For instance:
 
 ```python
 import outlines
-from vllm import LLM
+from vllm import LLM, SamplingParams
 
 # Create the model
 model = outlines.from_vllm_offline(
     LLM("microsoft/Phi-3-mini-4k-instruct")
 )
 
 # Stream the response
-for chunk in model.stream("Tell me a short story about a cat.", max_tokens=50):
+for chunk in model.stream("Tell me a short story about a cat.", sampling_params=SamplingParams(max_tokens=50)):
     print(chunk) # 'Once...'
 ```
 
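For reference, a minimal sketch of the updated streaming usage after this change; the temperature value is an illustrative assumption and not part of the commit, which only passes max_tokens through SamplingParams:

```python
import outlines
from vllm import LLM, SamplingParams

# Load the model through vLLM's offline engine
model = outlines.from_vllm_offline(
    LLM("microsoft/Phi-3-mini-4k-instruct")
)

# Generation options are passed via a vLLM SamplingParams object;
# temperature here is illustrative, only max_tokens comes from the docs change
params = SamplingParams(max_tokens=50, temperature=0.7)

# Stream the response chunk by chunk
for chunk in model.stream("Tell me a short story about a cat.", sampling_params=params):
    print(chunk)  # 'Once...'
```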
