@@ -691,7 +691,11 @@ def llm_factory(
691691
692692
693693class InstructorModelArgs(BaseModel):
694- """Simple model arguments configuration for instructor LLMs"""
694+ """Simple model arguments configuration for instructor LLMs
695+
696+ Note: For GPT-5 and o-series models, you may need to increase max_tokens
697+ to 4096+ for structured output to work properly. See documentation for details.
698+ """
695699
696700	temperature: float = 0.01
697701	top_p: float = 0.1
@@ -764,13 +768,20 @@ def _map_provider_params(self) -> t.Dict[str, t.Any]:
764768	    return self.model_args.copy()
765769
766770	def _map_openai_params(self) -> t.Dict[str, t.Any]:
767- """Map max_tokens to max_completion_tokens for OpenAI reasoning models.
771+	    """Map parameters for OpenAI reasoning models with special constraints.
768772
769- Reasoning models (o-series and gpt-5 series) require max_completion_tokens
770- instead of the deprecated max_tokens parameter when using Chat Completions API.
773+ Reasoning models (o-series and gpt-5 series) have unique requirements:
774+ 1. max_tokens must be mapped to max_completion_tokens
775+ 2. temperature must be set to 1.0 (only supported value)
776+ 3. top_p parameter must be removed (not supported)
771777
772778 Legacy OpenAI models (gpt-4, gpt-4o, etc.) continue to use max_tokens unchanged.
773779
780+ For GPT-5 and o-series models with structured output (Pydantic models):
781+ - Default max_tokens=1024 may not be sufficient
782+ - Consider increasing to 4096+ via: llm_factory(..., max_tokens=4096)
783+ - If structured output is truncated, increase max_tokens further
784+
774785 Pattern-based matching for future-proof coverage:
775786 - O-series: o1, o2, o3, o4, o5, ... (all reasoning versions)
776787 - GPT-5 series: gpt-5, gpt-5-*, gpt-6, gpt-7, ... (all GPT-5+ models)
@@ -822,6 +833,14 @@ def is_reasoning_model(model_str: str) -> bool:
822833	    if requires_max_completion_tokens and "max_tokens" in mapped_args:
823834	        mapped_args["max_completion_tokens"] = mapped_args.pop("max_tokens")
824835
836+ # Handle parameter constraints for reasoning models (GPT-5 and o-series)
837+ if requires_max_completion_tokens :
838+ # GPT-5 and o-series models have strict parameter requirements:
839+ # 1. Temperature must be exactly 1.0 (only supported value)
840+ # 2. top_p parameter is not supported and must be removed
841+	        mapped_args["temperature"] = 1.0
842+	        mapped_args.pop("top_p", None)
843+
825844	    return mapped_args
826845
827846	def _map_google_params(self) -> t.Dict[str, t.Any]:
0 commit comments