@@ -541,7 +541,11 @@ def llm_factory(
541541
542542
543543class InstructorModelArgs (BaseModel ):
544- """Simple model arguments configuration for instructor LLMs"""
544+ """Simple model arguments configuration for instructor LLMs
545+
546+ Note: For GPT-5 and o-series models, you may need to increase max_tokens
547+ to 4096+ for structured output to work properly. See documentation for details.
548+ """
545549
546550 temperature : float = 0.01
547551 top_p : float = 0.1
@@ -614,13 +618,20 @@ def _map_provider_params(self) -> t.Dict[str, t.Any]:
614618 return self .model_args .copy ()
615619
616620 def _map_openai_params (self ) -> t .Dict [str , t .Any ]:
617- """Map max_tokens to max_completion_tokens for OpenAI reasoning models.
621+ """Map parameters for OpenAI reasoning models with special constraints.
618622
619- Reasoning models (o-series and gpt-5 series) require max_completion_tokens
620- instead of the deprecated max_tokens parameter when using Chat Completions API.
623+ Reasoning models (o-series and gpt-5 series) have unique requirements:
624+ 1. max_tokens must be mapped to max_completion_tokens
625+ 2. temperature is forced to 1.0 (the only supported value), overriding any configured value
626+ 3. top_p is removed from the request (not supported)
621627
622628 Legacy OpenAI models (gpt-4, gpt-4o, etc.) continue to use max_tokens unchanged.
623629
630+ For GPT-5 and o-series models with structured output (Pydantic models):
631+ - Default max_tokens=1024 may not be sufficient
632+ - Consider increasing to 4096+ via: llm_factory(..., max_tokens=4096)
633+ - If structured output is truncated, increase max_tokens further
634+
624635 Pattern-based matching for future-proof coverage:
625636 - O-series: o1, o2, o3, o4, o5, ... (all reasoning versions)
626637 - GPT-5 series: gpt-5, gpt-5-*, gpt-6, gpt-7, ... (all GPT-5+ models)
@@ -672,6 +683,14 @@ def is_reasoning_model(model_str: str) -> bool:
672683 if requires_max_completion_tokens and "max_tokens" in mapped_args :
673684 mapped_args ["max_completion_tokens" ] = mapped_args .pop ("max_tokens" )
674685
686+ # Handle parameter constraints for reasoning models (GPT-5 and o-series)
687+ if requires_max_completion_tokens :
688+ # GPT-5 and o-series models have strict parameter requirements:
689+ # 1. Temperature must be exactly 1.0 (only supported value)
690+ # 2. top_p parameter is not supported and must be removed
691+ mapped_args ["temperature" ] = 1.0
692+ mapped_args .pop ("top_p" , None )
693+
675694 return mapped_args
676695
677696 def _map_google_params (self ) -> t .Dict [str , t .Any ]:
0 commit comments