diff --git a/statgpt/common/config/__init__.py b/statgpt/common/config/__init__.py index d39b9f99..06a5c5a2 100644 --- a/statgpt/common/config/__init__.py +++ b/statgpt/common/config/__init__.py @@ -1,4 +1,4 @@ -from .llm_models import EmbeddingModelsEnum, LLMModelsEnum +from .llm_models import EmbeddingModelsEnum, LLMModelsEnum, ReasoningEffortEnum, VerbosityEnum from .logging import LoggingConfig, logger, multiline_logger from .versions import Versions diff --git a/statgpt/common/config/llm_models.py b/statgpt/common/config/llm_models.py index 56769f82..76638aa3 100644 --- a/statgpt/common/config/llm_models.py +++ b/statgpt/common/config/llm_models.py @@ -6,6 +6,26 @@ class EmbeddingModelsEnum(StrEnum): TEXT_EMBEDDING_3_LARGE = "text-embedding-3-large" +class ReasoningEffortEnum(StrEnum): + """Reasoning effort levels for GPT-5 models.""" + + NONE = "none" + """No reasoning mode - standard inference.""" + MINIMAL = "minimal" + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + XHIGH = "xhigh" + + +class VerbosityEnum(StrEnum): + """Output verbosity levels for GPT-5 models.""" + + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + + class LLMModelsEnum(StrEnum): # Gemini models GEMINI_2_0_FLASH_LITE_001 = "gemini-2.0-flash-lite-001" @@ -30,6 +50,11 @@ class LLMModelsEnum(StrEnum): GPT_4_1_MINI_2025_04_14 = "gpt-4.1-mini-2025-04-14" GPT_4_1_NANO_2025_04_14 = "gpt-4.1-nano-2025-04-14" + # GPT-5 models + GPT_5_MINI_2025_08_07 = "gpt-5-mini-2025-08-07" + GPT_5_1_2025_11_13 = "gpt-5.1-2025-11-13" + GPT_5_2_2025_12_11 = "gpt-5.2-2025-12-11" + @property def deployment_id(self) -> str: return os.getenv(f"LLM_MODELS_{self.name}", self.value) @@ -42,3 +67,12 @@ def is_gpt_41_family(self) -> bool: LLMModelsEnum.GPT_4_1_MINI_2025_04_14, LLMModelsEnum.GPT_4_1_NANO_2025_04_14, } + + @property + def is_gpt_5_family(self) -> bool: + """Check if the model belongs to the GPT-5 family.""" + return self in { + LLMModelsEnum.GPT_5_MINI_2025_08_07, + LLMModelsEnum.GPT_5_1_2025_11_13, + 
LLMModelsEnum.GPT_5_2_2025_12_11, + } diff --git a/statgpt/common/models/models.py b/statgpt/common/models/models.py index 8fb43bf1..8f89ac00 100644 --- a/statgpt/common/models/models.py +++ b/statgpt/common/models/models.py @@ -32,7 +32,7 @@ class Channel(DefaultBase): title: Mapped[str] description: Mapped[str] deployment_id: Mapped[str] = mapped_column(unique=True) - llm_model: Mapped[str] = mapped_column(default=langchain_settings.default_model.value) + llm_model: Mapped[str] = mapped_column(default=langchain_settings.default_model.value) details: Mapped[dict[str, Any]] = mapped_column(type_=postgresql.JSONB) # ~~~~~ Relationships ~~~~~ @@ -150,7 +150,9 @@ class ChannelDatasetVersion(DefaultBase): __tablename__ = "channel_dataset_versions" __table_args__ = ( UniqueConstraint( - 'channel_dataset_id', 'version', name='uix_unique_version_for_channel_dataset' + "channel_dataset_id", + "version", + name="uix_unique_version_for_channel_dataset", ), ) @@ -158,7 +160,7 @@ class ChannelDatasetVersion(DefaultBase): version: Mapped[int] = mapped_column(default=0) # will be auto-incremented by trigger preprocessing_status: Mapped[PreprocessingStatusEnum] pointer_to: Mapped[int | None] = mapped_column( - ForeignKey("channel_dataset_versions.id", ondelete='SET NULL'), default=None + ForeignKey("channel_dataset_versions.id", ondelete="SET NULL"), default=None ) creation_reason: Mapped[str] @@ -181,14 +183,14 @@ class ChannelDatasetVersion(DefaultBase): channel_dataset: Mapped[ChannelDataset] = relationship(back_populates="versions") pointer = relationship( "ChannelDatasetVersion", - remote_side='ChannelDatasetVersion.id', + remote_side="ChannelDatasetVersion.id", back_populates="pointing_versions", cascade="all", passive_deletes=True, ) pointing_versions = relationship( "ChannelDatasetVersion", - remote_side='ChannelDatasetVersion.pointer_to', + remote_side="ChannelDatasetVersion.pointer_to", back_populates="pointer", cascade="all, delete-orphan", 
passive_deletes=True, diff --git a/statgpt/common/schemas/model_config.py b/statgpt/common/schemas/model_config.py index 20c14078..a6505a87 100644 --- a/statgpt/common/schemas/model_config.py +++ b/statgpt/common/schemas/model_config.py @@ -1,6 +1,11 @@ -from pydantic import Field +from pydantic import Field, model_validator -from statgpt.common.config import EmbeddingModelsEnum, LLMModelsEnum +from statgpt.common.config import ( + EmbeddingModelsEnum, + LLMModelsEnum, + ReasoningEffortEnum, + VerbosityEnum, +) from statgpt.common.settings.langchain import langchain_settings from .base import BaseYamlModel @@ -10,7 +15,8 @@ class BaseModelConfig(BaseYamlModel): """Base config for LLM and embeddings models configs.""" api_version: str = Field( - default=langchain_settings.default_api_version, description="API version for the model" + default=langchain_settings.default_api_version, + description="API version for the model", ) @@ -30,16 +36,47 @@ class LLMModelConfig(BaseModelConfig): default=langchain_settings.default_model, description="The deployment of the model in DIAL", ) - temperature: float = Field( + temperature: float | None = Field( default=langchain_settings.default_temperature, description=( "The temperature of the model. 0.0 means deterministic output, higher values mean more" - " randomness." + " randomness. Note: For reasoning models (except reasoning_effort=none) should be set to 1" ), ) seed: int | None = Field( default=langchain_settings.default_seed, description=( - "The seed of the model. If set, the model will produce the same output for the same input." + "The seed of the model. If set, the model will produce the same output for the same input. " + "Note: Not supported by GPT-5 models." ), ) + reasoning_effort: ReasoningEffortEnum | None = Field( + default=None, + description=( + "Reasoning effort level for GPT-5 models. " + "Supports: none, minimal, low, medium, high, xhigh. 
" + "All models before gpt-5.1 default to medium reasoning effort, and do not support none." + ), + ) + verbosity: VerbosityEnum | None = Field( + default=None, + description="Output verbosity for GPT-5 models (low/medium/high).", + ) + + @model_validator(mode="after") + def _validate_model_family_params(self) -> "LLMModelConfig": + if self.deployment.is_gpt_5_family: + if self.seed is not None: + raise ValueError("seed is not supported for GPT-5 models") + if self.reasoning_effort is None: + raise ValueError("reasoning_effort is required for GPT-5 models") + if self.reasoning_effort is not ReasoningEffortEnum.NONE and self.temperature != 1: + raise ValueError( + "temperature must be set to 1 when reasoning_effort is enabled for GPT-5 models" + ) + else: + if self.reasoning_effort is not None: + raise ValueError("reasoning_effort is only supported for GPT-5 models") + if self.verbosity is not None: + raise ValueError("verbosity is only supported for GPT-5 models") + return self diff --git a/statgpt/common/settings/langchain.py b/statgpt/common/settings/langchain.py index 52f2f8fe..13e08ef4 100644 --- a/statgpt/common/settings/langchain.py +++ b/statgpt/common/settings/langchain.py @@ -1,6 +1,4 @@ -from typing import Optional - -from langchain import globals as lc_globals +from langchain_core import globals as lc_globals from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict @@ -35,7 +33,7 @@ class LangChainSettings(BaseSettings): description="Default API version for Azure OpenAI", ) - default_seed: Optional[int] = Field( + default_seed: int | None = Field( default=None, description="Default seed for reproducible outputs", ) diff --git a/statgpt/common/utils/models.py b/statgpt/common/utils/models.py index fc113b4c..59a85d0e 100644 --- a/statgpt/common/utils/models.py +++ b/statgpt/common/utils/models.py @@ -15,9 +15,7 @@ def get_chat_model( model_config: LLMModelConfig, azure_endpoint: str = dial_settings.url, timeout: 
httpx.Timeout | None = None, - **kwargs, ) -> AzureChatOpenAI: - # default params if not isinstance(api_key, SecretStr): api_key = SecretStr(api_key) if not timeout: @@ -26,13 +24,12 @@ def get_chat_model( azure_endpoint=azure_endpoint, api_version=model_config.api_version, azure_deployment=model_config.deployment.deployment_id, - temperature=model_config.temperature, - seed=model_config.seed, max_retries=10, api_key=api_key, # since we use SecretStr, it won't be logged timeout=timeout, # timeouts are crucial! ) - params.update(kwargs) # update default params + + params.update(model_config.model_dump(mode="json", exclude_none=True, exclude={"deployment"})) if model_config.deployment.is_gpt_41_family: callback = BrokenResponseInterceptor(regex_pattern=r'\s{5,}') @@ -49,7 +46,6 @@ def get_embeddings_model( api_key: str | SecretStr, model_config: EmbeddingsModelConfig, azure_endpoint: str = dial_settings.url, - **kwargs, ) -> AzureOpenAIEmbeddings: if not isinstance(api_key, SecretStr): api_key = SecretStr(api_key) @@ -60,7 +56,6 @@ def get_embeddings_model( max_retries=10, api_key=api_key, # since we use SecretStr, it won't be logged ) - params.update(kwargs) # update default params api_key_log = f'{api_key.get_secret_value()[:3]}*****{api_key.get_secret_value()[-2:]}' logger.info( f'creating langchain embeddings with the following params: {params}, Api key: {api_key_log}'