From 9cb854ba6e0d6c9f73d699f85dbb03e215016994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Tue, 9 Dec 2025 11:51:59 +0100 Subject: [PATCH 1/5] [issues-7] Add reasoning param All tests passed, didn't yet try the reasoning in the main script --- .../configs/dataset_generator/llm_config.yaml | 6 ++-- .../dataset_generator/llm/llm_config.py | 2 +- .../llm/llm_provider_factory.py | 32 +++++++++++++------ tests/resources/llm_config.yaml | 6 ++-- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/examples/configs/dataset_generator/llm_config.yaml b/examples/configs/dataset_generator/llm_config.yaml index cf5f10a..2ee2b5f 100644 --- a/examples/configs/dataset_generator/llm_config.yaml +++ b/examples/configs/dataset_generator/llm_config.yaml @@ -6,8 +6,10 @@ name: openai # Chat model name model: gpt-5-nano-2025-08-07 -# Maximum number of tokens the model may return -max_tokens: 100000 +# Reasoning effort parameter of the LLM: avoid to disable reasoning +# Since the different LLM have different levels of reasoning, you must refer to the langchain documentation. If a +# non-valid level is sent, then the reasoning is set to the lowest possible (but still enabled) +reasoning_effort: low # Environment variable where LLM API key is stored api_key_env: OPENAI_API_KEY diff --git a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py index 835b206..f4bef25 100644 --- a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py +++ b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py @@ -13,7 +13,7 @@ class LLMConfig(BaseModel): name: str model: str - max_tokens: int = Field(default=512, gt=0) + reasoning_effort: Optional[str] = Field(default=None, description="The reasoning effort of the model") api_key_env: Optional[str] = None @classmethod diff --git a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py index a3a64bf..bd868cd 100644 --- a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py +++ b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py @@ -22,31 +22,43 @@ def build_openai(config: LLMConfig) -> BaseChatModel: load_dotenv() # load .env file + available_reasoning_effort = ["minimal", "low", "medium", "high"] key = os.getenv(config.api_key_env or "OPENAI_API_KEY") if not key: log.error("OpenAI API key not set %s in the env", config.api_key_env) raise ValueError("OpenAI API key not set.") - log.debug("Building OpenAI ChatModel using name=%s, model=%s, max_tokens=%s", - config.name, config.model, config.max_tokens) - return ChatOpenAI( - model=config.model, - max_tokens=config.max_tokens, # type: ignore[arg-type] - api_key=SecretStr(key), - ) + log.debug("Building OpenAI ChatModel using name=%s, model=%s, reasoning_effort=%s", + config.name, config.model, config.reasoning_effort) + if config.reasoning_effort is None: + return ChatOpenAI( + model=config.model, + api_key=SecretStr(key), + ) + else: + return ChatOpenAI( + model=config.model, + api_key=SecretStr(key), + reasoning_effort=config.reasoning_effort if config.reasoning_effort in available_reasoning_effort else "minimal", + ) def build_gemini(config: LLMConfig) -> BaseChatModel: load_dotenv() # load .env file + available_reasoning_effort = ["low", "high"] key = os.getenv(config.api_key_env or "GOOGLE_API_KEY") if not key: log.error("Google Gemini API key not set %s in the env", config.api_key_env) raise ValueError("Google Gemini API key not set.") - log.debug("Building Google Gemini ChatModel using name=%s, model=%s, max_tokens=%s", - config.name, config.model, config.max_tokens) + log.debug("Building Google Gemini ChatModel using name=%s, model=%s, reasoning_effort=%s", + config.name, config.model, config.reasoning_effort) return ChatGoogleGenerativeAI( model=config.model, - max_output_tokens=config.max_tokens, google_api_key=key, + model_kwargs={ + "thinking_config": { + "thinking_level": config.reasoning_effort if config.reasoning_effort in available_reasoning_effort else "low", + } + } ) diff --git a/tests/resources/llm_config.yaml b/tests/resources/llm_config.yaml index 704ecd6..34a5405 100644 --- a/tests/resources/llm_config.yaml +++ b/tests/resources/llm_config.yaml @@ -1,4 +1,2 @@ -llm_provider: "openai" -model: "gpt-3.5-turbo" -temperature: 0.7 -max_tokens: 1000 \ No newline at end of file +name: openai +model: gpt-5-nano-2025-08-07 From b0822f016454fd4f3e0e30644f720e373e16e033 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Tue, 9 Dec 2025 17:23:07 +0100 Subject: [PATCH 2/5] [issues-7] Add reasoning param All tests passed and tried some configurations. Now the validation happens at the Pydantic model level --- .gitignore | 2 +- .../configs/dataset_generator/llm_config.yaml | 4 ++-- .../dataset_generator/llm/llm_config.py | 21 ++++++++++++++++--- .../llm/llm_provider_factory.py | 6 ++---- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 64b187d..f0a3759 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,7 @@ wheels/ # Local Files and Directories .output_dataset output/ -/resources/ +/resources*/ **/tmp # Docker diff --git a/examples/configs/dataset_generator/llm_config.yaml b/examples/configs/dataset_generator/llm_config.yaml index 2ee2b5f..b4a317b 100644 --- a/examples/configs/dataset_generator/llm_config.yaml +++ b/examples/configs/dataset_generator/llm_config.yaml @@ -8,8 +8,8 @@ model: gpt-5-nano-2025-08-07 # Reasoning effort parameter of the LLM: avoid to disable reasoning # Since the different LLM have different levels of reasoning, you must refer to the langchain documentation. If a -# non-valid level is sent, then the reasoning is set to the lowest possible (but still enabled) -reasoning_effort: low +# non-valid level is sent, then the reasoning is set to the lowest reasoning level possible (but still enabled) +reasoning_effort: minimal # Environment variable where LLM API key is stored api_key_env: OPENAI_API_KEY diff --git a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py index f4bef25..447f587 100644 --- a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py +++ b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py @@ -2,20 +2,35 @@ import logging from pathlib import Path -from typing import Optional +from typing import Optional, Literal import yaml -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator log = logging.getLogger(__name__) class LLMConfig(BaseModel): - name: str + name: Literal['openai', 'gemini'] model: str reasoning_effort: Optional[str] = Field(default=None, description="The reasoning effort of the model") api_key_env: Optional[str] = None + @model_validator(mode="after") + def set_reasoning_effort_defaults(self) -> "LLMConfig": + if self.name == "openai": + defualt_effort_mode = "minimal" + effort_list = ["minimal", "low", "medium", "high"] + else: + defualt_effort_mode = "low" + effort_list = ["low", "high"] + + if self.reasoning_effort not in effort_list and self.reasoning_effort is not None: + self.reasoning_effort = defualt_effort_mode + + return self + + @classmethod def load(cls, path: str | Path = "llm_config.yaml") -> LLMConfig: path = Path(path).resolve() diff --git a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py index bd868cd..76adfcc 100644 --- a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py +++ b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py @@ -22,7 +22,6 @@ def build_openai(config: LLMConfig) -> BaseChatModel: load_dotenv() # load .env file - available_reasoning_effort = ["minimal", "low", "medium", "high"] key = os.getenv(config.api_key_env or "OPENAI_API_KEY") if not key: log.error("OpenAI API key not set %s in the env", config.api_key_env) @@ -38,13 +37,12 @@ def build_openai(config: LLMConfig) -> BaseChatModel: return ChatOpenAI( model=config.model, api_key=SecretStr(key), - reasoning_effort=config.reasoning_effort if config.reasoning_effort in available_reasoning_effort else "minimal", + reasoning_effort=config.reasoning_effort, ) def build_gemini(config: LLMConfig) -> BaseChatModel: load_dotenv() # load .env file - available_reasoning_effort = ["low", "high"] key = os.getenv(config.api_key_env or "GOOGLE_API_KEY") if not key: log.error("Google Gemini API key not set %s in the env", config.api_key_env) @@ -56,7 +54,7 @@ def build_gemini(config: LLMConfig) -> BaseChatModel: google_api_key=key, model_kwargs={ "thinking_config": { - "thinking_level": config.reasoning_effort if config.reasoning_effort in available_reasoning_effort else "low", + "thinking_level": config.reasoning_effort, } } ) From 95ce8197930c3ee45c298ba95cbaa13c7ee8b10b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Wed, 10 Dec 2025 10:06:00 +0100 Subject: [PATCH 3/5] [issues-7] fixed typo in llm_config.py and config file comment llm_config.yaml --- examples/configs/dataset_generator/llm_config.yaml | 10 ++++++---- .../dataset_generator/llm/llm_config.py | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/examples/configs/dataset_generator/llm_config.yaml b/examples/configs/dataset_generator/llm_config.yaml index b4a317b..3798439 100644 --- a/examples/configs/dataset_generator/llm_config.yaml +++ b/examples/configs/dataset_generator/llm_config.yaml @@ -1,14 +1,16 @@ # llm model configuration -# OpenAI LLM +# LLM provider +# Accepted values: openai, gemini name: openai # Chat model name model: gpt-5-nano-2025-08-07 -# Reasoning effort parameter of the LLM: avoid to disable reasoning -# Since the different LLM have different levels of reasoning, you must refer to the langchain documentation. If a -# non-valid level is sent, then the reasoning is set to the lowest reasoning level possible (but still enabled) +# (Optional) Reasoning effort parameter of the LLM. +# If reasoning_effort is not provided, the related Langchain parameter is not set. +# If reasoning_effort is set, and its level is not found, the level is set to the lowest level for the given model. +# Since the different LLM have different levels of reasoning, you must refer to the langchain documentation. reasoning_effort: minimal # Environment variable where LLM API key is stored diff --git a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py index 447f587..f6d5837 100644 --- a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py +++ b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py @@ -19,14 +19,14 @@ class LLMConfig(BaseModel): @model_validator(mode="after") def set_reasoning_effort_defaults(self) -> "LLMConfig": if self.name == "openai": - defualt_effort_mode = "minimal" + default_effort_mode = "minimal" effort_list = ["minimal", "low", "medium", "high"] else: - defualt_effort_mode = "low" + default_effort_mode = "low" effort_list = ["low", "high"] if self.reasoning_effort not in effort_list and self.reasoning_effort is not None: - self.reasoning_effort = defualt_effort_mode + self.reasoning_effort = default_effort_mode return self From 2de20d6c2db0f7824d4a6d8c92a98cb10d8632e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Wed, 10 Dec 2025 10:19:46 +0100 Subject: [PATCH 4/5] [issues-7] changed logic in `set_reasoning_effort_defaults` in llm_config.py --- .../dataset_generator/llm/llm_config.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py index f6d5837..3379099 100644 --- a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py +++ b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_config.py @@ -18,15 +18,16 @@ class LLMConfig(BaseModel): @model_validator(mode="after") def set_reasoning_effort_defaults(self) -> "LLMConfig": - if self.name == "openai": - default_effort_mode = "minimal" - effort_list = ["minimal", "low", "medium", "high"] - else: - default_effort_mode = "low" - effort_list = ["low", "high"] - - if self.reasoning_effort not in effort_list and self.reasoning_effort is not None: - self.reasoning_effort = default_effort_mode + if self.reasoning_effort is not None: + if self.name == "openai": + default_effort_mode = "minimal" + effort_list = ["minimal", "low", "medium", "high"] + else: + default_effort_mode = "low" + effort_list = ["low", "high"] + + if self.reasoning_effort not in effort_list: + self.reasoning_effort = default_effort_mode return self From 5f1f4713c9b8ebfdc5754ab30eaa3dda925f2508 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Wed, 10 Dec 2025 10:37:51 +0100 Subject: [PATCH 5/5] [issues-7] typo in `build_gemini` in llm_provider_factory.py --- .../dataset_generator/llm/llm_provider_factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py index 76adfcc..64cee10 100644 --- a/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py +++ b/src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py @@ -54,7 +54,7 @@ def build_gemini(config: LLMConfig) -> BaseChatModel: google_api_key=key, model_kwargs={ "thinking_config": { - "thinking_level": config.reasoning_effort, + "thinking_budget": config.reasoning_effort, } } )