Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ wheels/
# Local Files and Directories
.output_dataset
output/
/resources/
/resources*/
**/tmp

# Docker
Expand Down
10 changes: 7 additions & 3 deletions examples/configs/dataset_generator/llm_config.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
# llm model configuration

# OpenAI LLM
# LLM provider
# Accepted values: openai, gemini
name: openai

# Chat model name
model: gpt-5-nano-2025-08-07

# Maximum number of tokens the model may return
max_tokens: 100000
# (Optional) Reasoning effort of the LLM.
# If reasoning_effort is not provided, the corresponding LangChain parameter is not set.
# If reasoning_effort is set to a level the model does not support, it falls back to the lowest level for that model.
# Different LLM providers support different reasoning levels; refer to the LangChain documentation for details.
reasoning_effort: minimal

# Environment variable where LLM API key is stored
api_key_env: OPENAI_API_KEY
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,36 @@

import logging
from pathlib import Path
from typing import Optional
from typing import Optional, Literal

import yaml
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, model_validator

log = logging.getLogger(__name__)


class LLMConfig(BaseModel):
name: str
name: Literal['openai', 'gemini']
model: str
max_tokens: int = Field(default=512, gt=0)
reasoning_effort: Optional[str] = Field(default=None, description="The reasoning effort of the model")
api_key_env: Optional[str] = None

@model_validator(mode="after")
def set_reasoning_effort_defaults(self) -> "LLMConfig":
if self.reasoning_effort is not None:
if self.name == "openai":
default_effort_mode = "minimal"
effort_list = ["minimal", "low", "medium", "high"]
else:
default_effort_mode = "low"
effort_list = ["low", "high"]

if self.reasoning_effort not in effort_list:
self.reasoning_effort = default_effort_mode

return self


@classmethod
def load(cls, path: str | Path = "llm_config.yaml") -> LLMConfig:
path = Path(path).resolve()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,19 @@ def build_openai(config: LLMConfig) -> BaseChatModel:
if not key:
log.error("OpenAI API key not set %s in the env", config.api_key_env)
raise ValueError("OpenAI API key not set.")
log.debug("Building OpenAI ChatModel using name=%s, model=%s, max_tokens=%s",
config.name, config.model, config.max_tokens)
return ChatOpenAI(
model=config.model,
max_tokens=config.max_tokens, # type: ignore[arg-type]
api_key=SecretStr(key),
)
log.debug("Building OpenAI ChatModel using name=%s, model=%s, reasoning_effort=%s",
config.name, config.model, config.reasoning_effort)
if config.reasoning_effort is None:
return ChatOpenAI(
model=config.model,
api_key=SecretStr(key),
)
else:
return ChatOpenAI(
model=config.model,
api_key=SecretStr(key),
reasoning_effort=config.reasoning_effort,
)


def build_gemini(config: LLMConfig) -> BaseChatModel:
Expand All @@ -41,12 +47,16 @@ def build_gemini(config: LLMConfig) -> BaseChatModel:
if not key:
log.error("Google Gemini API key not set %s in the env", config.api_key_env)
raise ValueError("Google Gemini API key not set.")
log.debug("Building Google Gemini ChatModel using name=%s, model=%s, max_tokens=%s",
config.name, config.model, config.max_tokens)
log.debug("Building Google Gemini ChatModel using name=%s, model=%s, reasoning_effort=%s",
config.name, config.model, config.reasoning_effort)
return ChatGoogleGenerativeAI(
model=config.model,
max_output_tokens=config.max_tokens,
google_api_key=key,
model_kwargs={
"thinking_config": {
"thinking_budget": config.reasoning_effort,
}
}
)


Expand Down
6 changes: 2 additions & 4 deletions tests/resources/llm_config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
llm_provider: "openai"
model: "gpt-3.5-turbo"
temperature: 0.7
max_tokens: 1000
name: openai
model: gpt-5-nano-2025-08-07