Skip to content

Commit d3ddfd8

Browse files
[issues-13] Add lazy import feature
1 parent eb4c886 commit d3ddfd8

File tree

5 files changed

+90
-7
lines changed

5 files changed

+90
-7
lines changed

src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22
llm_provider_factory.py
33
44
Provides a simple Factory for creating LangChain ChatModel instances
5-
and currently only 2 LLMs - openai and gemini are supported in the factory.
5+
with lazy initialization for the 2 currently supported LLMs - openai and gemini.
66
77
"""
88

99
import logging
1010
import os
11+
from typing import Optional
1112

1213
from dotenv import load_dotenv
1314
from langchain_core.language_models import BaseChatModel
@@ -50,11 +51,29 @@ def build_gemini(config: LLMConfig) -> BaseChatModel:
5051
)
5152

5253

54+
class LazyLLM:
55+
def __init__(self, config: LLMConfig):
56+
self.config = config
57+
self._llm: Optional[BaseChatModel] = None
58+
59+
@property
60+
def llm(self) -> BaseChatModel:
61+
if self._llm is None:
62+
log.info("Initializing LLM for the first time: provider=%s, model=%s",
63+
self.config.name, self.config.model)
64+
self._llm = LLMServiceFactory.build(self.config)
65+
return self._llm
66+
67+
def __getattr__(self, name): # type: ignore[no-untyped-def]
68+
return getattr(self.llm, name)
69+
70+
5371
class LLMServiceFactory:
5472
PROVIDER_REGISTRY = {
5573
"openai": build_openai,
5674
"gemini": build_gemini,
5775
}
76+
_cache: dict[str, LazyLLM] = {}
5877

5978
@classmethod
6079
def build(cls, config: LLMConfig) -> BaseChatModel:
@@ -63,5 +82,17 @@ def build(cls, config: LLMConfig) -> BaseChatModel:
6382
if provider_name not in cls.PROVIDER_REGISTRY:
6483
log.error("Unsupported LLM provider requested: %s", provider_name)
6584
raise ValueError(f"Unsupported provider: {provider_name}")
66-
log.info("Selected LLM provider=%s, model=%s", provider_name, provider_model)
85+
log.info("Building LLM provider=%s, model=%s", provider_name, provider_model)
6786
return cls.PROVIDER_REGISTRY[provider_name](config)
87+
88+
@classmethod
def build_lazy(cls, config: LLMConfig) -> LazyLLM:
    """Return a cached LazyLLM wrapper for *config*, creating it on first request.

    The underlying chat model itself is NOT built here — only the lazy
    wrapper is created/reused.
    """
    # NOTE(review): the cache key covers only provider name and model;
    # configs differing in max_tokens or api_key_env would share a
    # wrapper — confirm this is intended.
    key = f"{config.name}:{config.model}"
    wrapper = cls._cache.get(key)
    if wrapper is None:
        log.debug("Creating lazy LLM wrapper for %s", key)
        wrapper = LazyLLM(config)
        cls._cache[key] = wrapper
    else:
        log.debug("Reusing cached lazy LLM wrapper for %s", key)
    return wrapper

src/llm_search_quality_evaluation/dataset_generator/llm/llm_service.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
import logging
33
from typing import Optional
44

5-
from langchain_core.language_models import BaseChatModel
65
from langchain_core.messages import HumanMessage, SystemMessage
76
from pydantic import BaseModel, ValidationError
87

8+
from llm_search_quality_evaluation.dataset_generator.llm.llm_provider_factory import LazyLLM
99
from llm_search_quality_evaluation.dataset_generator.models.query_response import LLMQueryResponse
1010
from llm_search_quality_evaluation.dataset_generator.models.score_response import LLMScoreResponse
1111
from llm_search_quality_evaluation.shared.models.document import Document
@@ -16,7 +16,7 @@
1616

1717

1818
class LLMService:
19-
def __init__(self, chat_model: BaseChatModel):
19+
def __init__(self, chat_model: LazyLLM):
2020
self.chat_model = chat_model
2121

2222
@staticmethod

src/llm_search_quality_evaluation/dataset_generator/main.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
# ------ temporary import for corpus.json bug workaround ------
44
import json
55
from pathlib import Path
6+
7+
from llm_search_quality_evaluation.dataset_generator.llm.llm_provider_factory import LazyLLM
68
from llm_search_quality_evaluation.shared.utils import _to_string
79
import argparse
810
# -------------------------------------------------------------
911

1012
from typing import List
11-
from langchain_core.language_models import BaseChatModel
1213
from logging import Logger, getLogger
1314

1415
# project imports
@@ -138,7 +139,7 @@ def main() -> None:
138139
search_engine_type=config.search_engine_type,
139140
endpoint=config.search_engine_collection_endpoint
140141
)
141-
llm: BaseChatModel = LLMServiceFactory.build(LLMConfig.load(config.llm_configuration_file))
142+
llm: LazyLLM = LLMServiceFactory.build_lazy(LLMConfig.load(config.llm_configuration_file))
142143
service: LLMService = LLMService(chat_model=llm)
143144
writer: AbstractWriter = WriterFactory.build(writer_config)
144145

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import pytest
2+
3+
from llm_search_quality_evaluation.dataset_generator.llm import LLMConfig, LLMService
4+
from llm_search_quality_evaluation.dataset_generator.llm.llm_provider_factory import LazyLLM, LLMServiceFactory
5+
from llm_search_quality_evaluation.shared.models import Document
6+
7+
8+
@pytest.fixture
def example_doc():
    """Provides a sample Document object for testing."""
    fields = {
        "title": "Car of the Year",
        "description": "The Toyota Camry, the nation's most popular car has now been rated as its best new model.",
    }
    return Document(id="doc1", fields=fields)
18+
19+
20+
@pytest.fixture
def query():
    """Sample user query about the example document."""
    return "Is a Toyota the car of the year?"
23+
24+
25+
def test_llm_factory_lazy__expected__llm_none():
    """A freshly built lazy wrapper must not have constructed its model yet."""
    config = LLMConfig(
        name="mock_name",
        model="mock_model",
        max_tokens=1024,
        api_key_env="mock_api_key",
    )
    wrapper: LazyLLM = LLMServiceFactory.build_lazy(config)
    assert wrapper._llm is None
34+
35+
36+
@pytest.mark.parametrize("provider, model", [
    ("openai", "gpt-5-nano-2025-08-07"),
    ("gemini", "gemini-3-pro-preview"),
])
def test_llm_factory_lazy_openai__expected__api_key_not_valid(example_doc, query, provider, model):
    """Forcing the lazy build with a bogus API key must raise ValueError."""
    # NOTE(review): despite the "openai" in the name, this case is
    # parametrized over both supported providers — consider renaming.
    config = LLMConfig(
        name=provider,
        model=model,
        max_tokens=1024,
        api_key_env="invalid_api_key",
    )
    lazy_model: LazyLLM = LLMServiceFactory.build_lazy(config)

    scorer: LLMService = LLMService(chat_model=lazy_model)
    # Scoring triggers the first real model build, which should fail.
    with pytest.raises(ValueError):
        _ = scorer.generate_score(example_doc, query, relevance_scale='binary')

tests/llm_search_quality_evaluation/dataset_generator/test_main_autosave.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def test_main_passes_autosave_option_to_datastore(monkeypatch, tmp_path: Path):
5555
# Patch factories to avoid network / heavy dependencies
5656
monkeypatch.setattr(main_mod, "SearchEngineFactory", types.SimpleNamespace(build=lambda **kwargs: object()))
5757
monkeypatch.setattr(main_mod, "LLMConfig", types.SimpleNamespace(load=lambda _path: object()))
58-
monkeypatch.setattr(main_mod, "LLMServiceFactory", types.SimpleNamespace(build=lambda _cfg: object()))
58+
monkeypatch.setattr(main_mod, "LLMServiceFactory", types.SimpleNamespace(build_lazy=lambda _cfg: object()))
5959
monkeypatch.setattr(main_mod, "WriterFactory", types.SimpleNamespace(build=lambda _cfg: DummyWriter()))
6060

6161
# No-op the heavy flow functions to keep the test focused on wiring

0 commit comments

Comments (0)