Skip to content

Commit b090bd2

Browse files
Merge pull request #17 from SeaseLtd/github-issues/13-lazy-import
Add lazy import feature for LLM
2 parents 6cc7021 + 3764786 commit b090bd2

File tree

5 files changed

+99
-7
lines changed

5 files changed

+99
-7
lines changed

src/llm_search_quality_evaluation/dataset_generator/llm/llm_provider_factory.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22
llm_provider_factory.py
33
44
Provides a simple Factory for creating LangChain ChatModel instances
5-
and currently only 2 LLMs - openai and gemini are supported in the factory.
5+
with lazy initialization for the 2 currently supported LLMs - openai and gemini.
66
77
"""
88

99
import logging
1010
import os
11+
from typing import Optional
1112

1213
from dotenv import load_dotenv
1314
from langchain_core.language_models import BaseChatModel
@@ -60,11 +61,29 @@ def build_gemini(config: LLMConfig) -> BaseChatModel:
6061
)
6162

6263

64+
class LazyLLM:
    """Proxy that defers construction of the underlying chat model until first use.

    Holds only the LLMConfig at creation time; the real BaseChatModel is built
    by LLMServiceFactory.build on the first access to ``llm`` (or to any
    delegated attribute) and cached for subsequent calls.
    """

    def __init__(self, config: LLMConfig):
        self.config = config
        self._llm: Optional[BaseChatModel] = None  # built lazily on first access

    @property
    def llm(self) -> BaseChatModel:
        """Return the real chat model, building it on the first call."""
        if self._llm is not None:
            return self._llm
        log.info("Initializing LLM for the first time: provider=%s, model=%s",
                 self.config.name, self.config.model)
        self._llm = LLMServiceFactory.build(self.config)
        return self._llm

    def __getattr__(self, attr):  # type: ignore[no-untyped-def]
        # Any attribute not found on the proxy itself is forwarded to the real
        # model, which triggers the lazy build on first use.
        return getattr(self.llm, attr)
79+
80+
6381
class LLMServiceFactory:
6482
PROVIDER_REGISTRY = {
6583
"openai": build_openai,
6684
"gemini": build_gemini,
6785
}
86+
_cached_lazy_llm: Optional[LazyLLM] = None
6887

6988
@classmethod
7089
def build(cls, config: LLMConfig) -> BaseChatModel:
@@ -73,5 +92,15 @@ def build(cls, config: LLMConfig) -> BaseChatModel:
7392
if provider_name not in cls.PROVIDER_REGISTRY:
7493
log.error("Unsupported LLM provider requested: %s", provider_name)
7594
raise ValueError(f"Unsupported provider: {provider_name}")
76-
log.info("Selected LLM provider=%s, model=%s", provider_name, provider_model)
95+
log.info("Building LLM provider=%s, model=%s", provider_name, provider_model)
7796
return cls.PROVIDER_REGISTRY[provider_name](config)
97+
98+
@classmethod
def build_lazy(cls, config: LLMConfig) -> LazyLLM:
    """Return a process-wide cached LazyLLM wrapper for *config*.

    The wrapper defers building the real chat model until first use.
    Fix over the previous version: the cache was keyed on nothing, so a
    second call with a *different* config silently returned a wrapper
    built from stale settings. The cached wrapper is now reused only
    when its config matches; otherwise it is replaced.
    """
    cached = cls._cached_lazy_llm
    # NOTE(review): assumes LLMConfig supports value equality (e.g. a
    # pydantic model / dataclass); with plain-object identity semantics
    # this still reuses the wrapper for the same config object and
    # rebuilds for a new one, which is safe either way.
    if cached is not None and cached.config == config:
        log.debug("Reusing cached lazy LLM wrapper for: provider=%s, model=%s", config.name, config.model)
        return cached

    log.debug("Creating lazy LLM wrapper for: provider=%s, model=%s", config.name, config.model)
    cls._cached_lazy_llm = LazyLLM(config)
    return cls._cached_lazy_llm

src/llm_search_quality_evaluation/dataset_generator/llm/llm_service.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
import logging
33
from typing import Optional
44

5-
from langchain_core.language_models import BaseChatModel
65
from langchain_core.messages import HumanMessage, SystemMessage
76
from pydantic import BaseModel, ValidationError
87

8+
from llm_search_quality_evaluation.dataset_generator.llm.llm_provider_factory import LazyLLM
99
from llm_search_quality_evaluation.dataset_generator.models.query_response import LLMQueryResponse
1010
from llm_search_quality_evaluation.dataset_generator.models.score_response import LLMScoreResponse
1111
from llm_search_quality_evaluation.shared.models.document import Document
@@ -16,7 +16,7 @@
1616

1717

1818
class LLMService:
19-
def __init__(self, chat_model: BaseChatModel):
19+
def __init__(self, chat_model: LazyLLM):
2020
self.chat_model = chat_model
2121

2222
@staticmethod

src/llm_search_quality_evaluation/dataset_generator/main.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
# ------ temporary import for corpus.json bug workaround ------
44
import json
55
from pathlib import Path
6+
7+
from llm_search_quality_evaluation.dataset_generator.llm.llm_provider_factory import LazyLLM
68
from llm_search_quality_evaluation.shared.utils import _to_string
79
import argparse
810
# -------------------------------------------------------------
911

1012
from typing import List
11-
from langchain_core.language_models import BaseChatModel
1213
from logging import Logger, getLogger
1314

1415
# project imports
@@ -138,7 +139,7 @@ def main() -> None:
138139
search_engine_type=config.search_engine_type,
139140
endpoint=config.search_engine_collection_endpoint
140141
)
141-
llm: BaseChatModel = LLMServiceFactory.build(LLMConfig.load(config.llm_configuration_file))
142+
llm: LazyLLM = LLMServiceFactory.build_lazy(LLMConfig.load(config.llm_configuration_file))
142143
service: LLMService = LLMService(chat_model=llm)
143144
writer: AbstractWriter = WriterFactory.build(writer_config)
144145

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import pytest
2+
from pydantic_core import ValidationError
3+
4+
from llm_search_quality_evaluation.dataset_generator.llm import LLMConfig, LLMService
5+
from llm_search_quality_evaluation.dataset_generator.llm.llm_provider_factory import LazyLLM, LLMServiceFactory
6+
from llm_search_quality_evaluation.shared.models import Document
7+
8+
9+
@pytest.fixture
def example_doc():
    """Provide a minimal Document (title + description) for scoring tests."""
    fields = {
        "title": "Car of the Year",
        "description": "The Toyota Camry, the nation's most popular car has now been rated as its best new model.",
    }
    return Document(id="doc1", fields=fields)
19+
20+
21+
@pytest.fixture
def query():
    """Provide a sample search query matching the example document."""
    sample_query = "Is a Toyota the car of the year?"
    return sample_query
24+
25+
26+
def test_llm_factory_lazy__expected__llm_none():
    """build_lazy returns a wrapper whose underlying model is not yet built.

    Fix: LLMServiceFactory memoizes the wrapper in ``_cached_lazy_llm``, so
    without resetting it this assertion depends on test execution order — a
    previously run test may already have built (or cached) an LLM.
    """
    LLMServiceFactory._cached_lazy_llm = None  # make the test order-independent
    cfg = LLMConfig(
        name="openai",
        model="mock_model",
        max_tokens=1024,
        api_key_env="mock_api_key",
    )
    llm: LazyLLM = LLMServiceFactory.build_lazy(cfg)
    # Lazy contract: no chat model has been constructed yet.
    assert llm._llm is None
35+
36+
def test_llm_factory_invalid_model_name__expected__validation_error():
    """An unknown provider name must be rejected by LLMConfig validation."""
    with pytest.raises(ValidationError):
        LLMConfig(
            name="mock_provider",
            model="mock_model",
            max_tokens=1024,
            api_key_env="mock_api_key",
        )
)
44+
45+
46+
47+
@pytest.mark.parametrize("provider, model", [
    ("openai", "gpt-5-nano-2025-08-07"),
    ("gemini", "gemini-3-pro-preview"),
])
def test_llm_factory_lazy__expected__api_key_not_valid(example_doc, query, provider, model):
    """Each provider must surface an error when the API key env var is invalid.

    Fixes over the previous version:
    - renamed (old name said "openai" but the test is parametrized over both
      openai and gemini);
    - reset the factory cache: build_lazy memoizes the first wrapper, so
      without the reset the second parametrized run silently reused the
      first provider's config and never exercised the second provider.
    """
    LLMServiceFactory._cached_lazy_llm = None
    cfg = LLMConfig(
        name=provider,
        model=model,
        max_tokens=1024,
        api_key_env="invalid_api_key",
    )
    llm: LazyLLM = LLMServiceFactory.build_lazy(cfg)

    service: LLMService = LLMService(chat_model=llm)
    with pytest.raises(ValueError):
        service.generate_score(example_doc, query, relevance_scale='binary')

tests/llm_search_quality_evaluation/dataset_generator/test_main_autosave.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def test_main_passes_autosave_option_to_datastore(monkeypatch, tmp_path: Path):
5555
# Patch factories to avoid network / heavy dependencies
5656
monkeypatch.setattr(main_mod, "SearchEngineFactory", types.SimpleNamespace(build=lambda **kwargs: object()))
5757
monkeypatch.setattr(main_mod, "LLMConfig", types.SimpleNamespace(load=lambda _path: object()))
58-
monkeypatch.setattr(main_mod, "LLMServiceFactory", types.SimpleNamespace(build=lambda _cfg: object()))
58+
monkeypatch.setattr(main_mod, "LLMServiceFactory", types.SimpleNamespace(build_lazy=lambda _cfg: object()))
5959
monkeypatch.setattr(main_mod, "WriterFactory", types.SimpleNamespace(build=lambda _cfg: DummyWriter()))
6060

6161
# No-op the heavy flow functions to keep the test focused on wiring

0 commit comments

Comments (0)