feat(RHOAIENG-33702): Update LMEval configuration to support environment variable for Kubernetes usage (#58)

ruivieira · leseb · web-flow · commit 545f12b467bf · 2025-09-10T10:21:16.000+01:00
* refactor: Update LMEval configuration to support environment variable for Kubernetes usage

* refactor: Fix environment variable defaults in run.yaml and linting

* Update run.yaml

Co-authored-by: Sébastien Han <seb@redhat.com>

* Update run.yaml

Co-authored-by: Sébastien Han <seb@redhat.com>

---------

Co-authored-by: Sébastien Han <seb@redhat.com>
diff --git a/run.yaml b/run.yaml
@@ -8,15 +8,15 @@ providers:
     - provider_id: vllm
       provider_type: remote::vllm
       config:
-        url: ${env.VLLM_URL:http://localhost:8000/v1}
+        url: ${env.VLLM_URL:=}
         max_tokens: ${env.VLLM_MAX_TOKENS:4096}
         api_token: ${env.VLLM_API_TOKEN:fake}
         tls_verify: ${env.VLLM_TLS_VERIFY:true}
   eval:
     - provider_id: trustyai_lmeval
       provider_type: remote::trustyai_lmeval
       config:
-        use_k8s: True
-        base_url: ${env.VLLM_URL:http://localhost:8000/v1}
+        use_k8s: ${env.TRUSTYAI_LMEVAL_USE_K8S:=true}
+        base_url: ${env.VLLM_URL:=}
         namespace: ${env.TRUSTYAI_LM_EVAL_NAMESPACE}
 external_providers_dir: ./providers.d
diff --git a/src/llama_stack_provider_lmeval/config.py b/src/llama_stack_provider_lmeval/config.py
@@ -130,10 +130,6 @@ def __post_init__(self):
         """Validate the configuration"""
         if not isinstance(self.use_k8s, bool):
             raise LMEvalConfigError("use_k8s must be a boolean")
-        if self.use_k8s is False:
-            raise LMEvalConfigError(
-                "Only Kubernetes LMEval backend is supported at the moment"
-            )
 
 
 __all__ = [
diff --git a/src/llama_stack_provider_lmeval/lmeval.py b/src/llama_stack_provider_lmeval/lmeval.py
@@ -923,27 +923,40 @@ class LMEval(Eval, BenchmarksProtocolPrivate):
 
     def __init__(self, config: LMEvalEvalProviderConfig):
         self._config = config
+        self._namespace: str | None = None
 
-        self._namespace = _resolve_namespace(self._config)
-
-        logger.debug("LMEval provider initialized with namespace: %s", self._namespace)
         logger.debug("LMEval provider config values: %s", vars(self._config))
         self.benchmarks: dict[str, Benchmark] = {}
         self._jobs: list[Job] = []
         self._job_metadata: dict[str, dict[str, Any]] = {}
 
         self._k8s_client: k8s_client.ApiClient | None = None
         self._k8s_custom_api: k8s_client.CustomObjectsApi | None = None
-        if self.use_k8s:
+        self._cr_builder: LMEvalCRBuilder | None = None
+
+    def _ensure_k8s_initialized(self):
+        """Ensure Kubernetes client and namespace are initialized when needed."""
+        if not self.use_k8s:
+            logger.warning("Non-K8s evaluation backend is not implemented yet")
+            return
+
+        if self._k8s_client is None:
             self._init_k8s_client()
-            logger.debug(
-                "Initialized Kubernetes client with namespace: %s", self._namespace
-            )
+
+        if self._namespace is None:
+            self._namespace = _resolve_namespace(self._config)
+            logger.debug("LMEval provider resolved namespace: %s", self._namespace)
+
+        if self._cr_builder is None:
             self._cr_builder = LMEvalCRBuilder(
                 namespace=self._namespace,
                 service_account=getattr(self._config, "service_account", None),
             )
             self._cr_builder._config = self._config
+            logger.debug(
+                "Initialized Kubernetes client and CR builder with namespace: %s",
+                self._namespace,
+            )
 
     def _init_k8s_client(self):
         """Initialize the Kubernetes client."""
@@ -1048,7 +1061,8 @@ def _deploy_lmeval_cr(self, cr: dict, job_id: str) -> None:
             pvc_name = None
 
             if (
-                self._cr_builder._config is not None
+                self._cr_builder is not None
+                and self._cr_builder._config is not None
                 and hasattr(self._cr_builder._config, "metadata")
                 and self._cr_builder._config.metadata
             ):
@@ -1198,6 +1212,7 @@ async def run_eval(
         Returns:
             Dict containing job_id for evaluation tracking
         """
+        self._ensure_k8s_initialized()
         if not self.use_k8s:
             raise NotImplementedError("Non-K8s evaluation not implemented yet")
 
@@ -1224,6 +1239,11 @@ async def run_eval(
                     benchmark_config.metadata["input"]["storage"],
                 )
 
+        if self._cr_builder is None:
+            raise LMEvalConfigError(
+                "CR builder not initialized - ensure K8s is properly configured"
+            )
+
         cr = self._cr_builder.create_cr(
             benchmark_id=benchmark_id,
             task_config=benchmark_config,
@@ -1295,6 +1315,7 @@ async def evaluate_rows(
         Returns:
             EvaluateResponse: Object containing generations and scores
         """
+        self._ensure_k8s_initialized()
         if not self.use_k8s:
             raise NotImplementedError("Non-K8s evaluation not implemented yet")
 
@@ -1323,6 +1344,7 @@ async def job_status(self, benchmark_id: str, job_id: str) -> dict[str, str] | N
         Returns:
             Dict with current status of the job
         """
+        self._ensure_k8s_initialized()
         if not self.use_k8s:
             raise NotImplementedError("Non-K8s evaluation not implemented yet")
 
@@ -1395,6 +1417,7 @@ async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
             benchmark_id: The benchmark identifier
             job_id: The job identifier
         """
+        self._ensure_k8s_initialized()
         if not self.use_k8s:
             raise NotImplementedError("Non-K8s evaluation not implemented yet")
 
@@ -1568,6 +1591,7 @@ async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
         Returns:
             EvaluateResponse: Results of the evaluation
         """
+        self._ensure_k8s_initialized()
         if not self.use_k8s:
             return EvaluateResponse(
                 generations=[],