Merged
6 changes: 6 additions & 0 deletions api/evalhub/v1alpha1/evalhub_types.go
@@ -45,6 +45,12 @@ type EvalHubSpec struct {
// +optional
Env []corev1.EnvVar `json:"env,omitempty"`

// Providers is the list of OOTB provider names to mount into the deployment.
// Each name must match a provider-name label on a ConfigMap in the operator namespace.
// +kubebuilder:default:={"garak","guidellm","lighteval","lm-evaluation-harness"}
Member Author: default OOTB providers, so we don't have to force EH CR instance author to write them 🙃

// +optional
Providers []string `json:"providers,omitempty"`

// Database configuration for persistent storage.
// When set, the operator configures PostgreSQL via the referenced secret.
// When omitted, the service uses its default (in-memory SQLite).
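For context, a minimal EvalHub CR exercising the new field might look like the sketch below (the resource name, namespace, and exact apiVersion group are assumptions based on the repo layout, not taken from this PR):

```yaml
apiVersion: trustyai.opendatahub.io/v1alpha1  # assumed group; version matches api/evalhub/v1alpha1
kind: EvalHub
metadata:
  name: evalhub-sample   # hypothetical name
  namespace: test        # hypothetical namespace
spec:
  # Mount only a subset of the OOTB providers; each entry must match a
  # provider-name label on a ConfigMap in the operator namespace.
  providers:
    - garak
    - lm-evaluation-harness
```

Omitting `spec.providers` entirely triggers the kubebuilder default, so all four OOTB providers are mounted.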
5 changes: 5 additions & 0 deletions api/evalhub/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

29 changes: 29 additions & 0 deletions config/base/kustomization.yaml
@@ -6,6 +6,7 @@ resources:
- ../rbac
- ../manager
- ../prometheus
- ../configmaps
Member Author: ...and I needed to wire this too.


commonLabels:
app.kubernetes.io/part-of: trustyai
@@ -56,3 +57,31 @@ vars:
apiVersion: v1
fieldref:
fieldpath: data.evalHubImage
- name: evalhub-provider-garak-image
objref:
kind: ConfigMap
name: config
apiVersion: v1
fieldref:
fieldpath: data.evalhub-provider-garak-image
- name: evalhub-provider-guidellm-image
objref:
kind: ConfigMap
name: config
apiVersion: v1
fieldref:
fieldpath: data.evalhub-provider-guidellm-image
- name: evalhub-provider-lighteval-image
objref:
kind: ConfigMap
name: config
apiVersion: v1
fieldref:
fieldpath: data.evalhub-provider-lighteval-image
- name: evalhub-provider-lm-evaluation-harness-image
objref:
kind: ConfigMap
name: config
apiVersion: v1
fieldref:
fieldpath: data.evalhub-provider-lm-evaluation-harness-image
6 changes: 5 additions & 1 deletion config/base/params.env
@@ -16,4 +16,8 @@ guardrails-orchestrator-image=quay.io/trustyai/ta-guardrails-orchestrator:latest
guardrails-built-in-detector-image=quay.io/trustyai/guardrails-detector-built-in:latest
guardrails-sidecar-gateway-image=quay.io/trustyai/guardrails-sidecar-gateway:latest
garak-provider-image=quay.io/trustyai/llama-stack-provider-trustyai-garak:latest
nemo-guardrails-image=quay.io/trustyai/nemo-guardrails-server:latest
nemo-guardrails-image=quay.io/trustyai/nemo-guardrails-server:latest
evalhub-provider-garak-image=quay.io/evalhub/garak:latest
evalhub-provider-guidellm-image=quay.io/evalhub/community-guidellm:latest
evalhub-provider-lighteval-image=quay.io/evalhub/community-lighteval:latest
evalhub-provider-lm-evaluation-harness-image=quay.io/opendatahub/ta-lmes-job:odh-3.4-ea2
Comment on lines +20 to +23 (Member Author): we'll inject as needed here or in ODH overlay params.env, as usual
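As that comment notes, downstream overlays can override these image references; a hypothetical ODH overlay `params.env` entry might look like the following (the registry and tag are illustrative only, not taken from this PR):

```
evalhub-provider-garak-image=registry.example.com/odh/garak:some-tag
```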

2 changes: 2 additions & 0 deletions config/base/params.yaml
@@ -2,3 +2,5 @@
varReference:
- kind: Deployment
path: spec/template/spec/containers[]/image
- kind: ConfigMap
path: data
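This `varReference` addition is what lets kustomize substitute `$(evalhub-provider-*-image)` vars inside ConfigMap `data` fields, which the provider manifests below rely on. A sketch of the effect, using the garak var and the params.env value above:

```yaml
# Before `kustomize build`: the placeholder as committed in the provider ConfigMap
image: $(evalhub-provider-garak-image)
# After `kustomize build config/base`: resolved from params.env
image: quay.io/evalhub/garak:latest
```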
7 changes: 7 additions & 0 deletions config/configmaps/evalhub/kustomization.yaml
@@ -0,0 +1,7 @@
resources:
- provider-garak.yaml
- provider-guidellm.yaml
- provider-lighteval.yaml
- provider-lm-evaluation-harness.yaml

namespace: system
80 changes: 80 additions & 0 deletions config/configmaps/evalhub/provider-garak.yaml
@@ -0,0 +1,80 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: evalhub-provider-garak
labels:
trustyai.opendatahub.io/evalhub-provider-type: system
trustyai.opendatahub.io/evalhub-provider-name: garak
data:
garak.yaml: |
id: garak
name: Garak
description: LLM vulnerability scanner and red-teaming framework
type: builtin
runtime:
k8s:
image: $(evalhub-provider-garak-image)
Member Author: for one example, this is what the script makes sure to write in the manifest CM

entrypoint:
- python
- /opt/app-root/src/main.py
cpu_request: 100m
memory_request: 128Mi
cpu_limit: 500m
memory_limit: 1Gi
env:
- name: VAR_NAME
value: VALUE
local: null
benchmarks:
- id: toxicity
name: Toxicity Detection
description: Tests model's tendency to generate toxic content
category: safety
metrics:
- toxicity_rate
- severity_score
num_few_shot: 0
dataset_size: 500
tags:
- safety
- toxicity
- red_team
- id: bias_detection
name: Bias Detection
description: Evaluates model for various forms of bias
category: fairness
metrics:
- bias_score
- demographic_parity
num_few_shot: 0
dataset_size: 1000
tags:
- fairness
- bias
- demographic
- id: pii_leakage
name: PII Leakage
description: Tests for personally identifiable information leakage
category: privacy
metrics:
- pii_leak_rate
- sensitivity_score
num_few_shot: 0
dataset_size: 300
tags:
- privacy
- pii
- security
- id: prompt_injection
name: Prompt Injection
description: Tests resilience against prompt injection attacks
category: security
metrics:
- injection_success_rate
- defense_effectiveness
num_few_shot: 0
dataset_size: 200
tags:
- security
- injection
- adversarial
130 changes: 130 additions & 0 deletions config/configmaps/evalhub/provider-guidellm.yaml
@@ -0,0 +1,130 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: evalhub-provider-guidellm
labels:
trustyai.opendatahub.io/evalhub-provider-type: system
trustyai.opendatahub.io/evalhub-provider-name: guidellm
data:
guidellm.yaml: |
id: guidellm
name: GuideLLM
description: Performance benchmarking framework for LLM inference servers
type: builtin
runtime:
k8s:
image: $(evalhub-provider-guidellm-image)
entrypoint:
- python
- main.py
cpu_request: 100m
memory_request: 128Mi
cpu_limit: 1000m
memory_limit: 2Gi
local: null
benchmarks:
- id: sweep
name: Sweep Profile
description: Automatically finds optimal request rate by sweeping from low to high concurrency
category: performance
metrics:
- requests_per_second
- prompt_tokens_per_second
- output_tokens_per_second
- mean_ttft_ms
- mean_itl_ms
tags:
- performance
- throughput
- latency
- guidellm
- auto
- id: throughput
name: Throughput Profile
description: Measures maximum throughput by gradually increasing request rate until saturation
category: performance
metrics:
- requests_per_second
- prompt_tokens_per_second
- output_tokens_per_second
- mean_ttft_ms
- mean_itl_ms
tags:
- performance
- throughput
- guidellm
- saturation
- id: concurrent
name: Concurrent Profile
description: Tests performance with fixed number of concurrent requests
category: performance
metrics:
- requests_per_second
- prompt_tokens_per_second
- output_tokens_per_second
- mean_ttft_ms
- mean_itl_ms
tags:
- performance
- concurrency
- guidellm
- id: constant
name: Constant Rate Profile
description: Maintains constant request rate throughout benchmark duration
category: performance
metrics:
- requests_per_second
- prompt_tokens_per_second
- output_tokens_per_second
- mean_ttft_ms
- mean_itl_ms
tags:
- performance
- constant_rate
- guidellm
- id: poisson
name: Poisson Profile
description: Sends requests following Poisson distribution for realistic production simulation
category: performance
metrics:
- requests_per_second
- prompt_tokens_per_second
- output_tokens_per_second
- mean_ttft_ms
- mean_itl_ms
tags:
- performance
- poisson
- realistic
- guidellm
- id: quick_perf_test
name: Quick Performance Test
description: Fast performance evaluation with sweep profile and limited samples
category: performance
metrics:
- requests_per_second
- mean_ttft_ms
- mean_itl_ms
tags:
- performance
- quick
- guidellm
- suite
- id: comprehensive_perf_test
name: Comprehensive Performance Test
description: Thorough performance evaluation across all profiles
category: performance
metrics:
- requests_per_second
- prompt_tokens_per_second
- output_tokens_per_second
- mean_ttft_ms
- mean_itl_ms
tags:
- performance
- comprehensive
- guidellm
- suite