@@ -10,106 +10,45 @@ metadata:
1010immutable : true
1111{{- end }}
1212data :
13- # ============================================================================
14- # UNIFIED CONFIGURATION SYSTEM
15- # ============================================================================
16- # This ConfigMap contains both static (immutable) and dynamic (mutable) settings.
13+ # The main configuration is stored as a single YAML file (config.yaml) that is
14+ # volume-mounted into the controller container at /etc/wva/config.yaml.
1715 #
18- # IMMUTABLE PARAMETERS (require controller restart to change):
19- # - PROMETHEUS_BASE_URL: Prometheus connection endpoint
20- # - TLS certificate paths (security-sensitive)
21- # - Metrics/Probe bind addresses (infrastructure)
22- # - Leader election ID (coordination)
16+ # Precedence (highest to lowest): CLI flags > environment variables > config file > defaults
2317 #
24- # MUTABLE PARAMETERS (can be changed at runtime via ConfigMap updates):
25- # - GLOBAL_OPT_INTERVAL: Optimization interval
26- # - WVA_SCALE_TO_ZERO: Feature flag (can be changed, but may require restart for full effect)
27- # - Prometheus cache settings
28- #
29- # Attempts to change immutable parameters at runtime will be rejected and emit
30- # Warning events. See documentation for details.
31- # ============================================================================
32-
33- # ----------------------------------------------------------------------------
34- # IMMUTABLE: Prometheus Configuration (requires restart to change)
35- # ----------------------------------------------------------------------------
36- # REQUIRED: Set your Prometheus server URL
37- # Examples:
38- # - General: "https://prometheus:9090"
39- # - OpenShift: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091"
40- # - KIND cluster: "https://kube-prometheus-stack-prometheus.workload-variant-autoscaler-monitoring.svc.cluster.local:9090"
41- # PROMETHEUS_BASE_URL: "https://kube-prometheus-stack-prometheus.workload-variant-autoscaler-monitoring.svc.cluster.local:9090"
42- PROMETHEUS_BASE_URL : {{ .Values.wva.prometheus.baseURL | quote }}
43-
44- # TLS Configuration (TLS is always enabled for HTTPS-only support)
45- # PROMETHEUS_TLS_INSECURE_SKIP_VERIFY: "true" # Skip certificate verification (development/testing only)
46- PROMETHEUS_CA_CERT_PATH : {{ .Values.wva.prometheus.tls.caCertPath | default "/etc/ssl/certs/prometheus-ca.crt" | quote }} # CA certificate for server validation
47- # PROMETHEUS_CLIENT_CERT_PATH: "/path/to/client.crt" # Client certificate for mutual TLS
48- # PROMETHEUS_CLIENT_KEY_PATH: "/path/to/client.key" # Client private key for mutual TLS
49- # PROMETHEUS_SERVER_NAME: "prometheus.example.com" # Expected server name for SNI
50- PROMETHEUS_TLS_INSECURE_SKIP_VERIFY : {{ if and .Values.wva.prometheus.tls (hasKey .Values.wva.prometheus.tls "insecureSkipVerify") }}{{ .Values.wva.prometheus.tls.insecureSkipVerify | quote }}{{ else }}"true"{{ end }}
51-
52- # Authentication Configuration (BearerToken takes precedence over TokenPath)
53- # PROMETHEUS_BEARER_TOKEN: "your-token-here" # Direct bearer token (development/testing)
54- # PROMETHEUS_TOKEN_PATH: "/path/to/token/file" # Path to bearer token file (production with mounted secrets)
55-
56- # ----------------------------------------------------------------------------
57- # MUTABLE: EPP Integration Configuration (runtime-updatable)
58- # ----------------------------------------------------------------------------
59- # EPP metric reader bearer token for pod scraping
60- EPP_METRIC_READER_BEARER_TOKEN : " "
61-
62- # ----------------------------------------------------------------------------
63- # MUTABLE: Optimization Configuration (runtime-updatable)
64- # ----------------------------------------------------------------------------
65- # Global optimization interval - how often the controller runs optimization cycles
66- # Can be changed at runtime via ConfigMap updates (no restart required)
67- GLOBAL_OPT_INTERVAL : {{ .Values.wva.reconcileInterval | quote }}
68-
69- # ----------------------------------------------------------------------------
70- # MUTABLE: Feature Flags (runtime-updatable, but may require restart for full effect)
71- # ----------------------------------------------------------------------------
72- # Option to scale variants to zero replicas (default: false)
73- # Note: While this can be changed at runtime, some features may require restart
74- WVA_SCALE_TO_ZERO : {{ .Values.wva.scaleToZero | default "false" | quote }}
75-
76- # ----------------------------------------------------------------------------
77- # MUTABLE: Prometheus Metrics Cache Configuration (runtime-updatable)
78- # ----------------------------------------------------------------------------
79- # Each collector (Prometheus, EPP, etc.) has its own cache configuration
80- # Enable/disable Prometheus metrics caching (default: "true") - this is for debugging purposes, can be removed in the future
81- PROMETHEUS_METRICS_CACHE_ENABLED : {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.enabled | default "true" | quote }}{{ else }}"true"{{ end }}
82- # Prometheus cache TTL - how long metrics are cached before expiring (default: "30s")
83- PROMETHEUS_METRICS_CACHE_TTL : {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.ttl | default "30s" | quote }}{{ else }}"30s"{{ end }}
84- # Interval for background cleanup of expired Prometheus cache entries (default: "1m")
85- PROMETHEUS_METRICS_CACHE_CLEANUP_INTERVAL : {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.cleanupInterval | default "1m" | quote }}{{ else }}"1m"{{ end }}
86- # Background fetch interval - how often to fetch metrics in background (default: "30s", 0 = disable)
87- PROMETHEUS_METRICS_CACHE_FETCH_INTERVAL : {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.fetchInterval | default "30s" | quote }}{{ else }}"30s"{{ end }}
88- # Freshness thresholds - when metrics are considered fresh/stale/unavailable
89- PROMETHEUS_METRICS_CACHE_FRESH_THRESHOLD : {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.freshThreshold | default "1m" | quote }}{{ else }}"1m"{{ end }}
90- PROMETHEUS_METRICS_CACHE_STALE_THRESHOLD : {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.staleThreshold | default "2m" | quote }}{{ else }}"2m"{{ end }}
91- PROMETHEUS_METRICS_CACHE_UNAVAILABLE_THRESHOLD : {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.unavailableThreshold | default "5m" | quote }}{{ else }}"5m"{{ end }}
92-
93- # EPP metrics cache configuration (for future EPP collector)
94- # Uncomment and configure when EPP collector is implemented - future implementation
95- # EPP_METRICS_CACHE_ENABLED: "true"
96- # EPP_METRICS_CACHE_TTL: "15s"
97- # EPP_METRICS_CACHE_MAX_SIZE: "500"
98- # EPP_METRICS_CACHE_CLEANUP_INTERVAL: "30s"
99- # ============================================================================
100- # END OF CONFIGURATION
101- # ============================================================================
102- # For more information about immutable vs mutable parameters, see:
103- # https://github.com/llm-d/llm-d-workload-variant-autoscaler/blob/main/docs/user-guide/configuration.md#unified-configuration-system
104- #
105- # IMMUTABLE CONFIGMAP:
106- # If wva.configMap.immutable is set to true, this ConfigMap becomes immutable
107- # after creation. This provides security benefits:
108- # - Prevents accidental configuration changes
109- # - Protects against malicious modifications
110- # - Ensures configuration integrity
111- # However, this disables runtime config updates. To change configuration:
112- # 1. Delete the ConfigMap (kubectl delete configmap <name>)
113- # 2. Update Helm values and upgrade the release
114- # 3. Restart the controller pod
18+ # For more information see:
19+ # https://github.com/llm-d/llm-d-workload-variant-autoscaler/blob/main/docs/user-guide/configuration.md
20+ config.yaml : |
21+ # Prometheus Configuration (REQUIRED)
22+ # Base URL for Prometheus API (must use HTTPS). Rendered from wva.prometheus.baseURL.
23+ PROMETHEUS_BASE_URL: {{ .Values.wva.prometheus.baseURL | quote }}
24+ # Filesystem path to the CA certificate used to verify the Prometheus TLS certificate (default: /etc/ssl/certs/prometheus-ca.crt when wva.prometheus.tls or its caCertPath is unset).
25+ PROMETHEUS_CA_CERT_PATH: {{ if .Values.wva.prometheus.tls }}{{ .Values.wva.prometheus.tls.caCertPath | default "/etc/ssl/certs/prometheus-ca.crt" | quote }}{{ else }}"/etc/ssl/certs/prometheus-ca.crt"{{ end }}
26+ # Whether to skip TLS certificate verification when connecting to Prometheus. Renders "true" unless wva.prometheus.tls.insecureSkipVerify is explicitly set; set it to false to enforce verification.
27+ PROMETHEUS_TLS_INSECURE_SKIP_VERIFY: {{ if and .Values.wva.prometheus.tls (hasKey .Values.wva.prometheus.tls "insecureSkipVerify") }}{{ .Values.wva.prometheus.tls.insecureSkipVerify | quote }}{{ else }}"true"{{ end }}
28+
29+ # EPP Integration
30+ # Bearer token used to authenticate metric reads from EPP (rendered empty by this chart).
31+ EPP_METRIC_READER_BEARER_TOKEN: ""
32+
33+ # Optimization
34+ # Global optimization loop interval for autoscaling decisions. Rendered from wva.reconcileInterval.
35+ GLOBAL_OPT_INTERVAL: {{ .Values.wva.reconcileInterval | quote }}
36+
37+ # Feature Flags
38+ # Enables scale-to-zero behavior across managed workloads (default: "false" when wva.scaleToZero is unset).
39+ WVA_SCALE_TO_ZERO: {{ .Values.wva.scaleToZero | default "false" | quote }}
40+
41+ # Prometheus Metrics Cache (each key falls back to its default when wva.prometheus.metricsCache or the individual value is unset)
42+ # Time-to-live for cached Prometheus metric responses (default: 30s).
43+ PROMETHEUS_METRICS_CACHE_TTL: {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.ttl | default "30s" | quote }}{{ else }}"30s"{{ end }}
44+ # Interval for cleaning up expired entries from the metrics cache (default: 1m).
45+ PROMETHEUS_METRICS_CACHE_CLEANUP_INTERVAL: {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.cleanupInterval | default "1m" | quote }}{{ else }}"1m"{{ end }}
46+ # Interval for background refresh of metrics cache entries (default: 30s).
47+ PROMETHEUS_METRICS_CACHE_FETCH_INTERVAL: {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.fetchInterval | default "30s" | quote }}{{ else }}"30s"{{ end }}
48+ # Maximum age for metrics to be considered fresh (default: 1m).
49+ PROMETHEUS_METRICS_CACHE_FRESH_THRESHOLD: {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.freshThreshold | default "1m" | quote }}{{ else }}"1m"{{ end }}
50+ # Maximum age for metrics to be considered stale, before unavailable (default: 2m).
51+ PROMETHEUS_METRICS_CACHE_STALE_THRESHOLD: {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.staleThreshold | default "2m" | quote }}{{ else }}"2m"{{ end }}
52+ # Maximum age for metrics before they are considered unavailable (default: 5m).
53+ PROMETHEUS_METRICS_CACHE_UNAVAILABLE_THRESHOLD: {{ if and .Values.wva.prometheus .Values.wva.prometheus.metricsCache }}{{ .Values.wva.prometheus.metricsCache.unavailableThreshold | default "5m" | quote }}{{ else }}"5m"{{ end }}
11554{{- end }}
0 commit comments