diff --git a/.env.example b/.env.example index a3bfeed..e4d59e2 100644 --- a/.env.example +++ b/.env.example @@ -22,3 +22,7 @@ GITHUB_WEBHOOK_SECRET= # --- Grafana Security --- GF_SECURITY_ADMIN_PASSWORD= + +# --- Finops/GreenOps --- +ENERGY_COST_CAD_KWH=0.15 +CARBON_INTENSITY_G_KWH=150.0 \ No newline at end of file diff --git a/internal/analytics/analytics.go b/internal/analytics/analytics.go index eb1b7e3..e3acba7 100644 --- a/internal/analytics/analytics.go +++ b/internal/analytics/analytics.go @@ -6,8 +6,11 @@ import ( "fmt" "net/http" "net/url" + "os" "strconv" "time" + + "observability-hub/internal/telemetry" ) // MetricBatch represents a collection of metric samples keyed by type (cpu, ram, disk, network, temp). @@ -107,6 +110,17 @@ type ThanosResourceProvider struct { } func NewThanosResourceProvider(client *ThanosClient) *ThanosResourceProvider { + // Log initial factors on startup + carbon := os.Getenv("CARBON_INTENSITY_G_KWH") + if carbon == "" { + carbon = "150.0 (default)" + } + cost := os.Getenv("ENERGY_COST_CAD_KWH") + if cost == "" { + cost = "0.15 (default)" + } + telemetry.Info("analytics_factors_loaded", "carbon_intensity", carbon, "energy_cost", cost) + return &ThanosResourceProvider{Client: client} } @@ -178,6 +192,7 @@ func (p *ThanosResourceProvider) GetHostServiceCPU(ctx context.Context, start, e func (p *ThanosResourceProvider) GetValueUnits(ctx context.Context, start, end time.Time) (map[string]float64, error) { // 1. Define queries for all business value counters + // In the future, this could be loaded from a config file. queries := map[string]string{ "ingestion": "sum(increase(second_brain_sync_processed_total[15m])) + sum(increase(reading_sync_processed_total[15m]))", "proxy": "sum(increase(proxy_webhook_received_total[15m])) + sum(increase(proxy_synthetic_request_total[15m]))", @@ -199,15 +214,29 @@ func (p *ThanosResourceProvider) GetValueUnits(ctx context.Context, start, end t } func (p *ThanosResourceProvider) GetCarbonIntensity(ctx context.Context) (float64, error) { - - // Default: ~150g CO2 per kWh (Sample value for a "greenish" grid) - // In a real implementation, this could call an external API. - return 150.0, nil + // Use environment variable if present, otherwise default to 150.0 + valStr := os.Getenv("CARBON_INTENSITY_G_KWH") + if valStr == "" { + return 150.0, nil + } + val, err := strconv.ParseFloat(valStr, 64) + if err != nil { + return 150.0, nil + } + return val, nil } func (p *ThanosResourceProvider) GetCostFactor(ctx context.Context) (float64, error) { - // Default: $0.15 CAD per kWh (Sample price) + // Use environment variable if present, otherwise default to 0.15 CAD/kWh + valStr := os.Getenv("ENERGY_COST_CAD_KWH") + pricePerKWh := 0.15 + if valStr != "" { + if val, err := strconv.ParseFloat(valStr, 64); err == nil { + pricePerKWh = val + } + } + // 1 kWh = 3_600_000 Joules - // Cost per Joule = 0.15 / 3_600_000 - return 0.15 / 3600000.0, nil + // Cost per Joule = pricePerKWh / 3_600_000 + return pricePerKWh / 3600000.0, nil } diff --git a/internal/analytics/service.go b/internal/analytics/service.go index 1d8df4c..f0f21ca 100644 --- a/internal/analytics/service.go +++ b/internal/analytics/service.go @@ -268,6 +268,7 @@ func (s *Service) recordMetricsForFeature(ctx context.Context, t time.Time, feat telemetry.Info("feature_analytics_recorded", "feature_id", featureID, "joules", joules, "host", hostName) } + func mapContainerToFeature(container string) string { // Simple mapping based on known service names (containers or systemd units) mapping := map[string]string{ @@ -278,6 +279,7 @@ func mapContainerToFeature(container string) string { "analytics": "analytics-engine", "mcp-telemetry": "agentic-telemetry", "mcp-pods": "agentic-kubernetes", + "mcp-hub": "agentic-hub", "postgresql": "database-core", "postgres": "database-core", "prometheus-server": "observability-infra", diff --git a/k3s/analytics/values.yaml b/k3s/analytics/values.yaml index bd8326e..87f867d 100644 --- a/k3s/analytics/values.yaml +++ b/k3s/analytics/values.yaml @@ -13,6 +13,8 @@ nameOverride: "" fullnameOverride: "" podAnnotations: {} +podLabels: + app.kubernetes.io/feature: analytics-engine podSecurityContext: {} # fsGroup: 2000 diff --git a/k3s/grafana/dashboards/pod-resources.json b/k3s/grafana/dashboards/pod-resources.json index ba17b1e..08d9b40 100644 --- a/k3s/grafana/dashboards/pod-resources.json +++ b/k3s/grafana/dashboards/pod-resources.json @@ -264,7 +264,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m]) * on(pod_id) group_left(namespace) label_replace(kube_pod_info{namespace=\"observability\"}, \"pod_id\", \"$1\", \"uid\", \"(.*)\")) / 1000 * 0.15 * 24", + "expr": "sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m]) * on(pod_id) group_left(namespace) label_replace(kube_pod_info{namespace=\"observability\"}, \"pod_id\", \"$1\", \"uid\", \"(.*)\")) / 1000 * $energy_cost_kwh * 24", "legendFormat": "CAD/Day", "range": true, "refId": "A" @@ -327,7 +327,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m]) * on(pod_id) group_left(namespace) label_replace(kube_pod_info{namespace=\"observability\"}, \"pod_id\", \"$1\", \"uid\", \"(.*)\")) / 1000 * 0.15 * 24 * 30", + "expr": "sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m]) * on(pod_id) group_left(namespace) label_replace(kube_pod_info{namespace=\"observability\"}, \"pod_id\", \"$1\", \"uid\", \"(.*)\")) / 1000 * $energy_cost_kwh * 24 * 30", "legendFormat": "CAD/Month", "range": true, "refId": "A" @@ -390,7 +390,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m]) * on(pod_id) group_left(namespace) label_replace(kube_pod_info{namespace=\"observability\"}, \"pod_id\", \"$1\", \"uid\", \"(.*)\")) / 1000 * 0.15 * 24 * 365", + "expr": "sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m]) * on(pod_id) group_left(namespace) label_replace(kube_pod_info{namespace=\"observability\"}, \"pod_id\", \"$1\", \"uid\", \"(.*)\")) / 1000 * $energy_cost_kwh * 24 * 365", "legendFormat": "CAD/Year", "range": true, "refId": "A" @@ -732,7 +732,48 @@ "k3s" ], "templating": { - "list": [] + "list": [ + { + "current": { + "selected": true, + "text": "0.15", + "value": "0.15" + }, + "hide": 0, + "label": "Energy Cost (CAD/kWh)", + "name": "energy_cost_kwh", + "options": [ + { + "selected": true, + "text": "0.15", + "value": "0.15" + } + ], + "query": "0.15", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": true, + "text": "150", + "value": "150" + }, + "hide": 0, + "label": "Carbon Intensity (g/kWh)", + "name": "carbon_intensity_kwh", + "options": [ + { + "selected": true, + "text": "150", + "value": "150" + } + ], + "query": "150", + "skipUrlSync": false, + "type": "custom" + } + ] }, "time": { "from": "now-1h", diff --git a/k3s/grafana/dashboards/sustainability-hub.json b/k3s/grafana/dashboards/sustainability-hub.json index 27bcde9..4657c1c 100644 --- a/k3s/grafana/dashboards/sustainability-hub.json +++ b/k3s/grafana/dashboards/sustainability-hub.json @@ -201,7 +201,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) * 0.15", + "expr": "sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) * ($carbon_intensity_kwh / 1000)", "legendFormat": "gCO2/h", "range": true, "refId": "A" @@ -262,7 +262,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) / 1000 * 0.15 * 24", + "expr": "sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) / 1000 * $energy_cost_kwh * 24", "legendFormat": "CAD/Day", "range": true, "refId": "A" @@ -323,7 +323,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "((sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) * 0.15 * 24 * 365) / 1000) / 21", + "expr": "((sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) * ($carbon_intensity_kwh / 1000) * 24 * 365) / 1000) / 21", "legendFormat": "Trees", "range": true, "refId": "A" @@ -512,7 +512,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) * 0.15", + "expr": "sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) * ($carbon_intensity_kwh / 1000)", "legendFormat": "Host Carbon", "refId": "A" }, @@ -522,7 +522,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m])) * 0.15", + "expr": "sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m])) * ($carbon_intensity_kwh / 1000)", "legendFormat": "Total Pods Carbon", "refId": "B" } @@ -770,7 +770,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) / 1000 * 0.15 * 24 * 30", + "expr": "sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) / 1000 * $energy_cost_kwh * 24 * 30", "legendFormat": "CAD/Month", "range": true, "refId": "A" @@ -831,7 +831,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) / 1000 * 0.15 * 24 * 365", + "expr": "sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) / 1000 * $energy_cost_kwh * 24 * 365", "legendFormat": "CAD/Year", "range": true, "refId": "A" @@ -893,7 +893,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "(sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) * 0.15 * 24 * 365) / 1000", + "expr": "(sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) * ($carbon_intensity_kwh / 1000) * 24 * 365) / 1000", "legendFormat": "kgCO2/Year", "range": true, "refId": "A" @@ -955,7 +955,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "((sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) - sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m]))) * 0.15 * 24 * 365) / 1000", + "expr": "((sum(rate(kepler_node_cpu_joules_total{zone=\"package\"}[5m])) - sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m]))) * ($carbon_intensity_kwh / 1000) * 24 * 365) / 1000", "legendFormat": "kgCO2/Year", "range": true, "refId": "A" @@ -1086,7 +1086,7 @@ "uid": "prometheus-provisioned" }, "editorMode": "code", - "expr": "sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m]) * on(pod_id) group_left(namespace) label_replace(kube_pod_info, \"pod_id\", \"$1\", \"uid\", \"(.*)\")) by (namespace) * 0.15", + "expr": "sum(rate(kepler_container_cpu_joules_total{container_name!=\"\", pod_id!=\"\", zone=\"package\"}[5m]) * on(pod_id) group_left(namespace) label_replace(kube_pod_info, \"pod_id\", \"$1\", \"uid\", \"(.*)\")) by (namespace) * ($carbon_intensity_kwh / 1000)", "legendFormat": "{{namespace}}", "refId": "A" } @@ -1170,7 +1170,48 @@ "finops" ], "templating": { - "list": [] + "list": [ + { + "current": { + "selected": true, + "text": "0.15", + "value": "0.15" + }, + "hide": 0, + "label": "Energy Cost (CAD/kWh)", + "name": "energy_cost_kwh", + "options": [ + { + "selected": true, + "text": "0.15", + "value": "0.15" + } + ], + "query": "0.15", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": true, + "text": "150", + "value": "150" + }, + "hide": 0, + "label": "Carbon Intensity (g/kWh)", + "name": "carbon_intensity_kwh", + "options": [ + { + "selected": true, + "text": "150", + "value": "150" + } + ], + "query": "150", + "skipUrlSync": false, + "type": "custom" + } + ] }, "time": { "from": "now-1h", diff --git a/k3s/grafana/dashboards/unit-economics.json b/k3s/grafana/dashboards/unit-economics.json index 79d668d..4c7fd7e 100644 --- a/k3s/grafana/dashboards/unit-economics.json +++ b/k3s/grafana/dashboards/unit-economics.json @@ -73,7 +73,7 @@ }, "editorMode": "code", "format": "table", - "rawSql": "SELECT SUM(value) as value FROM analytics_metrics WHERE kind = 'cost' AND feature_id = 'node-total' AND $__timeFilter(time);", + "rawSql": "SELECT SUM(value * $energy_cost_kwh / 3600000.0) as value FROM analytics_metrics WHERE kind = 'energy' AND feature_id = 'node-total' AND $__timeFilter(time);", "refId": "A" } ], @@ -133,7 +133,7 @@ }, "editorMode": "code", "format": "table", - "rawSql": "SELECT SUM(value) as value FROM analytics_metrics WHERE kind = 'carbon' AND feature_id = 'node-total' AND $__timeFilter(time);", + "rawSql": "SELECT SUM(value * $carbon_intensity_kwh / 3600000.0) as value FROM analytics_metrics WHERE kind = 'energy' AND feature_id = 'node-total' AND $__timeFilter(time);", "refId": "A" } ], @@ -463,7 +463,7 @@ }, "editorMode": "code", "format": "table", - "rawSql": "SELECT\n time AS \"time\",\n feature_id,\n value\nFROM analytics_metrics\nWHERE\n kind = 'carbon'\n AND feature_id != 'node-total'\n AND $__timeFilter(time)\nORDER BY 1;", + "rawSql": "SELECT\n time AS \"time\",\n feature_id,\n value * $carbon_intensity_kwh / 3600000.0 as value\nFROM analytics_metrics\nWHERE\n kind = 'energy'\n AND feature_id != 'node-total'\n AND $__timeFilter(time)\nORDER BY 1;", "refId": "A" } ], @@ -478,7 +478,48 @@ "unit-economics" ], "templating": { - "list": [] + "list": [ + { + "current": { + "selected": true, + "text": "0.15", + "value": "0.15" + }, + "hide": 0, + "label": "Energy Cost (CAD/kWh)", + "name": "energy_cost_kwh", + "options": [ + { + "selected": true, + "text": "0.15", + "value": "0.15" + } + ], + "query": "0.15", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": true, + "text": "150", + "value": "150" + }, + "hide": 0, + "label": "Carbon Intensity (g/kWh)", + "name": "carbon_intensity_kwh", + "options": [ + { + "selected": true, + "text": "150", + "value": "150" + } + ], + "query": "150", + "skipUrlSync": false, + "type": "custom" + } + ] }, "time": { "from": "now-24h", diff --git a/k3s/grafana/values.yaml b/k3s/grafana/values.yaml index fff8ca2..63e72fa 100644 --- a/k3s/grafana/values.yaml +++ b/k3s/grafana/values.yaml @@ -1,4 +1,6 @@ revisionHistoryLimit: 3 +podLabels: + app.kubernetes.io/feature: observability-ui admin: existingSecret: "grafana-admin-secret" diff --git a/k3s/loki/values.yaml b/k3s/loki/values.yaml index a641f6f..766c637 100644 --- a/k3s/loki/values.yaml +++ b/k3s/loki/values.yaml @@ -1,4 +1,6 @@ loki: + podLabels: + app.kubernetes.io/feature: observability-logs auth_enabled: false commonConfig: replication_factor: 1 diff --git a/k3s/minio/values.yaml b/k3s/minio/values.yaml index 7d1f183..b60071b 100644 --- a/k3s/minio/values.yaml +++ b/k3s/minio/values.yaml @@ -3,6 +3,9 @@ image: tag: RELEASE.2025-02-07T23-21-09Z pullPolicy: IfNotPresent +podLabels: + app.kubernetes.io/feature: storage-core + mode: standalone persistence: diff --git a/k3s/opentelemetry/values.yaml b/k3s/opentelemetry/values.yaml index 25fd67f..9fd6e1e 100644 --- a/k3s/opentelemetry/values.yaml +++ b/k3s/opentelemetry/values.yaml @@ -1,5 +1,8 @@ fullnameOverride: opentelemetry +podLabels: + app.kubernetes.io/feature: observability-otel + revisionHistoryLimit: 3 mode: deployment diff --git a/k3s/postgres/values.yaml b/k3s/postgres/values.yaml index d29220e..2bc687f 100644 --- a/k3s/postgres/values.yaml +++ b/k3s/postgres/values.yaml @@ -18,6 +18,8 @@ auth: adminPasswordKey: "postgres-password" primary: + podLabels: + app.kubernetes.io/feature: database-core pdb: create: true maxUnavailable: 1 diff --git a/k3s/tempo/values.yaml b/k3s/tempo/values.yaml index 6009f86..486d81c 100644 --- a/k3s/tempo/values.yaml +++ b/k3s/tempo/values.yaml @@ -6,6 +6,8 @@ persistence: storageClassName: local-path-retain tempo: + podLabels: + app.kubernetes.io/feature: observability-traces replicas: 1 resources: requests: diff --git a/k3s/thanos/values.yaml b/k3s/thanos/values.yaml index 24f53a3..3d2ae82 100644 --- a/k3s/thanos/values.yaml +++ b/k3s/thanos/values.yaml @@ -6,6 +6,9 @@ global: security: allowInsecureImages: true # Allow non-bitnami image +commonLabels: + app.kubernetes.io/feature: observability-infra + # Use same image as hand-written manifest (stable, proven) image: registry: quay.io