diff --git a/cmd/collectors/main.go b/cmd/collectors/main.go index fc63624..0f76e38 100644 --- a/cmd/collectors/main.go +++ b/cmd/collectors/main.go @@ -56,8 +56,8 @@ const ( queryCPUUtil = `100 * (1 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle", job="kubernetes-service-endpoints"}[5m])))` queryRAMUtil = `100 * (1 - (node_memory_MemAvailable_bytes{job="kubernetes-service-endpoints"} / node_memory_MemTotal_bytes{job="kubernetes-service-endpoints"}))` queryDiskUsed = `100 * (1 - node_filesystem_avail_bytes{mountpoint="/", job="kubernetes-service-endpoints"} / node_filesystem_size_bytes{mountpoint="/", job="kubernetes-service-endpoints"})` - queryNetRX = `node_network_receive_bytes_total{device="enp5s0", job="kubernetes-service-endpoints"}` - queryNetTX = `node_network_transmit_bytes_total{device="enp5s0", job="kubernetes-service-endpoints"}` + queryNetRX = `node_network_receive_bytes_total{device="eno1", job="kubernetes-service-endpoints"}` + queryNetTX = `node_network_transmit_bytes_total{device="eno1", job="kubernetes-service-endpoints"}` queryTemp = `node_hwmon_temp_celsius{job="kubernetes-service-endpoints"}` ) diff --git a/docs/notes/k3s-operations.md b/docs/notes/k3s-operations.md index 8f5e6b6..d7c7277 100644 --- a/docs/notes/k3s-operations.md +++ b/docs/notes/k3s-operations.md @@ -71,14 +71,18 @@ nix-shell --run "tofu apply" Since this is a custom internal service, the image must be built and sideloaded into k3s. ```bash -# 1. Build locally -docker build -t collectors:v0.1.0 -f docker/collectors/Dockerfile . +# 1. Build locally (using podman) +podman build -t collectors:v0.1.0 -f docker/collectors/Dockerfile . # 2. Export and Import -docker save -o collectors.tar collectors:v0.1.0 +podman save -o collectors.tar localhost/collectors:v0.1.0 sudo k3s ctr images import collectors.tar -# 3. Cleanup +# 3. 
Tag for K3s local lookup +sudo k3s ctr images tag localhost/collectors:v0.1.0 collectors:v0.1.0 +sudo k3s ctr images tag localhost/collectors:v0.1.0 docker.io/library/collectors:v0.1.0 + +# 4. Cleanup rm collectors.tar ``` @@ -175,24 +179,24 @@ The platform utilizes **NodePort** to bridge host-based services (MCP agents, pr ## 📊 Resource Limits Summary -- *Last Updated: 2026-02-22* +- *Last Updated: 2026-03-09 (High Performance Profile)* | Component | CPU Req | RAM Req | CPU Limit | RAM Limit | Purpose | | :--- | :--- | :--- | :--- | :--- | :--- | | **collectors** | 5m | 20Mi | 50m | 80Mi | Telemetry Collection | -| **grafana** | 10m | 150Mi | 100m | 250Mi | Visualization | -| **loki** | 100m | 256Mi | 300m | 640Mi | Log Storage | -| **minio** | 100m | 256Mi | 200m | 512Mi | S3 Storage Backend | -| **opentelemetry** | 20m | 100Mi | 150m | 256Mi | Trace Gateway | -| **postgres** | 50m | 200Mi | 200m | 400Mi | Relational Data | -| **prometheus** | 20m | 400Mi | 100m | 600Mi | Metrics Storage | -| **tempo** | 50m | 256Mi | 200m | 512Mi | Trace Storage | -| **thanos** | 10m | 50Mi | 50m | 150Mi | Long-term Metrics Access | +| **grafana** | 50m | 256Mi | 200m | 512Mi | Visualization | +| **loki** | 200m | 512Mi | 1000m | 2Gi | Log Storage | +| **minio** | 200m | 512Mi | 500m | 1Gi | S3 Storage Backend | +| **opentelemetry** | 50m | 200Mi | 300m | 512Mi | Trace Gateway | +| **postgres** | 100m | 512Mi | 500m | 1Gi | Relational Data | +| **prometheus** | 100m | 1Gi | 500m | 2Gi | Metrics Storage | +| **tempo** | 100m | 512Mi | 500m | 1Gi | Trace Storage | +| **thanos** | 50m | 128Mi | 200m | 512Mi | Long-term Metrics Access | **Understanding Usage Totals:** -- **Mini Total (365m CPU / 1.61Gi RAM)**: The sum of all *Requests* (guaranteed resources). -- **Max Total (1.35 Cores / 3.32Gi RAM)**: The sum of all *Limits* (burst ceiling). +- **Mini Total (~0.86 Cores / ~3.6Gi RAM)**: The sum of all *Requests* (guaranteed resources). 
+- **Max Total (~3.75 Cores / ~8.6Gi RAM)**: The sum of all *Limits* (burst ceiling). --- diff --git a/go.mod b/go.mod index d89a33e..72b0253 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module observability-hub -go 1.26.0 +go 1.25.7 require ( github.com/DATA-DOG/go-sqlmock v1.5.2 diff --git a/internal/proxy/webhook.go b/internal/proxy/webhook.go index 701c1ec..121f274 100644 --- a/internal/proxy/webhook.go +++ b/internal/proxy/webhook.go @@ -261,8 +261,12 @@ func WebhookHandler(w http.ResponseWriter, r *http.Request) { "event", eventType, "merged", merged, ) - // We use an absolute path to the script for reliability - cmd := exec.Command("/home/server/software/observability-hub/scripts/gitops_sync.sh", repo) + // Try to get script path from environment, otherwise use relative path + scriptPath := os.Getenv("GITOPS_SYNC_SCRIPT") + if scriptPath == "" { + scriptPath = "scripts/gitops_sync.sh" + } + cmd := exec.Command(scriptPath, repo) output, err := cmd.CombinedOutput() if err != nil { syncSpan.RecordError(err) diff --git a/k3s/grafana/values.yaml b/k3s/grafana/values.yaml index fff2114..f6a0154 100644 --- a/k3s/grafana/values.yaml +++ b/k3s/grafana/values.yaml @@ -34,7 +34,7 @@ initChownData: securityContext: runAsNonRoot: false runAsUser: 0 - allowPrivilegeEscalation: false + allowPrivilegeEscalation: true # Needs to be able to chown the data volume readOnlyRootFilesystem: false capabilities: @@ -122,10 +122,10 @@ service: resources: requests: cpu: 50m - memory: 128Mi + memory: 256Mi limits: cpu: 200m - memory: 384Mi + memory: 512Mi # Security settings and UI tweaks grafana.ini: diff --git a/k3s/loki/values.yaml b/k3s/loki/values.yaml index 6b85a07..ad6c0b2 100644 --- a/k3s/loki/values.yaml +++ b/k3s/loki/values.yaml @@ -60,11 +60,11 @@ singleBinary: name: minio-secret resources: requests: - cpu: 100m - memory: 256Mi + cpu: 200m + memory: 512Mi limits: - cpu: 300m - memory: 640Mi + cpu: 1000m + memory: 2Gi podSecurityContext: fsGroup: 10001 runAsGroup: 10001 
diff --git a/k3s/minio/values.yaml b/k3s/minio/values.yaml index 0ae2063..881a09b 100644 --- a/k3s/minio/values.yaml +++ b/k3s/minio/values.yaml @@ -22,11 +22,11 @@ service: resources: requests: - memory: 256Mi - cpu: 100m - limits: memory: 512Mi cpu: 200m + limits: + memory: 1Gi + cpu: 500m # Deployment Pod Security Context securityContext: diff --git a/k3s/opentelemetry/values.yaml b/k3s/opentelemetry/values.yaml index 8d3c236..25fd67f 100644 --- a/k3s/opentelemetry/values.yaml +++ b/k3s/opentelemetry/values.yaml @@ -13,11 +13,11 @@ image: resources: requests: - cpu: 20m - memory: 100Mi + cpu: 50m + memory: 200Mi limits: - cpu: 150m - memory: 256Mi + cpu: 300m + memory: 512Mi podSecurityContext: fsGroup: 10001 @@ -82,9 +82,9 @@ alternateConfig: processors: [batch, resource] exporters: [loki] -# Expose as NodePort so host-level systemd services (e.g. proxy) can reach the collector +# Expose as LoadBalancer so host-level systemd services (e.g. proxy) can reach the collector via localhost service: - type: NodePort + type: LoadBalancer ports: otlp: diff --git a/k3s/postgres/values.yaml b/k3s/postgres/values.yaml index 9b9f893..5c9b75a 100644 --- a/k3s/postgres/values.yaml +++ b/k3s/postgres/values.yaml @@ -24,12 +24,12 @@ primary: # Fix for linter: explicitly set unhealthyPodEvictionPolicy # Note: bitnami chart v18.3.0 uses this key unhealthyPodEvictionPolicy: "AlwaysAllow" - + podSecurityContext: enabled: true fsGroup: 1001 fsGroupChangePolicy: Always - + containerSecurityContext: enabled: true readOnlyRootFilesystem: true @@ -39,35 +39,35 @@ primary: capabilities: drop: - ALL - + resources: requests: - cpu: 50m - memory: 200Mi + cpu: 100m + memory: 512Mi limits: - cpu: 200m - memory: 400Mi - + cpu: 500m + memory: 1Gi + persistence: enabled: true storageClass: local-path size: 10Gi - + extraVolumes: - name: run-volume emptyDir: {} extraVolumeMounts: - name: run-volume mountPath: /var/run/postgresql - + service: - type: NodePort + type: LoadBalancer nodePorts: 
postgresql: 30432 - + readReplicas: enabled: false - + volumePermissions: enabled: true image: @@ -87,3 +87,4 @@ volumePermissions: runAsUser: 0 runAsNonRoot: false readOnlyRootFilesystem: true + diff --git a/k3s/prometheus/values.yaml b/k3s/prometheus/values.yaml index d850ed1..4a79d22 100644 --- a/k3s/prometheus/values.yaml +++ b/k3s/prometheus/values.yaml @@ -10,11 +10,11 @@ server: storage.tsdb.max-block-duration: 2h resources: requests: - cpu: 20m - memory: 400Mi - limits: cpu: 100m - memory: 600Mi + memory: 1Gi + limits: + cpu: 500m + memory: 2Gi thanos: sidecar: enabled: false diff --git a/k3s/tempo/values.yaml b/k3s/tempo/values.yaml index 592a838..9d7fc8a 100644 --- a/k3s/tempo/values.yaml +++ b/k3s/tempo/values.yaml @@ -9,11 +9,11 @@ tempo: replicas: 1 resources: requests: - cpu: 50m - memory: 256Mi - limits: - cpu: 200m + cpu: 100m memory: 512Mi + limits: + cpu: 500m + memory: 1Gi # Enable read-only root filesystem securityContext: diff --git a/k3s/thanos/values.yaml b/k3s/thanos/values.yaml index 44b52db..ed8c064 100644 --- a/k3s/thanos/values.yaml +++ b/k3s/thanos/values.yaml @@ -26,11 +26,11 @@ storegateway: resources: requests: - cpu: 10m - memory: 50Mi - limits: cpu: 50m - memory: 150Mi + memory: 128Mi + limits: + cpu: 200m + memory: 512Mi podSecurityContext: enabled: true @@ -77,11 +77,11 @@ compactor: resources: requests: - cpu: 10m - memory: 50Mi - limits: cpu: 50m - memory: 150Mi + memory: 128Mi + limits: + cpu: 200m + memory: 512Mi podSecurityContext: enabled: true diff --git a/makefiles/common.mk b/makefiles/common.mk index 5d5a0fe..0093db6 100644 --- a/makefiles/common.mk +++ b/makefiles/common.mk @@ -3,6 +3,9 @@ NS ?= observability KC ?= kubectl -n $(NS) HELM ?= helm --namespace $(NS) +# Container Engine (Default to Podman) +DOCKER ?= podman + # Dynamic Nix Detection USE_NIX = $(shell if command -v nix-shell >/dev/null 2>&1 && [ -z "$$IN_NIX_SHELL" ] && [ "$$GITHUB_ACTIONS" != "true" ]; then echo "yes"; else echo "no"; fi) @@ -27,7 +30,7 @@ 
adr: # Markdown Linting lint: - docker run --rm -v "$(PWD):/data" -w /data $(LINT_IMAGE) --fix "**/*.md" + $(DOCKER) run --rm -v "$(PWD):/data" -w /data $(LINT_IMAGE) --fix "**/*.md" # Configuration Linting (HCL & GitHub Actions) lint-configs: diff --git a/makefiles/k3s.mk b/makefiles/k3s.mk index 365c43a..d409eef 100644 --- a/makefiles/k3s.mk +++ b/makefiles/k3s.mk @@ -1,6 +1,8 @@ # K3s Orchestration .PHONY: build-collectors k3s-collectors-up k3s-status k3s-df k3s-prune k3s-logs-% k3s-backup-% kube-lint +BACKUP_DIR ?= /home/server2/backups/manual + # Maintenance kube-lint: @echo "Linting Kubernetes manifests..." @@ -19,11 +21,21 @@ k3s-prune: build-collectors: @echo "Building Collectors image..." - docker build -t collectors:v0.1.0 -f docker/collectors/Dockerfile . - docker save -o collectors.tar collectors:v0.1.0 + $(DOCKER) build -t collectors:v0.1.0 -f docker/collectors/Dockerfile . + $(DOCKER) save -o collectors.tar localhost/collectors:v0.1.0 sudo k3s ctr images import collectors.tar + sudo k3s ctr images tag localhost/collectors:v0.1.0 collectors:v0.1.0 + sudo k3s ctr images tag localhost/collectors:v0.1.0 docker.io/library/collectors:v0.1.0 rm collectors.tar +build-postgres: + @echo "Building custom Postgres image..." + $(DOCKER) build -t postgres-pod:17 -f docker/postgres/Dockerfile . + $(DOCKER) save -o postgres-pod.tar localhost/postgres-pod:17 + sudo k3s ctr images import postgres-pod.tar + sudo k3s ctr images tag localhost/postgres-pod:17 docker.io/library/postgres-pod:17 + rm postgres-pod.tar + k3s-collectors-up: @echo "Regenerating Collectors manifest..." 
$(NIX_RUN) "helm template collectors k3s/collectors -f k3s/collectors/values.yaml --namespace $(NS) > k3s/collectors/manifest.yaml" @@ -56,7 +68,7 @@ k3s-backup-%: $(KC) scale --replicas=0 $$RESOURCE; \ echo "Waiting for pods to terminate..."; \ $(KC) wait --for=delete pod -l $$( $(KC) get $$RESOURCE -o jsonpath='{.spec.selector.matchLabels}' | jq -r 'to_entries | .[] | .key + "=" + .value' | paste -sd "," - ) --timeout=60s || true; \ - BACKUP_DIR="/home/server/backups/manual"; \ + BACKUP_DIR="$(BACKUP_DIR)"; \ sudo mkdir -p $$BACKUP_DIR; \ TIMESTAMP=$$(date +%Y%m%d_%H%M%S); \ BACKUP_PATH="$$BACKUP_DIR/$*_"$$TIMESTAMP".tar.gz"; \ diff --git a/makefiles/systemd.mk b/makefiles/systemd.mk index 1130717..4e8ca8d 100644 --- a/makefiles/systemd.mk +++ b/makefiles/systemd.mk @@ -2,22 +2,23 @@ # Define exact units to install ACTIVE_UNITS = proxy.service tailscale-gate.service openbao.service \ - traffic-generator.service traffic-generator.timer \ ingestion.service ingestion.timer \ mcp-telemetry.service .PHONY: install-services reload-services uninstall-services bao-status install-services: - @echo "🔗 Linking active units..." + @echo "📦 Installing active units..." @for unit in $(ACTIVE_UNITS); do \ - sudo ln -sf $(CURDIR)/systemd/$$unit /etc/systemd/system/$$unit; \ + sudo rm -f /etc/systemd/system/$$unit; \ + sudo cp $(CURDIR)/systemd/$$unit /etc/systemd/system/$$unit; \ + sudo chmod 644 /etc/systemd/system/$$unit; \ done @sudo systemctl daemon-reload @echo "🟢 Enabling services..." @sudo systemctl enable --now proxy.service tailscale-gate.service openbao.service @echo "⏰ Enabling timers..." - @sudo systemctl enable --now ingestion.timer traffic-generator.timer + @sudo systemctl enable --now ingestion.timer reload-services: @echo "Reloading systemd units..." 
diff --git a/scripts/generate_traffic.sh b/scripts/generate_traffic.sh deleted file mode 100755 index 73f4abb..0000000 --- a/scripts/generate_traffic.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/bin/bash -PROXY_URL="http://localhost:8085" -REGIONS=("us-east-1" "us-west-2" "ca-central-1" "eu-west-1" "eu-central-1" "uk-south-1" "asia-east-1" "asia-southeast-1" "asia-south-1") -TIMEZONES=("America/Edmonton" "America/Vancouver" "America/Toronto" "Europe/Dublin" "Europe/Frankfurt" "Europe/London" "Asia/Taipei" "Asia/Singapore" "Asia/Kolkata") -DEVICES=("iphone" "android" "browser" "sensor-node") -NETWORKS=("wifi" "5g" "4g" "ethernet") - -# Log helper using jq for safe JSON generation -# Ensures newlines and quotes in 'msg' are properly escaped -log() { - local level=$1 - local msg=$2 - local json_payload - - # Generate JSON payload - json_payload=$(jq -n -c \ - --arg service "traffic-generator" \ - --arg level "$level" \ - --arg msg "$msg" \ - '{service: $service, level: $level, msg: $msg}') - - # 1. Output to stdout - echo "$json_payload" - - # 2. 
Send directly to system journal - if command -v logger >/dev/null 2>&1; then - logger -t "traffic-generator" "$json_payload" || true - fi -} - -generate_cycle() { - local mode=$1 - local include_health=${2:-true} - local r_idx=$(( RANDOM % ${#REGIONS[@]} )) - local d_idx=$(( RANDOM % ${#DEVICES[@]} )) - local n_idx=$(( RANDOM % ${#NETWORKS[@]} )) - local hex_id=$(openssl rand -hex 4) - - # Baseline health check (optional) - if [ "$include_health" = "true" ]; then - curl -s -o /dev/null "$PROXY_URL/api/health" - fi - - # Decide if this cycle should fail (Randomized Error Injection) - local should_fail=false - if [ "$mode" = "burst" ] && [ -n "$BURST_FAIL_IDS" ]; then - for id in $BURST_FAIL_IDS; do - if [ "$id" -eq "$BURST_CYCLE_COUNT" ]; then - should_fail=true - break - fi - done - fi - - if [ "$should_fail" = "true" ]; then - # Trigger a real application failure (Malformed JSON) - curl -s -X POST "$PROXY_URL/api/trace/synthetic/fail-$hex_id" \ - -H "Content-Type: application/json" \ - -H "X-Traffic-Mode: burst-fail" \ - -d "{\"region\": \"broken-payload" > /dev/null - log "WARN" "Injected failure for fail-$hex_id (burst-fail)" - else - # Standard Synthetic Trace - curl -s -X POST "$PROXY_URL/api/trace/synthetic/$hex_id" \ - -H "Content-Type: application/json" \ - -H "X-Traffic-Mode: $mode" \ - -d "{ - \"region\": \"${REGIONS[$r_idx]}\", - \"timezone\": \"${TIMEZONES[$r_idx]}\", - \"device\": \"${DEVICES[$d_idx]}\", - \"network_type\": \"${NETWORKS[$n_idx]}\" - }" > /dev/null - log "INFO" "Generated synthetic trace for $hex_id in ${REGIONS[$r_idx]} ($mode)" - fi -} - -case "$1" in - --continuous) - count=1 - while true; do - generate_cycle "continuous" "true" - echo "✅ Cycle $count complete. Sleeping for 60s..." - ((count++)); sleep 60 - done ;; - --burst) - echo "🚀 Burst mode: Running 20 rapid cycles (Pure Trace Burst)..." 
- - # Randomly select 1-5 indices to fail - num_fails=$(( RANDOM % 5 + 1 )) - BURST_FAIL_IDS=$(shuf -i 1-20 -n $num_fails | xargs) - echo "⚠️ Injecting $num_fails failures at cycles: $BURST_FAIL_IDS" - - for i in {1..20}; do - BURST_CYCLE_COUNT=$i - # Force health check to false for pure burst - generate_cycle "burst" "false" - sleep 0.5 - done ;; - *) - for i in {1..3}; do - generate_cycle "cron" "true" - sleep 1 - done ;; -esac diff --git a/scripts/gitops_sync.sh b/scripts/gitops_sync.sh index b772e92..6d91a1d 100755 --- a/scripts/gitops_sync.sh +++ b/scripts/gitops_sync.sh @@ -6,7 +6,7 @@ SERVICE_NAME="gitops.sync" JOB_NAME="bash.automation" REPO_NAME=${1:-"observability-hub"} # Default to observability-hub as per original intent -BASE_DIR="/home/server/software" +BASE_DIR=$(cd "$(dirname "$0")/../.." && pwd) # OTel-aligned structured logging function log() { diff --git a/systemd/ingestion.service b/systemd/ingestion.service index 45b8ad8..6ec2b5a 100644 --- a/systemd/ingestion.service +++ b/systemd/ingestion.service @@ -5,10 +5,10 @@ After=network.target [Service] Type=oneshot -User=server -WorkingDirectory=/home/server/software/observability-hub -EnvironmentFile=/home/server/software/observability-hub/.env -ExecStart=/home/server/software/observability-hub/bin/ingestion +User=server2 +WorkingDirectory=/home/server2/software/observability-hub +EnvironmentFile=/home/server2/software/observability-hub/.env +ExecStart=/home/server2/software/observability-hub/bin/ingestion StandardOutput=journal StandardError=journal diff --git a/systemd/mcp-telemetry.service b/systemd/mcp-telemetry.service index 07ac636..ef17d00 100644 --- a/systemd/mcp-telemetry.service +++ b/systemd/mcp-telemetry.service @@ -4,10 +4,10 @@ After=network.target [Service] Type=simple -User=server -WorkingDirectory=/home/server/software/observability-hub -EnvironmentFile=/home/server/software/observability-hub/.env -ExecStart=/home/server/software/observability-hub/bin/mcp_telemetry +User=server2 
+WorkingDirectory=/home/server2/software/observability-hub +EnvironmentFile=/home/server2/software/observability-hub/.env +ExecStart=/home/server2/software/observability-hub/bin/mcp_telemetry Restart=always RestartSec=5 StandardOutput=journal diff --git a/systemd/openbao.service b/systemd/openbao.service index 42f2098..4f3b5fc 100644 --- a/systemd/openbao.service +++ b/systemd/openbao.service @@ -5,10 +5,10 @@ After=network.target [Service] Type=simple -User=server -WorkingDirectory=/home/server/software/observability-hub -# Use nix-shell to ensure 'bao' is available in the environment -ExecStart=/nix/var/nix/profiles/default/bin/nix-shell --run "bao server -config=config/bao-local.hcl" +User=server2 +WorkingDirectory=/home/server2/software/observability-hub +# Run bao directly from the system path +ExecStart=/usr/bin/bao server -config=config/bao-local.hcl Restart=on-failure RestartSec=5 StandardOutput=journal diff --git a/systemd/proxy.service b/systemd/proxy.service index 26526f7..fb49132 100644 --- a/systemd/proxy.service +++ b/systemd/proxy.service @@ -1,13 +1,13 @@ [Unit] Description=Observability Hub Proxy Server -After=network.target postgresql.service +After=network.target [Service] Type=simple -User=server -WorkingDirectory=/home/server/software/observability-hub -EnvironmentFile=/home/server/software/observability-hub/.env -ExecStart=/home/server/software/observability-hub/bin/proxy_server +User=server2 +WorkingDirectory=/home/server2/software/observability-hub +EnvironmentFile=/home/server2/software/observability-hub/.env +ExecStart=/home/server2/software/observability-hub/bin/proxy_server Restart=always RestartSec=5 StandardOutput=journal diff --git a/systemd/tailscale-gate.service b/systemd/tailscale-gate.service index fdaa67c..28fbbe8 100644 --- a/systemd/tailscale-gate.service +++ b/systemd/tailscale-gate.service @@ -1,10 +1,9 @@ [Unit] Description=Tailscale Funnel Gatekeeper Loop -After=docker.service tailscaled.service -Requires=docker.service 
tailscaled.service +After=tailscaled.service [Service] -ExecStart=/home/server/software/observability-hub/scripts/tailscale_gate.sh +ExecStart=/home/server2/software/observability-hub/scripts/tailscale_gate.sh Restart=always RestartSec=60 # Ensures the script is prioritized during high RAM usage diff --git a/systemd/traffic-generator.service b/systemd/traffic-generator.service deleted file mode 100644 index 98700c4..0000000 --- a/systemd/traffic-generator.service +++ /dev/null @@ -1,12 +0,0 @@ -[Unit] -Description=Synthetic Traffic Generator (Single Run) -After=network.target proxy.service - -[Service] -Type=oneshot -User=server -WorkingDirectory=/home/server/software/observability-hub -# Calling without arguments triggers the default (*) case in the script -ExecStart=/home/server/software/observability-hub/scripts/generate_traffic.sh -StandardOutput=journal -StandardError=journal diff --git a/systemd/traffic-generator.timer b/systemd/traffic-generator.timer deleted file mode 100644 index df89349..0000000 --- a/systemd/traffic-generator.timer +++ /dev/null @@ -1,10 +0,0 @@ -[Unit] -Description=Timer for Synthetic Traffic Generation (30 min) - -[Timer] -OnCalendar=*:0/30:00 -AccuracySec=1s -Persistent=true - -[Install] -WantedBy=timers.target diff --git a/tofu/thanos.tf b/tofu/thanos.tf index d1bfa3c..b05c41d 100644 --- a/tofu/thanos.tf +++ b/tofu/thanos.tf @@ -5,7 +5,14 @@ resource "helm_release" "thanos" { version = "17.3.1" namespace = kubernetes_namespace.observability.metadata[0].name - values = [file("${path.module}/../k3s/thanos/values.yaml")] + values = [ + file("${path.module}/../k3s/thanos/values.yaml"), + yamlencode({ + query = { + extraArgs = ["--endpoint=prometheus-thanos-grpc.observability.svc.cluster.local:10901"] + } + }) + ] depends_on = [kubernetes_namespace.observability] }