feat: add manual Docker Compose deployment configuration

USHER-PB · USHER-PB · commit 1438b4faf35a · 2026-01-08T17:55:13.000+01:00
diff --git a/lgtm-stack/Manual/alloy-config.alloy b/lgtm-stack/Manual/alloy-config.alloy
@@ -0,0 +1,80 @@
+// ==============================================================================
+// Grafana Alloy Configuration for NetBird Monitoring
+// ==============================================================================
+
+logging {
+  format = "logfmt"
+  level  = "info"  // verbosity of Alloy's own log output
+}
+
+// ==============================================================================
+// DOCKER CONTAINER LOGS
+// ==============================================================================
+
+discovery.docker "containers" {
+  host = "unix:///var/run/docker.sock"  // Docker Engine socket queried for the container list
+}
+
+discovery.relabel "containers" {
+  targets = discovery.docker.containers.targets
+
+  // container: keep only the part of the Docker name after its leading "/".
+  rule {
+    target_label  = "container"
+    source_labels = ["__meta_docker_container_name"]
+    regex         = "/(.*)"
+  }
+
+  rule {
+    target_label  = "compose_project"
+    source_labels = ["__meta_docker_container_label_com_docker_compose_project"]
+  }
+
+  rule {
+    target_label  = "service"
+    source_labels = ["__meta_docker_container_label_com_docker_compose_service"]
+  }
+
+  rule {
+    replacement  = constants.hostname
+    target_label = "host"
+  }
+}
+
+loki.source.docker "docker_logs" {
+  targets    = discovery.relabel.containers.output  // relabeled container targets
+  host       = "unix:///var/run/docker.sock"
+  forward_to = [loki.process.docker_logs.receiver]
+}
+
+loki.process "docker_logs" {
+  forward_to = [loki.write.loki.receiver]  // hand processed entries to loki.write "loki"
+  stage.docker {}  // NOTE(review): input comes from loki.source.docker, not raw json-file logs — confirm this stage is still needed
+}
+
+// ==============================================================================
+// SYSTEM LOGS (journald)
+// ==============================================================================
+
+// Reads the systemd journal and ships entries straight to loki.write "loki".
+// No path is set here, so the component uses its default journal location.
+loki.source.journal "system_logs" {
+  labels     = { job = "systemd-journal" }
+  forward_to = [loki.write.loki.receiver]
+}
+
+// ==============================================================================
+// LOKI WRITE ENDPOINT
+// (metrics are scraped directly by Prometheus; Alloy only handles logs here)
+// ==============================================================================
+
+loki.write "loki" {
+  // Static labels stamped on every log stream sent through this component.
+  external_labels = {
+    env     = "production",
+    cluster = "netbird-selfhosted",
+  }
+  endpoint {
+    url = "http://loki:3100/loki/api/v1/push"
+  }
+}
diff --git a/lgtm-stack/Manual/docker-compose.yaml b/lgtm-stack/Manual/docker-compose.yaml
@@ -0,0 +1,229 @@
+networks:
+  monitoring:
+    driver: bridge
+  # Pre-existing network owned by the NetBird compose stack; joined here, not created
+  netbird:
+    name: netbird_netbird
+    external: true
+
+volumes:
+  loki_data: {}
+  prometheus_data: {}
+  grafana_data: {}
+  netbird_management_data:  # NOTE(review): no service in this file mounts this volume — confirm it is still needed
+    external: true
+    name: netbird_netbird_management
+
+services:
+  # =============================================================================
+  # LOKI - Log Aggregation
+  # =============================================================================
+  loki:
+    container_name: loki
+    image: grafana/loki:3.0.0
+    restart: unless-stopped
+    user: '0:0'  # uid:gid 0 inside the container
+    command: -config.file=/etc/loki/local-config.yaml
+    ports:
+      - '3100:3100'
+    networks:
+      - monitoring
+      - netbird
+    volumes:
+      - ./loki-config.yaml:/etc/loki/local-config.yaml:ro
+      - loki_data:/loki
+    healthcheck:
+      test: ['CMD', 'wget', '--no-verbose', '--tries=1', '--spider', 'http://localhost:3100/ready']
+      start_period: 10s
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    logging:
+      driver: json-file
+      options:
+        max-file: '3'
+        max-size: 10m
+
+  # =============================================================================
+  # PROMETHEUS - Metrics Storage
+  # =============================================================================
+  prometheus:
+    image: prom/prometheus:v2.54.1
+    container_name: prometheus
+    user: "0:0"
+    restart: unless-stopped
+    ports:
+      - "9090:9090"
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--storage.tsdb.retention.time=30d'  # keep 30 days of samples
+      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
+      - '--web.console.templates=/usr/share/prometheus/consoles'
+      - '--web.enable-lifecycle'  # enables the HTTP reload/quit endpoints
+      # --web.enable-admin-api intentionally omitted: port 9090 is published without auth, and that API can delete TSDB data.
+    volumes:
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
+      - prometheus_data:/prometheus
+    networks:
+      - monitoring
+      - netbird
+    extra_hosts:
+      - "host.docker.internal:host-gateway"  # maps host.docker.internal to the Docker host gateway
+    healthcheck:
+      test: [ "CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9090/-/healthy" ]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "10m"
+        max-file: "3"
+
+  # =============================================================================
+  # GRAFANA - Visualization
+  # =============================================================================
+  grafana:
+    image: grafana/grafana:11.3.0
+    container_name: grafana
+    user: "0:0"
+    restart: unless-stopped
+    ports:
+      - "3000:3000"
+    environment:
+      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}  # override via shell env or .env
+      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}  # fallback only; set a real password before exposing port 3000
+      - GF_PATHS_PROVISIONING=/etc/grafana/provisioning
+      - GF_FEATURE_TOGGLES_ENABLE=publicDashboards
+      - GF_LOG_LEVEL=info
+      - GF_AUTH_ANONYMOUS_ENABLED=false
+    volumes:
+      - grafana_data:/var/lib/grafana
+    networks:
+      - monitoring
+      - netbird
+    depends_on:
+      loki:
+        condition: service_healthy  # start only after Loki's healthcheck passes
+      prometheus:
+        condition: service_healthy  # start only after Prometheus' healthcheck passes
+    healthcheck:
+      test: [ "CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/api/health" ]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "10m"
+        max-file: "3"
+
+  # =============================================================================
+  # ALLOY - Telemetry Collection (logs only, no HTTP healthcheck)
+  # =============================================================================
+  alloy:
+    image: grafana/alloy:v1.4.2
+    container_name: alloy
+    user: "0:0"
+    restart: unless-stopped
+    ports:
+      - "12345:12345"
+    command:
+      - run
+      - --server.http.listen-addr=0.0.0.0:12345
+      - --storage.path=/var/lib/alloy/data
+      - /etc/alloy/config.alloy
+    volumes:
+      - ./alloy-config.alloy:/etc/alloy/config.alloy:ro
+      - /var/run/docker.sock:/var/run/docker.sock:ro  # container discovery + log streaming
+      - /var/log:/var/log:ro  # includes /var/log/journal when the journal is persistent
+      - /run/log/journal:/run/log/journal:ro  # volatile journal location, read by loki.source.journal
+      - /etc/machine-id:/etc/machine-id:ro  # lets the journal reader pick this host's journal (replaces unused /sys and /proc mounts)
+    networks:
+      - monitoring
+      - netbird
+    depends_on:
+      loki:
+        condition: service_healthy
+      prometheus:
+        condition: service_healthy
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "10m"
+        max-file: "3"
+
+  # =============================================================================
+  # NODE EXPORTER - Host & cgroup metrics
+  # =============================================================================
+  node-exporter:
+    image: prom/node-exporter:v1.8.2
+    container_name: node-exporter
+    restart: unless-stopped
+    ports:
+      - "9100:9100"
+    command:
+      - '--path.procfs=/host/proc'
+      - '--path.sysfs=/host/sys'
+      - '--path.rootfs=/rootfs'
+      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'  # "$$" is Compose's escape for a literal "$"
+      - '--collector.netclass.ignored-devices=^(veth.*|br.*|docker.*|virbr.*|lo)$$'
+      - '--collector.netdev.device-exclude=^(veth.*|br.*|docker.*|virbr.*|lo)$$'
+      - '--collector.cgroups'
+    volumes:
+      - /proc:/host/proc:ro
+      - /sys:/host/sys:ro
+      - /:/rootfs:ro,rslave  # rslave: host mounts made after start-up propagate into the container
+    networks:
+      - monitoring
+    pid: host  # share the host PID namespace
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "10m"
+        max-file: "3"
+
+  # =============================================================================
+  # CONTAINER CGROUP METRICS EXPORTER
+  # =============================================================================
+  container-metrics:
+    container_name: container-metrics
+    image: ghcr.io/mosquito/cgroups-exporter:latest
+    restart: unless-stopped
+    networks:
+      - monitoring
+    volumes:
+      - /sys/:/host_sys:ro
+    # The host /sys tree is visible inside the container as /host_sys. With
+    # cgroup v2 + systemd, Docker usually places each container's cgroup at
+    # /sys/fs/cgroup/system.slice/docker-<long-id>.scope/, so the exporter is
+    # given a glob that matches those scope directories.
+    command:
+      - cgroups-exporter
+      - --cgroups-path
+      - '/host_sys/fs/cgroup/system.slice/docker-*.scope/'
+    logging:
+      driver: json-file
+      options:
+        max-file: '3'
+        max-size: 10m
+
+  # =============================================================================
+  # NETBIRD EVENTS EXPORTER - Rust Version
+  # =============================================================================
+  netbird-events-exporter:
+    container_name: netbird-events-exporter
+    image: ghcr.io/onelrian/signal:latest
+    restart: unless-stopped
+    networks:
+      - monitoring
+      - netbird
+    depends_on:
+      loki:
+        condition: service_healthy  # wait for Loki's healthcheck to pass
+    environment:
+      - RUST_LOG=info
+      - LOKI_URL=http://loki:3100
+      - NETBIRD_API_TOKEN=${NETBIRD_PAT}
+      - NETBIRD_API_URL=${NETBIRD_API_URL:-https://${NETBIRD_DOMAIN}/api}
diff --git a/lgtm-stack/Manual/loki-config.yaml b/lgtm-stack/Manual/loki-config.yaml
@@ -0,0 +1,49 @@
+auth_enabled: false  # single-tenant mode: no tenant header required
+
+server:
+  grpc_listen_port: 9096
+  http_listen_port: 3100  # HTTP API port (push, query, /ready)
+
+common:
+  replication_factor: 1
+  instance_addr: 127.0.0.1
+  ring:
+    kvstore:
+      store: inmemory
+  path_prefix: /loki  # all on-disk state below lives under this directory
+  storage:
+    filesystem:
+      rules_directory: /loki/rules
+      chunks_directory: /loki/chunks
+
+schema_config:
+  configs:
+    - schema: v13
+      store: tsdb
+      object_store: filesystem
+      from: "2024-01-01"
+      index:
+        period: 24h
+        prefix: index_
+
+limits_config:
+  max_streams_per_user: 0  # 0 = unlimited
+  max_entries_limit_per_query: 100000
+  ingestion_rate_mb: 100
+  ingestion_burst_size_mb: 200
+  per_stream_rate_limit: 50MB
+  per_stream_rate_limit_burst: 100MB
+  reject_old_samples: true
+  reject_old_samples_max_age: 168h  # 7 days
+  retention_period: 720h  # 30 days
+
+compactor:
+  retention_enabled: true  # turn on retention-based deletion in the compactor
+  delete_request_store: filesystem
+  retention_delete_delay: 2h
+  retention_delete_worker_count: 150
+  compaction_interval: 10m
+  working_directory: /loki/compactor
+
+analytics:
+  reporting_enabled: false  # usage reporting switched off
diff --git a/lgtm-stack/Manual/prometheus.yml b/lgtm-stack/Manual/prometheus.yml