Skip to content

Commit 1438b4f

Browse files
committed
feat: add manual Docker Compose deployment configuration
1 parent c8fae14 commit 1438b4f

File tree

4 files changed

+420
-0
lines changed

4 files changed

+420
-0
lines changed
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// ==============================================================================
2+
// Grafana Alloy Configuration for NetBird Monitoring
3+
// ==============================================================================
4+
5+
// Alloy's own process logging: info-level messages rendered as logfmt.
logging {
  format = "logfmt"
  level  = "info"
}
9+
10+
// ==============================================================================
11+
// DOCKER CONTAINER LOGS
12+
// ==============================================================================
13+
14+
// Enumerate containers by asking the Docker daemon over its local socket.
discovery.docker "containers" {
  host = "unix:///var/run/docker.sock"
}

// Reduce the discovered Docker metadata to four labels:
// container, compose_project, service and host.
discovery.relabel "containers" {
  targets = discovery.docker.containers.targets

  // Docker reports names as "/name"; capture everything after the slash.
  rule {
    target_label  = "container"
    source_labels = ["__meta_docker_container_name"]
    regex         = "/(.*)"
    replacement   = "$1"
  }

  // Compose project the container belongs to.
  rule {
    target_label  = "compose_project"
    source_labels = ["__meta_docker_container_label_com_docker_compose_project"]
  }

  // Compose service name.
  rule {
    target_label  = "service"
    source_labels = ["__meta_docker_container_label_com_docker_compose_service"]
  }

  // Static label carrying the hostname Alloy sees.
  // NOTE(review): when Alloy itself runs in a container this is presumably the
  // container's hostname, not the Docker host's; confirm.
  rule {
    target_label = "host"
    replacement  = constants.hostname
  }
}
43+
44+
// Stream the logs of the relabeled containers from the Docker daemon and hand
// every entry to the docker_logs processing component.
loki.source.docker "docker_logs" {
  forward_to = [loki.process.docker_logs.receiver]
  targets    = discovery.relabel.containers.output
  host       = "unix:///var/run/docker.sock"
}
49+
50+
// Pass-through hook for container log entries on their way to Loki.
//
// stage.docker is intentionally absent: it decodes the JSON envelope
// ({"log": ..., "stream": ..., "time": ...}) of Docker's on-disk json-file
// logs. Entries arriving here come from loki.source.docker, which reads from
// the Docker daemon and delivers plain log lines, so the stage could only fail
// to parse them or mis-handle applications that log their own JSON.
// Add real processing stages here if they become necessary.
loki.process "docker_logs" {
  forward_to = [loki.write.loki.receiver]
}
54+
55+
// ==============================================================================
56+
// SYSTEM LOGS (journald)
57+
// ==============================================================================
58+
59+
// Read the systemd journal and send entries straight to Loki, tagged with a
// fixed job label. No journal path is configured here.
// NOTE(review): inside a container the journal is only readable if the host's
// journal directory is mounted; confirm the compose mounts cover it.
loki.source.journal "system_logs" {
  labels = {
    job = "systemd-journal",
  }
  forward_to = [loki.write.loki.receiver]
}
65+
66+
// ==============================================================================
67+
// LOKI WRITE ENDPOINT
68+
// (metrics are scraped directly by Prometheus; Alloy only handles logs here)
69+
// ==============================================================================
70+
71+
// The single Loki push target. Every stream written through it is tagged with
// the cluster and env labels below.
loki.write "loki" {
  external_labels = {
    env     = "production",
    cluster = "netbird-selfhosted",
  }

  endpoint {
    url = "http://loki:3100/loki/api/v1/push"
  }
}
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
networks:
  # Pre-existing network owned by the NetBird compose stack. It is declared
  # external, so this file only attaches to it and never creates it.
  netbird:
    name: netbird_netbird
    external: true
  # Bridge network private to the monitoring stack.
  monitoring:
    driver: bridge
8+
9+
volumes:
  # Named volumes created and owned by this stack.
  grafana_data: {}
  loki_data: {}
  prometheus_data: {}
  # Volume managed outside this file, referenced by its real name.
  # NOTE(review): no service visible in this file mounts it; confirm it is
  # still needed.
  netbird_management_data:
    name: netbird_netbird_management
    external: true
16+
17+
services:
18+
# =============================================================================
19+
# LOKI - Log Aggregation
20+
# =============================================================================
21+
# Loki log store: runs as root, persists to the loki_data volume, joins both
# networks and is published on host port 3100.
loki:
  container_name: loki
  image: grafana/loki:3.0.0
  restart: unless-stopped
  user: "0:0"
  command: -config.file=/etc/loki/local-config.yaml
  ports:
    - "3100:3100"
  networks:
    - monitoring
    - netbird
  volumes:
    - ./loki-config.yaml:/etc/loki/local-config.yaml:ro
    - loki_data:/loki
  # Probe Loki's readiness endpoint from inside the container.
  healthcheck:
    test:
      - "CMD"
      - "wget"
      - "--no-verbose"
      - "--tries=1"
      - "--spider"
      - "http://localhost:3100/ready"
    start_period: 10s
    interval: 10s
    timeout: 5s
    retries: 5
  # Rotate container logs: three files of at most 10 MB each.
  logging:
    driver: "json-file"
    options:
      max-file: "3"
      max-size: "10m"
46+
47+
# =============================================================================
48+
# PROMETHEUS - Metrics Storage
49+
# =============================================================================
50+
# Prometheus metrics store: 30-day TSDB retention, persisted to the
# prometheus_data volume, published on host port 9090.
# NOTE(review): --web.enable-admin-api and --web.enable-lifecycle are switched
# on while the port is published and no authentication is configured in this
# file; confirm that exposure is intended.
prometheus:
  container_name: prometheus
  image: prom/prometheus:v2.54.1
  restart: unless-stopped
  user: "0:0"
  command:
    - "--config.file=/etc/prometheus/prometheus.yml"
    - "--storage.tsdb.path=/prometheus"
    - "--storage.tsdb.retention.time=30d"
    - "--web.console.libraries=/usr/share/prometheus/console_libraries"
    - "--web.console.templates=/usr/share/prometheus/consoles"
    - "--web.enable-lifecycle"
    - "--web.enable-admin-api"
  ports:
    - "9090:9090"
  # Makes the Docker host resolvable from inside the container.
  extra_hosts:
    - "host.docker.internal:host-gateway"
  networks:
    - monitoring
    - netbird
  volumes:
    - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
    - prometheus_data:/prometheus
  # Probe Prometheus' health endpoint from inside the container.
  healthcheck:
    test:
      - "CMD"
      - "wget"
      - "--no-verbose"
      - "--tries=1"
      - "--spider"
      - "http://localhost:9090/-/healthy"
    interval: 10s
    timeout: 5s
    retries: 5
  # Rotate container logs: three files of at most 10 MB each.
  logging:
    driver: "json-file"
    options:
      max-file: "3"
      max-size: "10m"
83+
84+
# =============================================================================
85+
# GRAFANA - Visualization
86+
# =============================================================================
87+
# Grafana UI: published on host port 3000, persisted to the grafana_data
# volume, started only after Loki and Prometheus report healthy.
grafana:
  image: grafana/grafana:11.3.0
  container_name: grafana
  user: "0:0"
  restart: unless-stopped
  ports:
    - "3000:3000"
  environment:
    # Admin credentials come from the shell or .env file
    # (GRAFANA_ADMIN_USER / GRAFANA_ADMIN_PASSWORD). They fall back to
    # admin/admin so existing deployments keep working; override them before
    # exposing port 3000 beyond a trusted network.
    - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
    - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
    - GF_PATHS_PROVISIONING=/etc/grafana/provisioning
    - GF_FEATURE_TOGGLES_ENABLE=publicDashboards
    - GF_LOG_LEVEL=info
    - GF_AUTH_ANONYMOUS_ENABLED=false
  volumes:
    - grafana_data:/var/lib/grafana
  networks:
    - monitoring
    - netbird
  depends_on:
    loki:
      condition: service_healthy
    prometheus:
      condition: service_healthy
  # Probe Grafana's health API from inside the container.
  healthcheck:
    test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/api/health"]
    interval: 10s
    timeout: 5s
    retries: 5
  # Rotate container logs: three files of at most 10 MB each.
  logging:
    driver: "json-file"
    options:
      max-size: "10m"
      max-file: "3"
121+
122+
# =============================================================================
123+
# ALLOY - Telemetry Collection (logs only, no HTTP healthcheck)
124+
# =============================================================================
125+
# Grafana Alloy: collects Docker container logs and journald entries and
# pushes them to Loki. Its HTTP server listens on 12345 and is published on
# the same host port.
alloy:
  image: grafana/alloy:v1.4.2
  container_name: alloy
  user: "0:0"
  restart: unless-stopped
  ports:
    - "12345:12345"
  command:
    - run
    - --server.http.listen-addr=0.0.0.0:12345
    - --storage.path=/var/lib/alloy/data
    - /etc/alloy/config.alloy
  volumes:
    - ./alloy-config.alloy:/etc/alloy/config.alloy:ro
    # Docker socket: used for container discovery and log streaming.
    - /var/run/docker.sock:/var/run/docker.sock:ro
    # Host logs, including /var/log/journal when the journal is persistent.
    - /var/log:/var/log:ro
    # NOTE(review): the log-only Alloy config does not appear to read /sys
    # or /proc; confirm whether these mounts are still required.
    - /sys:/sys:ro
    - /proc:/proc:ro
  networks:
    - monitoring
    - netbird
  # Alloy only ships logs to Loki, so it waits for Loki alone. Gating it on
  # Prometheus as well would stop log collection from starting whenever
  # Prometheus is unhealthy.
  depends_on:
    loki:
      condition: service_healthy
  # Rotate container logs: three files of at most 10 MB each.
  logging:
    driver: "json-file"
    options:
      max-size: "10m"
      max-file: "3"
156+
157+
# =============================================================================
158+
# NODE EXPORTER - Host & cgroup metrics
159+
# =============================================================================
160+
# Node exporter: reads host metrics through read-only mounts of the host's
# /proc, /sys and root filesystem, shares the host PID namespace, and is
# published on host port 9100.
node-exporter:
  container_name: node-exporter
  image: prom/node-exporter:v1.8.2
  restart: unless-stopped
  pid: host
  ports:
    - "9100:9100"
  networks:
    - monitoring
  volumes:
    - /proc:/host/proc:ro
    - /sys:/host/sys:ro
    - /:/rootfs:ro
  command:
    # Point the exporter at the mounted host trees.
    - '--path.procfs=/host/proc'
    - '--path.sysfs=/host/sys'
    - '--path.rootfs=/rootfs'
    # "$$" is Compose's escape for a literal "$" in the regexes below.
    - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    - '--collector.netclass.ignored-devices=^(veth.*|br.*|docker.*|virbr.*|lo)$$'
    - '--collector.netdev.device-exclude=^(veth.*|br.*|docker.*|virbr.*|lo)$$'
    - '--collector.cgroups'
  # Rotate container logs: three files of at most 10 MB each.
  logging:
    driver: "json-file"
    options:
      max-file: "3"
      max-size: "10m"
186+
187+
# =============================================================================
188+
# CONTAINER CGROUP METRICS EXPORTER
189+
# =============================================================================
190+
# cgroups-exporter: per-container cgroup metrics.
container-metrics:
  container_name: container-metrics
  image: ghcr.io/mosquito/cgroups-exporter:latest
  restart: unless-stopped
  networks:
    - monitoring
  # The host's /sys tree is mounted read-only at /host_sys.
  volumes:
    - /sys/:/host_sys:ro
  # With cgroup v2 and systemd, Docker container cgroups usually live at
  # /sys/fs/cgroup/system.slice/docker-<long-id>.scope/, so the exporter is
  # pointed at a glob matching those scopes under the mounted tree.
  command:
    - cgroups-exporter
    - --cgroups-path
    - "/host_sys/fs/cgroup/system.slice/docker-*.scope/"
  # Rotate container logs: three files of at most 10 MB each.
  logging:
    driver: "json-file"
    options:
      max-file: "3"
      max-size: "10m"
211+
212+
# =============================================================================
213+
# NETBIRD EVENTS EXPORTER - Rust Version
214+
# =============================================================================
215+
# NetBird events exporter: configured with the NetBird API URL/token and the
# Loki base URL, and started only once Loki is healthy.
netbird-events-exporter:
  image: ghcr.io/onelrian/signal:latest
  container_name: netbird-events-exporter
  restart: unless-stopped
  environment:
    # NETBIRD_API_URL wins if set; otherwise it is derived from NETBIRD_DOMAIN.
    - NETBIRD_API_URL=${NETBIRD_API_URL:-https://${NETBIRD_DOMAIN}/api}
    # Token is supplied through NETBIRD_PAT in the shell or .env file.
    - NETBIRD_API_TOKEN=${NETBIRD_PAT}
    - LOKI_URL=http://loki:3100
    - RUST_LOG=info
  depends_on:
    loki:
      condition: service_healthy
  networks:
    - monitoring
    - netbird
  # Same log rotation as every other service in this stack, so this
  # container's json-file logs cannot grow without bound.
  logging:
    driver: "json-file"
    options:
      max-size: "10m"
      max-file: "3"

lgtm-stack/Manual/loki-config.yaml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Authentication is switched off for this deployment.
auth_enabled: false

# Listener ports: HTTP on 3100, gRPC on 9096.
server:
  grpc_listen_port: 9096
  http_listen_port: 3100
6+
7+
# Shared settings for a single local instance: everything lives under /loki
# on the filesystem, with an in-memory ring and no replication.
common:
  path_prefix: /loki
  instance_addr: 127.0.0.1
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory
  storage:
    filesystem:
      rules_directory: /loki/rules
      chunks_directory: /loki/chunks
18+
19+
# One schema period starting 2024-01-01: TSDB index with v13 schema, chunks on
# the local filesystem, index tables rotated every 24h.
schema_config:
  configs:
    - from: 2024-01-01
      schema: v13
      store: tsdb
      object_store: filesystem
      index:
        period: 24h
        prefix: index_
28+
29+
limits_config:
  # Retention and sample age
  retention_period: 720h  # 30 days
  reject_old_samples: true
  reject_old_samples_max_age: 168h
  # Ingestion rate limits
  ingestion_rate_mb: 100
  ingestion_burst_size_mb: 200
  per_stream_rate_limit: 50MB
  per_stream_rate_limit_burst: 100MB
  # Stream and query ceilings
  max_streams_per_user: 0  # unlimited
  max_entries_limit_per_query: 100000
39+
40+
# Compactor with retention enabled: runs every 10m and keeps its working
# files and delete requests on the local filesystem.
compactor:
  working_directory: /loki/compactor
  delete_request_store: filesystem
  compaction_interval: 10m
  retention_enabled: true
  retention_delete_delay: 2h
  retention_delete_worker_count: 150
47+
48+
# Usage reporting is disabled.
analytics:
  reporting_enabled: false

0 commit comments

Comments
 (0)