Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions framework/observability/compose/conf/process-exporter.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# process-exporter grouping config: the single catch-all matcher below
# (cmdline regex '.+') reports EVERY process, grouped under the name
# "<comm>|<first cgroup path>".  The '|' separator is relied upon by the
# Prometheus metric_relabel_configs, which split groupname on the first '|'
# to recover the cgroup (and from it a container id).
# NOTE(review): indentation appears lost in this paste — 'cmdline' must be
# nested under the '- name:' list item in the real file; verify on disk.
process_names:
- name: "{{.Comm}}|{{ index .Cgroups 0 }}"
cmdline:
- '.+'
35 changes: 35 additions & 0 deletions framework/observability/compose/conf/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ scrape_configs:
static_configs:
- targets:
- cadvisor:8080
# Post-scrape relabeling for cadvisor metrics: derive a normalized
# container_id label from the raw cgroup 'id' label.
metric_relabel_configs:
# Extract hex ID from common cgroup/runtime formats:
# /docker/<id>, docker-<id>.scope, containerd://<id>, cri-containerd-<id>.scope, etc.
# The single capture group keeps only the 12-64 char lowercase hex id;
# series whose 'id' does not match are left without a container_id label.
- source_labels: [ id ]
regex: '.*(?:/docker/|docker-|containerd://|containerd-|cri-containerd-|crio-)([a-f0-9]{12,64})(?:\.scope)?.*'
target_label: container_id
replacement: '$1'
action: replace
- job_name: 'postgres_exporter_0'
static_configs:
- targets: ['postgres_exporter_0:9187']
Expand All @@ -35,3 +43,30 @@ scrape_configs:
- job_name: 'postgres_exporter_4'
static_configs:
- targets: ['postgres_exporter_4:9187']
# Scrape process-exporter (compose service on :9256) and normalize its
# groupname label ("<comm>|<cgroup>") into container_id + bare comm.
# Rule order matters: the two container_id extractors read the ORIGINAL
# groupname, so the rule that strips the '|<cgroup>' suffix must come last.
- job_name: 'process_exporter'
static_configs:
- targets: ['process-exporter:9256']
metric_relabel_configs:
# Extract container id from groupname so we can match it to the container
# NOTE(review): this regex is anchored at '$' after the hex id, so
# systemd-style cgroups like '.../docker-<id>.scope' will NOT match —
# confirm the host's cgroup driver before relying on this.
- source_labels: [groupname]
regex: "^[^|]+\\|.*/([a-f0-9]{12,64})$"
target_label: container_id
replacement: "$1"
action: replace

# We detect the special '/../..' tail and set container_id=host.
- source_labels: [groupname]
regex: "^[^|]+\\|/\\.\\./\\.\\.$"
target_label: container_id
replacement: "host"
action: replace

# Finally rewrite groupname itself down to the part before the first '|'
# (the process comm), dropping the cgroup path from the label.
- source_labels: [groupname]
regex: "^([^|]+)\\|.*"
target_label: groupname
replacement: "$1"
action: replace
# File-based discovery of dynamic targets; merged.json is produced by the
# sd-bridge service into the shared sd-targets volume.
# NOTE(review): quote 'container-sd' for consistency with the other job names.
- job_name: container-sd
file_sd_configs:
- files: [ "/etc/prometheus/targets/merged.json" ]
refresh_interval: 15s
31 changes: 31 additions & 0 deletions framework/observability/compose/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ services:
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- ./conf/prometheus.yml:/etc/prometheus/prometheus.yml
# Shared named volume written by the sd-bridge service (mounted there at
# /out); the 'container-sd' file_sd job reads merged.json from this path.
- sd-targets:/etc/prometheus/targets
ports:
- '9099:9090'

Expand Down Expand Up @@ -135,13 +136,43 @@ services:
- '9304:9187'
restart: unless-stopped

# Exposes per-process metrics on :9256, grouped per conf/process-exporter.yaml.
# NOTE(review): pin a versioned image tag instead of ':latest' for
# reproducible deployments.
process-exporter:
image: ncabatoff/process-exporter:latest
container_name: process-exporter
# Host PID namespace so the exporter can enumerate all host processes via
# the read-only /host/proc mount below.
# NOTE(review): 'privileged: true' may be unnecessary given pid:host plus
# the ro /proc mount — confirm and drop if the exporter runs without it.
pid: host
privileged: true
volumes:
- /proc:/host/proc:ro
- ./conf/process-exporter.yaml:/config.yaml:ro
command:
- "--procfs=/host/proc"
- "--config.path=/config.yaml"
ports:
- "9256:9256"

# Sidecar that polls Docker for containers labeled prom_sd=true, fetches
# each one's discovery JSON, and writes a merged file_sd list for Prometheus.
sd-bridge:
image: alpine:3.20
# Installs runtime deps at container start, then execs the bridge script.
# NOTE(review): the relative path 'scripts/sd-bridge.sh' only resolves
# because the image's working directory is '/' — consider the absolute
# '/scripts/sd-bridge.sh' to make that explicit.
command: [ "/bin/sh","-c","apk add --no-cache bash curl jq docker-cli && exec bash scripts/sd-bridge.sh" ]
volumes:
# Read-only socket: the script only runs 'docker ps' / 'docker inspect'.
- /var/run/docker.sock:/var/run/docker.sock:ro
# Same named volume Prometheus mounts at /etc/prometheus/targets.
- sd-targets:/out
- ./scripts:/scripts:ro
# These map 1:1 onto the env-var config block at the top of sd-bridge.sh.
environment:
LABEL_MATCH: "prom_sd=true"
DISCOVERY_PATH: "/discovery"
DISCOVERY_PORT: "6688"
DISCOVERY_SCHEME: "http"
OUT: "/out/merged.json"
SLEEP: "15"

volumes:
loki_data:
grafana_data:
grafana_home:
grafana_logs:
grafana_plugins:
tempo_data:
# Shared between sd-bridge (writes /out/merged.json) and prometheus
# (reads /etc/prometheus/targets/merged.json).
sd-targets:

networks:
default:
Expand Down
105 changes: 105 additions & 0 deletions framework/observability/compose/scripts/sd-bridge.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/usr/bin/env bash
# sd-bridge.sh (simplified)
# Discover Docker containers by label, pull each container's discovery JSON,
# add labels, merge + de-duplicate, and write a single file_sd JSON.

set -Eeuo pipefail

# --- Config (env) ---
LABEL_MATCH="${LABEL_MATCH:-framework=ctf}"
DEFAULT_PATH="${DISCOVERY_PATH:-/discovery}"
DEFAULT_PORT="${DISCOVERY_PORT:-6688}"
DEFAULT_SCHEME="${DISCOVERY_SCHEME:-http}"
PREFER_NETWORK="${NETWORK_NAME:-}"
OUT="${OUT:-/out/merged.json}"
SLEEP="${SLEEP:-15}"
REQUEST_TIMEOUT="${REQUEST_TIMEOUT:-5}"
REWRITE_TO_IP="${REWRITE_TO_IP:-0}" # set to 1 to replace host with container IP

# --- Helpers ---
log(){ printf '[sd-bridge] %s\n' "$*" >&2; }

# The docker-inspect helpers print a value (or nothing) and ALWAYS exit 0:
# a container can vanish between `docker ps` and `docker inspect`, and under
# `set -e` a failing command substitution inside a plain assignment would
# abort the whole loop. `|| true` makes a vanished container look like
# "no value", which the caller already handles.
get_ip(){
  local cid="$1" net="$2"
  if [[ -n "$net" ]]; then
    docker inspect "$cid" 2>/dev/null | jq -r --arg n "$net" '.[0].NetworkSettings.Networks[$n].IPAddress // empty' || true
  else
    docker inspect "$cid" 2>/dev/null | jq -r '.[0].NetworkSettings.Networks | to_entries[0].value.IPAddress // empty' || true
  fi
}
get_label(){ docker inspect "$1" 2>/dev/null | jq -r --arg k "$2" '.[0].Config.Labels[$k] // empty' || true; }
get_name(){ docker inspect "$1" 2>/dev/null | jq -r '.[0].Name | ltrimstr("/")' || true; }

# stdin: concatenated file_sd target lists; stdout: a single array in which
# entries with an identical label set are collapsed and their targets
# de-duplicated and sorted.
merge_and_dedupe(){
  jq -s '
    add // []
    | map({targets: (.targets // []), labels: (.labels // {})})
    | group_by(.labels)
    | map({labels: (.[0].labels), targets: ([.[].targets[]] | unique | sort)})
  '
}

# Write stdin to $1 via a sibling temp file + rename, so a reader
# (Prometheus file_sd) never observes a partially-written file.
atomic_write(){ local p="$1" t="$1.tmp"; cat >"$t" && mv "$t" "$p"; }

# annotate_file FILE IP NAME
# Add container_name / scrape_path labels to every entry in FILE; when
# REWRITE_TO_IP=1, also rewrite each "host:port" target to "IP:port".
# A target with no ":" is replaced by the bare IP — the previous
# unconditional split(":")[1] raised a jq type error (string + null) on
# such targets and, being a top-level `jq && mv` list, killed the whole
# script under `set -e`.  On any jq failure the file is reset to "[]" and
# a non-zero status is returned for logging.
# NOTE(review): bracketed IPv6 targets are not handled by the rewrite.
annotate_file(){
  local f="$1" ip="$2" name="$3" rw=false
  if [[ "$REWRITE_TO_IP" == "1" ]]; then rw=true; fi
  if jq --arg ip "$ip" --arg name "$name" --argjson rw "$rw" '
       map(
         (if $rw
            then .targets |= map(if contains(":") then $ip + ":" + (split(":")[1]) else $ip end)
            else . end)
         | .labels = ((.labels // {}) + {
             container_name: $name,
             scrape_path: (.labels.__metrics_path__ // "")
           })
       )
     ' "$f" > "$f.tmp" && mv "$f.tmp" "$f"; then
    return 0
  fi
  rm -f "$f.tmp"
  echo '[]' > "$f"
  return 1
}

# --- Init ---
mkdir -p "$(dirname "$OUT")"
echo '[]' | atomic_write "$OUT"

# --- Main loop ---
while true; do
  mapfile -t cids < <(docker ps -q --filter "label=$LABEL_MATCH" || true)
  if (( ${#cids[@]} == 0 )); then
    echo '[]' | atomic_write "$OUT"
    log "no matching containers; wrote empty array"
    sleep "$SLEEP"; continue
  fi

  files=()
  for cid in "${cids[@]}"; do
    ip="$(get_ip "$cid" "$PREFER_NETWORK")"
    if [[ -z "$ip" ]]; then log "skip ${cid:0:12}: no IP"; continue; fi
    name="$(get_name "$cid")"

    # Per-container overrides via labels (optional)
    path="$(get_label "$cid" prom_sd_path)";     path="${path:-$DEFAULT_PATH}"
    port="$(get_label "$cid" prom_sd_port)";     port="${port:-$DEFAULT_PORT}"
    scheme="$(get_label "$cid" prom_sd_scheme)"; scheme="${scheme:-$DEFAULT_SCHEME}"

    url="${scheme}://${ip}:${port}${path}"
    f="$(mktemp)"; files+=("$f")
    # jq '.' both validates the payload is JSON and normalizes it; with
    # pipefail the condition is false if either curl or jq fails.
    if curl -fsSL --max-time "$REQUEST_TIMEOUT" "$url" | jq '.' > "$f" 2>/dev/null; then
      if annotate_file "$f" "$ip" "$name"; then
        log "ok ${url}"
      else
        log "annotate failed for ${url}; using []"
      fi
    else
      log "fail ${url}; using []"
      echo '[]' > "$f"
    fi
  done

  if (( ${#files[@]} > 0 )); then
    cat "${files[@]}" | merge_and_dedupe | atomic_write "$OUT"
    rm -f "${files[@]}"
    log "merged ${#files[@]} lists into $(wc -c < "$OUT") bytes"
  else
    echo '[]' | atomic_write "$OUT"
  fi

  sleep "$SLEEP"
done
Loading