Skip to content

Commit 0dbd432

Browse files
authored
Merge pull request #252 from itsDNNS/feat/196-traceroute-burst-capture
feat(cm): traceroute burst capture (#196)
2 parents 7006241 + 13d899e commit 0dbd432

25 files changed

+3391
-13
lines changed

Dockerfile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1111
COPY requirements.txt .
1212
RUN pip install --no-cache-dir --prefix=/install -r requirements.txt
1313
COPY tools/icmp_probe_helper.c /build/icmp_probe_helper.c
14+
COPY tools/traceroute_helper.c /build/traceroute_helper.c
1415
RUN mkdir -p /build/out && \
15-
gcc -O2 -Wall -o /build/out/docsight-icmp-helper /build/icmp_probe_helper.c
16+
gcc -O2 -Wall -o /build/out/docsight-icmp-helper /build/icmp_probe_helper.c && \
17+
gcc -O2 -Wall -o /build/out/docsight-traceroute-helper /build/traceroute_helper.c
1618

1719
# --- runtime stage: slim final image ---
1820
FROM python:3.12-slim
@@ -22,13 +24,16 @@ RUN echo "${VERSION}" > /app/VERSION
2224

2325
COPY --from=builder /install /usr/local
2426
COPY --from=builder /build/out/docsight-icmp-helper /usr/local/bin/docsight-icmp-helper
27+
COPY --from=builder /build/out/docsight-traceroute-helper /usr/local/bin/docsight-traceroute-helper
2528

2629
# Keep elevated privileges scoped to the dedicated ICMP and traceroute helpers.
2730
RUN apt-get update && apt-get install -y --no-install-recommends \
2831
gosu \
2932
libjpeg62-turbo \
3033
&& chown root:root /usr/local/bin/docsight-icmp-helper \
3134
&& chmod 4755 /usr/local/bin/docsight-icmp-helper \
35+
&& chown root:root /usr/local/bin/docsight-traceroute-helper \
36+
&& chmod 4755 /usr/local/bin/docsight-traceroute-helper \
3237
&& rm -rf /var/lib/apt/lists/*
3338

3439
RUN adduser --disabled-password --gecos "" --uid 1000 appuser && \

app/blueprints/config_bp.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ def api_demo_migrate():
116116
return jsonify({"success": False, "error": "Storage not initialized"}), 500
117117
try:
118118
purged = _storage.purge_demo_data()
119+
# Purge demo traceroute traces from Connection Monitor
120+
import os
121+
from app.modules.connection_monitor.storage import ConnectionMonitorStorage
122+
cm_db_path = os.path.join(os.environ.get("DATA_DIR", "/data"), "connection_monitor.db")
123+
if os.path.exists(cm_db_path):
124+
cm_storage = ConnectionMonitorStorage(cm_db_path)
125+
cm_storage.purge_demo_traces()
119126
_config_manager.save({"demo_mode": False})
120127
_storage.max_days = _config_manager.get("history_days", 7)
121128
audit_log.info("Demo migration: ip=%s purged=%d rows", _get_client_ip(), purged)

app/collectors/demo.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -993,6 +993,94 @@ def _seed_connection_monitor_data(self, now):
993993

994994
log.info("Demo: seeded %d connection monitor samples (%d days, 3 targets)", len(rows), days)
995995

996+
# --- Seed traceroute traces ---
997+
self._seed_traceroute_traces(cm, cf_id, gg_id, now, rng)
998+
999+
def _seed_traceroute_traces(self, cm, cf_id, gg_id, now, rng):
    """Store a fixed set of demo traceroute traces through ``cm.save_trace``.

    Two hop paths are defined, one keyed by ``cf_id`` and one by ``gg_id``.
    Five traces are generated from them; per-hop latency, the number of
    responding probes and the hour offset of each trace are drawn from
    ``rng``. Every trace is saved with ``is_demo=True``.
    """
    import hashlib

    # Each entry is (hop_ip, hop_host, base_latency_ms); a base latency of
    # None marks a hop that timed out.
    # Path shape: home -> ISP -> backbone -> target
    paths_by_target = {
        cf_id: [
            ("192.168.178.1", "fritz.box", 1.2),
            ("62.155.243.1", "dslam-ffm.telekom.de", 5.8),
            ("62.157.250.22", "cr-ffm01.telekom.de", 8.1),
            ("62.157.250.89", "cr-ffm02.telekom.de", 8.9),
            ("80.156.160.178", "decix-peer.telekom.de", 10.3),
            ("172.71.128.2", "cloudflare-ic.decix.net", 11.0),
            ("172.71.128.34", None, 11.5),
            ("104.16.132.229", "one.one.one.one", 11.8),
            (None, None, None),  # timeout hop
            ("172.71.0.150", None, 12.1),
            ("1.1.1.1", "one.one.one.one", 12.4),
        ],
        gg_id: [
            ("192.168.178.1", "fritz.box", 1.1),
            ("62.155.243.1", "dslam-ffm.telekom.de", 5.6),
            ("62.157.250.22", "cr-ffm01.telekom.de", 8.0),
            ("62.157.250.89", "cr-ffm02.telekom.de", 8.7),
            ("80.156.160.178", "decix-peer.telekom.de", 10.1),
            ("209.85.149.32", "google-ic.decix.net", 11.2),
            ("108.170.236.57", None, 12.0),
            ("142.251.51.15", None, 13.1),
            (None, None, None),  # timeout hop
            ("108.170.232.97", None, 13.8),
            ("142.250.236.131", None, 14.2),
            ("8.8.8.8", "dns.google", 14.5),
        ],
    }

    # (target_id, days_ago, trigger_reason, reached_target)
    planned_traces = [
        (cf_id, 5, "outage", True),
        (cf_id, 2, "packet_loss", True),
        (cf_id, 0, "manual", True),
        (gg_id, 4, "outage", True),
        (gg_id, 1, "manual", True),
    ]

    for target_id, days_ago, trigger, reached in planned_traces:
        hop_rows = []
        fingerprint_parts = []
        for position, (ip, host, base_latency) in enumerate(
            paths_by_target[target_id], start=1
        ):
            if base_latency is not None:
                # Jitter the latency, then occasionally drop probes.
                latency = round(base_latency + rng.uniform(-0.5, 0.5), 2)
                if rng.random() > 0.05:
                    responded = 3
                else:
                    responded = rng.randint(1, 2)
                hop_rows.append({
                    "hop_index": position,
                    "hop_ip": ip,
                    "hop_host": host,
                    "latency_ms": latency,
                    "probes_responded": responded,
                })
                fingerprint_parts.append(ip or "*")
                continue

            # Timeout hop: nothing answered, so every field is empty.
            hop_rows.append({
                "hop_index": position,
                "hop_ip": None,
                "hop_host": None,
                "latency_ms": None,
                "probes_responded": 0,
            })
            fingerprint_parts.append("*")

        # The fingerprint is the first 16 hex chars of the MD5 of the
        # ">"-joined hop IPs, with "*" standing in for silent hops.
        joined_route = ">".join(fingerprint_parts)
        fingerprint = hashlib.md5(joined_route.encode()).hexdigest()[:16]

        age = timedelta(days=days_ago, hours=rng.randint(0, 12))
        captured_at = (now - age).timestamp()

        cm.save_trace(
            target_id=target_id,
            timestamp=captured_at,
            trigger_reason=trigger,
            hops=hop_rows,
            route_fingerprint=fingerprint,
            reached_target=reached,
            is_demo=True,
        )

    log.info("Demo: seeded %d traceroute traces", len(planned_traces))
1083+
9961084
@staticmethod
9971085
def _generate_bqm_png(width=800, height=200, seed=0):
9981086
"""Generate a simple BQM-style quality graph as PNG bytes."""

app/main.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,12 @@ def _run_collector(collector):
294294
stop_event.wait(1)
295295
finally:
296296
executor.shutdown(wait=False, cancel_futures=True)
297+
for c in collectors:
298+
if hasattr(c, "stop"):
299+
try:
300+
c.stop()
301+
except Exception:
302+
pass
297303

298304
# Cleanup MQTT
299305
if mqtt_pub:

app/modules/connection_monitor/collector.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from app.modules.connection_monitor.event_rules import ConnectionEventRules
1010
from app.modules.connection_monitor.probe import ProbeEngine
1111
from app.modules.connection_monitor.storage import ConnectionMonitorStorage
12+
from app.modules.connection_monitor.traceroute_probe import TracerouteProbe
13+
from app.modules.connection_monitor.traceroute_trigger import TracerouteTrigger
1214

1315
logger = logging.getLogger(__name__)
1416

@@ -29,6 +31,7 @@ def __init__(self, config_mgr, storage, web, **kwargs):
2931

3032
method = config_mgr.get("connection_monitor_probe_method", "auto")
3133
self._probe = ProbeEngine(method=method)
34+
self._traceroute_probe = TracerouteProbe()
3235
self._last_probe: dict[int, float] = {}
3336
self._last_cleanup = 0.0
3437
self._event_rules = ConnectionEventRules(
@@ -39,6 +42,10 @@ def __init__(self, config_mgr, storage, web, **kwargs):
3942
data_dir = os.environ.get("DATA_DIR", "/data")
4043
db_path = os.path.join(data_dir, "connection_monitor.db")
4144
self._cm_storage = ConnectionMonitorStorage(db_path)
45+
self._traceroute_trigger = TracerouteTrigger(
46+
probe=self._traceroute_probe,
47+
storage=self._cm_storage,
48+
)
4249

4350
self._seeded = False
4451
self._smart_capture = None
@@ -92,6 +99,7 @@ def collect(self) -> CollectorResult:
9299
self._config_mgr.get("connection_monitor_retention_days", 0)
93100
)
94101
self._cm_storage.cleanup(retention)
102+
self._cm_storage.cleanup_traces(retention)
95103
self._last_cleanup = now
96104

97105
return CollectorResult.ok(self.name, {"probed": len(due)})
@@ -153,6 +161,9 @@ def _check_events(self, samples: list[dict]):
153161
)
154162
all_events.extend(events)
155163

164+
for event in all_events:
165+
self._traceroute_trigger.on_event(event)
166+
156167
if all_events and hasattr(self._core_storage, "save_events_with_ids"):
157168
self._core_storage.save_events_with_ids(all_events)
158169
if self._smart_capture:
@@ -175,3 +186,7 @@ def get_storage(self) -> ConnectionMonitorStorage:
175186
def get_probe(self) -> ProbeEngine:
176187
"""Expose probe engine for capability endpoint."""
177188
return self._probe
189+
190+
def stop(self):
    """Release background resources by shutting down the traceroute trigger."""
    trigger = self._traceroute_trigger
    trigger.shutdown()

app/modules/connection_monitor/i18n/de.json

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,19 @@
5454
"cm_resolution_raw": "Rohdaten",
5555
"cm_resolution_1min": "1-Min-Durchschnitte",
5656
"cm_resolution_5min": "5-Min-Durchschnitte",
57-
"cm_resolution_1hr": "1-Std-Durchschnitte"
57+
"cm_resolution_1hr": "1-Std-Durchschnitte",
58+
"traceroute.run_button": "Traceroute starten",
59+
"traceroute.running": "Traceroute wird ausgeführt...",
60+
"traceroute.result": "Traceroute-Ergebnis",
61+
"traceroute.trigger_manual": "Manuell",
62+
"traceroute.trigger_outage": "Ausfall",
63+
"traceroute.trigger_packet_loss": "Paketverlust",
64+
"traceroute.hop": "Hop",
65+
"traceroute.hops": "Hops",
66+
"traceroute.reached": "Ziel erreicht",
67+
"traceroute.not_reached": "Ziel nicht erreicht",
68+
"traceroute.history": "Traceroute-Verlauf",
69+
"traceroute.no_traces": "Noch keine Traceroutes aufgezeichnet.",
70+
"traceroute.probes_responded": "Antworten",
71+
"traceroute.partial_result": "Teilergebnis (Zeitüberschreitung)"
5872
}

app/modules/connection_monitor/i18n/en.json

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,19 @@
5454
"cm_resolution_raw": "Raw samples",
5555
"cm_resolution_1min": "1-min averages",
5656
"cm_resolution_5min": "5-min averages",
57-
"cm_resolution_1hr": "1-hour averages"
57+
"cm_resolution_1hr": "1-hour averages",
58+
"traceroute.run_button": "Run Traceroute",
59+
"traceroute.running": "Running traceroute...",
60+
"traceroute.result": "Traceroute Result",
61+
"traceroute.trigger_manual": "Manual",
62+
"traceroute.trigger_outage": "Outage",
63+
"traceroute.trigger_packet_loss": "Packet Loss",
64+
"traceroute.hop": "Hop",
65+
"traceroute.hops": "Hops",
66+
"traceroute.reached": "Target reached",
67+
"traceroute.not_reached": "Target not reached",
68+
"traceroute.history": "Traceroute History",
69+
"traceroute.no_traces": "No traceroutes recorded yet.",
70+
"traceroute.probes_responded": "Probes",
71+
"traceroute.partial_result": "Partial result (timeout)"
5872
}

app/modules/connection_monitor/i18n/es.json

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,19 @@
5454
"cm_resolution_raw": "Datos crudos",
5555
"cm_resolution_1min": "Promedios de 1 min",
5656
"cm_resolution_5min": "Promedios de 5 min",
57-
"cm_resolution_1hr": "Promedios de 1 h"
57+
"cm_resolution_1hr": "Promedios de 1 h",
58+
"traceroute.run_button": "Ejecutar Traceroute",
59+
"traceroute.running": "Ejecutando traceroute...",
60+
"traceroute.result": "Resultado del Traceroute",
61+
"traceroute.trigger_manual": "Manual",
62+
"traceroute.trigger_outage": "Corte",
63+
"traceroute.trigger_packet_loss": "Perdida de paquetes",
64+
"traceroute.hop": "Salto",
65+
"traceroute.hops": "Saltos",
66+
"traceroute.reached": "Objetivo alcanzado",
67+
"traceroute.not_reached": "Objetivo no alcanzado",
68+
"traceroute.history": "Historial de Traceroute",
69+
"traceroute.no_traces": "Aun no se han registrado traceroutes.",
70+
"traceroute.probes_responded": "Sondas",
71+
"traceroute.partial_result": "Resultado parcial (timeout)"
5872
}

app/modules/connection_monitor/i18n/fr.json

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,19 @@
5454
"cm_resolution_raw": "Donnees brutes",
5555
"cm_resolution_1min": "Moyennes 1 min",
5656
"cm_resolution_5min": "Moyennes 5 min",
57-
"cm_resolution_1hr": "Moyennes 1 h"
57+
"cm_resolution_1hr": "Moyennes 1 h",
58+
"traceroute.run_button": "Lancer Traceroute",
59+
"traceroute.running": "Traceroute en cours...",
60+
"traceroute.result": "Resultat du Traceroute",
61+
"traceroute.trigger_manual": "Manuel",
62+
"traceroute.trigger_outage": "Panne",
63+
"traceroute.trigger_packet_loss": "Perte de paquets",
64+
"traceroute.hop": "Saut",
65+
"traceroute.hops": "Sauts",
66+
"traceroute.reached": "Cible atteinte",
67+
"traceroute.not_reached": "Cible non atteinte",
68+
"traceroute.history": "Historique Traceroute",
69+
"traceroute.no_traces": "Aucun traceroute enregistre.",
70+
"traceroute.probes_responded": "Sondes",
71+
"traceroute.partial_result": "Resultat partiel (timeout)"
5872
}

app/modules/connection_monitor/routes.py

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import math
77
import re
88
import time
9-
from datetime import datetime
9+
from datetime import datetime, timezone
1010

1111
from flask import Blueprint, jsonify, request, Response
1212

@@ -50,6 +50,31 @@ def _get_probe_engine():
5050
return ProbeEngine(method=method)
5151

5252

53+
_traceroute_probe = None
54+
55+
56+
def _get_traceroute_probe():
    """Return the module-level TracerouteProbe, creating it on first use."""
    global _traceroute_probe
    if _traceroute_probe is not None:
        return _traceroute_probe

    # Imported here rather than at module top, so the probe module is only
    # loaded once a traceroute is actually requested.
    from app.modules.connection_monitor.traceroute_probe import TracerouteProbe

    _traceroute_probe = TracerouteProbe()
    return _traceroute_probe
62+
63+
64+
def _epoch_to_iso(ts):
65+
return datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
66+
67+
68+
def _hop_to_dict(hop):
69+
return {
70+
"hop_index": hop.hop_index,
71+
"hop_ip": hop.hop_ip,
72+
"hop_host": hop.hop_host,
73+
"latency_ms": hop.latency_ms,
74+
"probes_responded": hop.probes_responded,
75+
}
76+
77+
5378
# --- Targets ---
5479

5580
@bp.route("/api/connection-monitor/targets", methods=["GET"])
@@ -516,3 +541,67 @@ def api_export_csv(target_id):
516541
def api_capability():
517542
probe = _get_probe_engine()
518543
return jsonify(probe.capability_info())
544+
545+
546+
# --- Traceroute ---
547+
548+
@bp.route("/api/connection-monitor/traceroute/<int:target_id>", methods=["POST"])
@require_auth
def api_run_traceroute(target_id):
    """Run a traceroute against a stored target on demand and persist it.

    Responds with 404 and an error payload when ``target_id`` is unknown.
    Otherwise the probe is run against the target's ``host``, the trace is
    saved with trigger reason ``"manual"``, and the response carries the
    new trace id, its timestamp, the route fingerprint and the hop list.
    """
    storage = _get_cm_storage()
    target = storage.get_target(target_id)
    if not target:
        return jsonify({"error": "Target not found"}), 404

    probe = _get_traceroute_probe()
    result = probe.run(target["host"])

    # Read the clock once: the persisted timestamp and the one reported to
    # the caller must be the same instant, otherwise a later lookup of this
    # trace can show a different time than this response did.
    captured_at = time.time()

    trace_id = storage.save_trace(
        target_id=target_id,
        timestamp=captured_at,
        trigger_reason="manual",
        # Storage gets its own list so the response payload is never aliased.
        hops=[_hop_to_dict(h) for h in result.hops],
        route_fingerprint=result.route_fingerprint,
        reached_target=result.reached_target,
    )

    return jsonify({
        "trace_id": trace_id,
        "timestamp": _epoch_to_iso(captured_at),
        "trigger_reason": "manual",
        "reached_target": result.reached_target,
        "hop_count": len(result.hops),
        "route_fingerprint": result.route_fingerprint,
        "hops": [_hop_to_dict(h) for h in result.hops],
    })
581+
582+
583+
@bp.route("/api/connection-monitor/traces/<int:target_id>")
@require_auth
def api_get_traces(target_id):
    """Return the stored traces of one target as a JSON list.

    Query parameters: ``start`` and ``end`` (optional floats passed through
    to storage) and ``limit`` (int, default 100, clamped to 1..1000).
    Each trace's ``timestamp`` is converted to an ISO string before output.
    """
    cm_store = _get_cm_storage()
    query = request.args
    window_start = query.get("start", type=float)
    window_end = query.get("end", type=float)

    # Clamp the requested page size into the allowed 1..1000 range.
    page_size = query.get("limit", 100, type=int)
    if page_size > 1000:
        page_size = 1000
    if page_size < 1:
        page_size = 1

    rows = cm_store.get_traces(
        target_id, start=window_start, end=window_end, limit=page_size
    )
    for row in rows:
        row["timestamp"] = _epoch_to_iso(row["timestamp"])
    return jsonify(rows)
595+
596+
597+
@bp.route("/api/connection-monitor/trace/<int:trace_id>")
@require_auth
def api_get_trace_detail(trace_id):
    """Return one stored trace, including its hops, as JSON.

    Responds with 404 and an error payload when storage has no trace for
    ``trace_id``. The trace's ``timestamp`` is converted to an ISO string.
    """
    cm_store = _get_cm_storage()
    record = cm_store.get_trace(trace_id)
    if not record:
        not_found = {"error": "Trace not found"}
        return jsonify(not_found), 404

    hop_rows = cm_store.get_trace_hops(trace_id)
    record["timestamp"] = _epoch_to_iso(record["timestamp"])
    record["hops"] = hop_rows
    return jsonify(record)

0 commit comments

Comments
 (0)