Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
b7ff4aa
docs: add traceroute burst capture design spec (#196)
itsDNNS Mar 17, 2026
e89b8f9
docs: add traceroute burst capture implementation plan (#196)
itsDNNS Mar 17, 2026
79d1504
feat(cm): add traceroute helper C binary (#196)
itsDNNS Mar 17, 2026
dcf5361
build: add traceroute helper to Docker build (#196)
itsDNNS Mar 17, 2026
4ef9574
test(cm): add TracerouteProbe unit tests (#196)
itsDNNS Mar 17, 2026
c5a36ec
feat(cm): implement TracerouteProbe wrapper (#196)
itsDNNS Mar 17, 2026
7fc2321
test(cm): add traceroute storage unit tests (#196)
itsDNNS Mar 17, 2026
c0dc583
feat(cm): add traceroute storage tables and CRUD (#196)
itsDNNS Mar 17, 2026
a83c3ee
test(cm): add TracerouteTrigger unit tests (#196)
itsDNNS Mar 17, 2026
16281fd
feat(cm): implement TracerouteTrigger (#196)
itsDNNS Mar 17, 2026
e41d5d9
feat(cm): wire TracerouteTrigger into collector (#196)
itsDNNS Mar 17, 2026
d21db28
feat: add collector stop() hook in polling loop (#196)
itsDNNS Mar 17, 2026
73a28c4
test(cm): add traceroute route unit tests (#196)
itsDNNS Mar 17, 2026
df1d066
feat(cm): add traceroute API endpoints (#196)
itsDNNS Mar 17, 2026
3d02873
i18n(cm): add traceroute translation keys (#196)
itsDNNS Mar 17, 2026
9227366
ui(cm): add traceroute button and trace history to detail view (#196)
itsDNNS Mar 17, 2026
7f85970
feat(cm): add demo traces and migration hook (#196)
itsDNNS Mar 17, 2026
edca00d
fix(i18n): correct German umlaut in traceroute.running (#196)
itsDNNS Mar 17, 2026
9413f21
fix(cm): check seteuid return + drop privs in --check path (#196)
itsDNNS Mar 17, 2026
c195c2e
fix(test): mock utc_now in capture timeline test
itsDNNS Mar 17, 2026
9a19acb
fix(charts): preserve zoom state during auto-refresh
itsDNNS Mar 17, 2026
151df22
ux(cm): add visible Reset Zoom button and hover hint
itsDNNS Mar 17, 2026
427a63e
fix(charts): use destroy/recreate with zoom restore instead of setData
itsDNNS Mar 17, 2026
6f64123
fix(charts): prevent scroll jump by fixing container height during swap
itsDNNS Mar 17, 2026
9a2790f
ux(cm): dynamic Y-axis scaling based on actual latency data
itsDNNS Mar 17, 2026
13d899e
fix(cm): use 40ms Y-axis floor and guard against zero dataMax
itsDNNS Mar 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
COPY requirements.txt .
RUN pip install --no-cache-dir --prefix=/install -r requirements.txt
COPY tools/icmp_probe_helper.c /build/icmp_probe_helper.c
COPY tools/traceroute_helper.c /build/traceroute_helper.c
RUN mkdir -p /build/out && \
gcc -O2 -Wall -o /build/out/docsight-icmp-helper /build/icmp_probe_helper.c
gcc -O2 -Wall -o /build/out/docsight-icmp-helper /build/icmp_probe_helper.c && \
gcc -O2 -Wall -o /build/out/docsight-traceroute-helper /build/traceroute_helper.c

# --- runtime stage: slim final image ---
FROM python:3.12-slim
Expand All @@ -22,13 +24,16 @@ RUN echo "${VERSION}" > /app/VERSION

COPY --from=builder /install /usr/local
COPY --from=builder /build/out/docsight-icmp-helper /usr/local/bin/docsight-icmp-helper
COPY --from=builder /build/out/docsight-traceroute-helper /usr/local/bin/docsight-traceroute-helper

# Keep elevated privileges scoped to the dedicated ICMP and traceroute helpers.
RUN apt-get update && apt-get install -y --no-install-recommends \
gosu \
libjpeg62-turbo \
&& chown root:root /usr/local/bin/docsight-icmp-helper \
&& chmod 4755 /usr/local/bin/docsight-icmp-helper \
&& chown root:root /usr/local/bin/docsight-traceroute-helper \
&& chmod 4755 /usr/local/bin/docsight-traceroute-helper \
&& rm -rf /var/lib/apt/lists/*

RUN adduser --disabled-password --gecos "" --uid 1000 appuser && \
Expand Down
7 changes: 7 additions & 0 deletions app/blueprints/config_bp.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,13 @@ def api_demo_migrate():
return jsonify({"success": False, "error": "Storage not initialized"}), 500
try:
purged = _storage.purge_demo_data()
# Purge demo traceroute traces from Connection Monitor
import os
from app.modules.connection_monitor.storage import ConnectionMonitorStorage
cm_db_path = os.path.join(os.environ.get("DATA_DIR", "/data"), "connection_monitor.db")
if os.path.exists(cm_db_path):
cm_storage = ConnectionMonitorStorage(cm_db_path)
cm_storage.purge_demo_traces()
_config_manager.save({"demo_mode": False})
_storage.max_days = _config_manager.get("history_days", 7)
audit_log.info("Demo migration: ip=%s purged=%d rows", _get_client_ip(), purged)
Expand Down
88 changes: 88 additions & 0 deletions app/collectors/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,6 +993,94 @@ def _seed_connection_monitor_data(self, now):

log.info("Demo: seeded %d connection monitor samples (%d days, 3 targets)", len(rows), days)

# --- Seed traceroute traces ---
self._seed_traceroute_traces(cm, cf_id, gg_id, now, rng)

def _seed_traceroute_traces(self, cm, cf_id, gg_id, now, rng):
    """Seed realistic traceroute traces for demo targets.

    Five traces are written through ``cm.save_trace`` with ``is_demo=True``:
    three for ``cf_id`` and two for ``gg_id``. Each trace replays a fixed hop
    template with a small random latency jitter and one timeout hop.

    Args:
        cm: Storage object; only its ``save_trace`` method is called.
        cf_id: Target id seeded with the hop template ending at 1.1.1.1.
        gg_id: Target id seeded with the hop template ending at 8.8.8.8.
        now: Reference datetime; every trace is back-dated from it.
        rng: Random source providing ``uniform``, ``random`` and ``randint``.
    """
    import hashlib

    # Realistic hop templates: home -> ISP -> backbone -> target
    hop_templates = {
        cf_id: [
            {"hop_ip": "192.168.178.1", "hop_host": "fritz.box", "base_lat": 1.2},
            {"hop_ip": "62.155.243.1", "hop_host": "dslam-ffm.telekom.de", "base_lat": 5.8},
            {"hop_ip": "62.157.250.22", "hop_host": "cr-ffm01.telekom.de", "base_lat": 8.1},
            {"hop_ip": "62.157.250.89", "hop_host": "cr-ffm02.telekom.de", "base_lat": 8.9},
            {"hop_ip": "80.156.160.178", "hop_host": "decix-peer.telekom.de", "base_lat": 10.3},
            {"hop_ip": "172.71.128.2", "hop_host": "cloudflare-ic.decix.net", "base_lat": 11.0},
            {"hop_ip": "172.71.128.34", "hop_host": None, "base_lat": 11.5},
            {"hop_ip": "104.16.132.229", "hop_host": "one.one.one.one", "base_lat": 11.8},
            {"hop_ip": None, "hop_host": None, "base_lat": None},  # timeout hop
            {"hop_ip": "172.71.0.150", "hop_host": None, "base_lat": 12.1},
            {"hop_ip": "1.1.1.1", "hop_host": "one.one.one.one", "base_lat": 12.4},
        ],
        gg_id: [
            {"hop_ip": "192.168.178.1", "hop_host": "fritz.box", "base_lat": 1.1},
            {"hop_ip": "62.155.243.1", "hop_host": "dslam-ffm.telekom.de", "base_lat": 5.6},
            {"hop_ip": "62.157.250.22", "hop_host": "cr-ffm01.telekom.de", "base_lat": 8.0},
            {"hop_ip": "62.157.250.89", "hop_host": "cr-ffm02.telekom.de", "base_lat": 8.7},
            {"hop_ip": "80.156.160.178", "hop_host": "decix-peer.telekom.de", "base_lat": 10.1},
            {"hop_ip": "209.85.149.32", "hop_host": "google-ic.decix.net", "base_lat": 11.2},
            {"hop_ip": "108.170.236.57", "hop_host": None, "base_lat": 12.0},
            {"hop_ip": "142.251.51.15", "hop_host": None, "base_lat": 13.1},
            {"hop_ip": None, "hop_host": None, "base_lat": None},  # timeout hop
            {"hop_ip": "108.170.232.97", "hop_host": None, "base_lat": 13.8},
            {"hop_ip": "142.250.236.131", "hop_host": None, "base_lat": 14.2},
            {"hop_ip": "8.8.8.8", "hop_host": "dns.google", "base_lat": 14.5},
        ],
    }

    trace_configs = [
        {"target_id": cf_id, "days_ago": 5, "trigger": "outage", "reached": True},
        {"target_id": cf_id, "days_ago": 2, "trigger": "packet_loss", "reached": True},
        {"target_id": cf_id, "days_ago": 0, "trigger": "manual", "reached": True},
        {"target_id": gg_id, "days_ago": 4, "trigger": "outage", "reached": True},
        {"target_id": gg_id, "days_ago": 1, "trigger": "manual", "reached": True},
    ]

    for tc in trace_configs:
        hops = []
        ips_for_fp = []
        for hop_index, tmpl in enumerate(hop_templates[tc["target_id"]], start=1):
            if tmpl["base_lat"] is None:
                # Timeout hop: no address, no latency, no probe answered.
                hops.append({
                    "hop_index": hop_index,
                    "hop_ip": None,
                    "hop_host": None,
                    "latency_ms": None,
                    "probes_responded": 0,
                })
                ips_for_fp.append("*")
                continue

            # The rng draw order (uniform, random, then optional randint) is
            # kept fixed so a seeded rng yields the same demo data as before.
            lat = round(tmpl["base_lat"] + rng.uniform(-0.5, 0.5), 2)
            probes = 3 if rng.random() > 0.05 else rng.randint(1, 2)
            hops.append({
                "hop_index": hop_index,
                "hop_ip": tmpl["hop_ip"],
                "hop_host": tmpl["hop_host"],
                "latency_ms": lat,
                "probes_responded": probes,
            })
            ips_for_fp.append(tmpl["hop_ip"] or "*")

        # MD5 is only a short route fingerprint here, not a security control.
        # Flagging that keeps it usable on FIPS-restricted OpenSSL builds; the
        # resulting digest is unchanged.
        route_digest = hashlib.md5(
            ">".join(ips_for_fp).encode(), usedforsecurity=False
        )
        fp = route_digest.hexdigest()[:16]
        ts = (now - timedelta(days=tc["days_ago"], hours=rng.randint(0, 12))).timestamp()

        cm.save_trace(
            target_id=tc["target_id"],
            timestamp=ts,
            trigger_reason=tc["trigger"],
            hops=hops,
            route_fingerprint=fp,
            reached_target=tc["reached"],
            is_demo=True,
        )

    log.info("Demo: seeded %d traceroute traces", len(trace_configs))

@staticmethod
def _generate_bqm_png(width=800, height=200, seed=0):
"""Generate a simple BQM-style quality graph as PNG bytes."""
Expand Down
6 changes: 6 additions & 0 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,12 @@ def _run_collector(collector):
stop_event.wait(1)
finally:
executor.shutdown(wait=False, cancel_futures=True)
for c in collectors:
if hasattr(c, "stop"):
try:
c.stop()
except Exception:
pass

# Cleanup MQTT
if mqtt_pub:
Expand Down
15 changes: 15 additions & 0 deletions app/modules/connection_monitor/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from app.modules.connection_monitor.event_rules import ConnectionEventRules
from app.modules.connection_monitor.probe import ProbeEngine
from app.modules.connection_monitor.storage import ConnectionMonitorStorage
from app.modules.connection_monitor.traceroute_probe import TracerouteProbe
from app.modules.connection_monitor.traceroute_trigger import TracerouteTrigger

logger = logging.getLogger(__name__)

Expand All @@ -29,6 +31,7 @@ def __init__(self, config_mgr, storage, web, **kwargs):

method = config_mgr.get("connection_monitor_probe_method", "auto")
self._probe = ProbeEngine(method=method)
self._traceroute_probe = TracerouteProbe()
self._last_probe: dict[int, float] = {}
self._last_cleanup = 0.0
self._event_rules = ConnectionEventRules(
Expand All @@ -39,6 +42,10 @@ def __init__(self, config_mgr, storage, web, **kwargs):
data_dir = os.environ.get("DATA_DIR", "/data")
db_path = os.path.join(data_dir, "connection_monitor.db")
self._cm_storage = ConnectionMonitorStorage(db_path)
self._traceroute_trigger = TracerouteTrigger(
probe=self._traceroute_probe,
storage=self._cm_storage,
)

self._seeded = False
self._smart_capture = None
Expand Down Expand Up @@ -92,6 +99,7 @@ def collect(self) -> CollectorResult:
self._config_mgr.get("connection_monitor_retention_days", 0)
)
self._cm_storage.cleanup(retention)
self._cm_storage.cleanup_traces(retention)
self._last_cleanup = now

return CollectorResult.ok(self.name, {"probed": len(due)})
Expand Down Expand Up @@ -153,6 +161,9 @@ def _check_events(self, samples: list[dict]):
)
all_events.extend(events)

for event in all_events:
self._traceroute_trigger.on_event(event)

if all_events and hasattr(self._core_storage, "save_events_with_ids"):
self._core_storage.save_events_with_ids(all_events)
if self._smart_capture:
Expand All @@ -175,3 +186,7 @@ def get_storage(self) -> ConnectionMonitorStorage:
def get_probe(self) -> ProbeEngine:
    """Return the probe engine so the capability endpoint can query it."""
    engine = self._probe
    return engine

def stop(self):
    """Release background resources by shutting down the traceroute trigger."""
    trigger = self._traceroute_trigger
    trigger.shutdown()
16 changes: 15 additions & 1 deletion app/modules/connection_monitor/i18n/de.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,19 @@
"cm_resolution_raw": "Rohdaten",
"cm_resolution_1min": "1-Min-Durchschnitte",
"cm_resolution_5min": "5-Min-Durchschnitte",
"cm_resolution_1hr": "1-Std-Durchschnitte"
"cm_resolution_1hr": "1-Std-Durchschnitte",
"traceroute.run_button": "Traceroute starten",
"traceroute.running": "Traceroute wird ausgeführt...",
"traceroute.result": "Traceroute-Ergebnis",
"traceroute.trigger_manual": "Manuell",
"traceroute.trigger_outage": "Ausfall",
"traceroute.trigger_packet_loss": "Paketverlust",
"traceroute.hop": "Hop",
"traceroute.hops": "Hops",
"traceroute.reached": "Ziel erreicht",
"traceroute.not_reached": "Ziel nicht erreicht",
"traceroute.history": "Traceroute-Verlauf",
"traceroute.no_traces": "Noch keine Traceroutes aufgezeichnet.",
"traceroute.probes_responded": "Antworten",
"traceroute.partial_result": "Teilergebnis (Zeitüberschreitung)"
}
16 changes: 15 additions & 1 deletion app/modules/connection_monitor/i18n/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,19 @@
"cm_resolution_raw": "Raw samples",
"cm_resolution_1min": "1-min averages",
"cm_resolution_5min": "5-min averages",
"cm_resolution_1hr": "1-hour averages"
"cm_resolution_1hr": "1-hour averages",
"traceroute.run_button": "Run Traceroute",
"traceroute.running": "Running traceroute...",
"traceroute.result": "Traceroute Result",
"traceroute.trigger_manual": "Manual",
"traceroute.trigger_outage": "Outage",
"traceroute.trigger_packet_loss": "Packet Loss",
"traceroute.hop": "Hop",
"traceroute.hops": "Hops",
"traceroute.reached": "Target reached",
"traceroute.not_reached": "Target not reached",
"traceroute.history": "Traceroute History",
"traceroute.no_traces": "No traceroutes recorded yet.",
"traceroute.probes_responded": "Probes",
"traceroute.partial_result": "Partial result (timeout)"
}
16 changes: 15 additions & 1 deletion app/modules/connection_monitor/i18n/es.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,19 @@
"cm_resolution_raw": "Datos crudos",
"cm_resolution_1min": "Promedios de 1 min",
"cm_resolution_5min": "Promedios de 5 min",
"cm_resolution_1hr": "Promedios de 1 h"
"cm_resolution_1hr": "Promedios de 1 h",
"traceroute.run_button": "Ejecutar Traceroute",
"traceroute.running": "Ejecutando traceroute...",
"traceroute.result": "Resultado del Traceroute",
"traceroute.trigger_manual": "Manual",
"traceroute.trigger_outage": "Corte",
"traceroute.trigger_packet_loss": "Pérdida de paquetes",
"traceroute.hop": "Salto",
"traceroute.hops": "Saltos",
"traceroute.reached": "Objetivo alcanzado",
"traceroute.not_reached": "Objetivo no alcanzado",
"traceroute.history": "Historial de Traceroute",
"traceroute.no_traces": "Aún no se han registrado traceroutes.",
"traceroute.probes_responded": "Sondas",
"traceroute.partial_result": "Resultado parcial (timeout)"
}
16 changes: 15 additions & 1 deletion app/modules/connection_monitor/i18n/fr.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,19 @@
"cm_resolution_raw": "Données brutes",
"cm_resolution_1min": "Moyennes 1 min",
"cm_resolution_5min": "Moyennes 5 min",
"cm_resolution_1hr": "Moyennes 1 h"
"cm_resolution_1hr": "Moyennes 1 h",
"traceroute.run_button": "Lancer Traceroute",
"traceroute.running": "Traceroute en cours...",
"traceroute.result": "Résultat du Traceroute",
"traceroute.trigger_manual": "Manuel",
"traceroute.trigger_outage": "Panne",
"traceroute.trigger_packet_loss": "Perte de paquets",
"traceroute.hop": "Saut",
"traceroute.hops": "Sauts",
"traceroute.reached": "Cible atteinte",
"traceroute.not_reached": "Cible non atteinte",
"traceroute.history": "Historique Traceroute",
"traceroute.no_traces": "Aucun traceroute enregistré.",
"traceroute.probes_responded": "Sondes",
"traceroute.partial_result": "Résultat partiel (timeout)"
}
91 changes: 90 additions & 1 deletion app/modules/connection_monitor/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import math
import re
import time
from datetime import datetime
from datetime import datetime, timezone

from flask import Blueprint, jsonify, request, Response

Expand Down Expand Up @@ -50,6 +50,31 @@ def _get_probe_engine():
return ProbeEngine(method=method)


_traceroute_probe = None


def _get_traceroute_probe():
    """Return the module-wide TracerouteProbe, creating it on first use."""
    global _traceroute_probe
    if _traceroute_probe is not None:
        return _traceroute_probe

    # Imported lazily, only on the call that actually builds the probe.
    from app.modules.connection_monitor.traceroute_probe import TracerouteProbe

    _traceroute_probe = TracerouteProbe()
    return _traceroute_probe


def _epoch_to_iso(ts):
    """Format a Unix epoch timestamp as a UTC ``YYYY-MM-DDTHH:MM:SSZ`` string."""
    moment = datetime.fromtimestamp(ts, tz=timezone.utc)
    return moment.strftime("%Y-%m-%dT%H:%M:%SZ")


def _hop_to_dict(hop):
    """Copy the five hop attributes of *hop* into a plain dict, in fixed order."""
    field_names = (
        "hop_index",
        "hop_ip",
        "hop_host",
        "latency_ms",
        "probes_responded",
    )
    return {field: getattr(hop, field) for field in field_names}


# --- Targets ---

@bp.route("/api/connection-monitor/targets", methods=["GET"])
Expand Down Expand Up @@ -516,3 +541,67 @@ def api_export_csv(target_id):
def api_capability():
probe = _get_probe_engine()
return jsonify(probe.capability_info())


# --- Traceroute ---

@bp.route("/api/connection-monitor/traceroute/<int:target_id>", methods=["POST"])
@require_auth
def api_run_traceroute(target_id):
    """Run a traceroute against a stored target, persist it, and return it as JSON.

    The trace is saved with trigger reason ``"manual"``. Responds with a 404
    and an error payload when storage has no target for ``target_id``.
    """
    storage = _get_cm_storage()
    target = storage.get_target(target_id)
    if not target:
        return jsonify({"error": "Target not found"}), 404

    probe = _get_traceroute_probe()
    result = probe.run(target["host"])

    # Read the clock once so the stored row and the JSON response carry the
    # same timestamp (two separate reads would drift apart).
    recorded_at = time.time()

    trace_id = storage.save_trace(
        target_id=target_id,
        timestamp=recorded_at,
        trigger_reason="manual",
        # Use the shared hop serializer instead of a hand-copied dict literal.
        hops=[_hop_to_dict(h) for h in result.hops],
        route_fingerprint=result.route_fingerprint,
        reached_target=result.reached_target,
    )

    return jsonify({
        "trace_id": trace_id,
        "timestamp": _epoch_to_iso(recorded_at),
        "trigger_reason": "manual",
        "reached_target": result.reached_target,
        "hop_count": len(result.hops),
        "route_fingerprint": result.route_fingerprint,
        # Built separately from the list handed to storage, so the response
        # never shares dict objects with it.
        "hops": [_hop_to_dict(h) for h in result.hops],
    })


@bp.route("/api/connection-monitor/traces/<int:target_id>")
@require_auth
def api_get_traces(target_id):
    """Return the stored traces for a target as a JSON list.

    Query parameters ``start`` and ``end`` (floats, optional) are handed to
    storage as given; ``limit`` (int, default 100) is clamped to 1..1000.
    Each trace's ``timestamp`` is rewritten via ``_epoch_to_iso``.
    """
    storage = _get_cm_storage()
    query = request.args
    window_start = query.get("start", type=float)
    window_end = query.get("end", type=float)
    requested = query.get("limit", 100, type=int)
    page_size = min(1000, max(requested, 1))

    rows = storage.get_traces(
        target_id, start=window_start, end=window_end, limit=page_size
    )
    for row in rows:
        row["timestamp"] = _epoch_to_iso(row["timestamp"])
    return jsonify(rows)


@bp.route("/api/connection-monitor/trace/<int:trace_id>")
@require_auth
def api_get_trace_detail(trace_id):
    """Return one stored trace together with its hops as JSON.

    Responds with a 404 and an error payload when storage returns no trace
    for ``trace_id``.
    """
    storage = _get_cm_storage()
    record = storage.get_trace(trace_id)
    if record:
        hop_rows = storage.get_trace_hops(trace_id)
        record["timestamp"] = _epoch_to_iso(record["timestamp"])
        record["hops"] = hop_rows
        return jsonify(record)
    return jsonify({"error": "Trace not found"}), 404
Loading