Gozzim · Gozzim · Sep 14, 2025 · Sep 13, 2025 · Sep 13, 2025 · Sep 14, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,64 @@
+# Continuous integration: runs the test suite against a ClickHouse service
+# container and, on master, commits a refreshed coverage badge.
+name: CI
+
+on:
+  push:
+    branches: ["master"]
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    # Write access is needed only by the final step, which commits coverage.svg.
+    permissions:
+      contents: write
+    services:
+      clickhouse:
+        image: clickhouse/clickhouse-server:23.8
+        ports:
+          - "9000:9000"
+        # Docker health check: poll the server with a trivial query until it answers.
+        options: >-
+          --health-cmd "clickhouse-client --query 'SELECT 1'"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # Install versions of these packages compatible with Python 3.12
+          pip install "numpy>=1.26.0" "pandas>=2.1.0" "scipy>=1.11.0"
+          # Create a temporary requirements file without the upgraded packages
+          grep -vE "numpy|pandas|scipy" requirements.txt > temp_requirements.txt
+          # Install the remaining requirements
+          pip install -r temp_requirements.txt
+          pip install -r requirements-dev.txt
+
+      - name: Run tests and generate coverage report
+        env:
+          CLICKHOUSE_HOST: 127.0.0.1
+        run: |
+          pytest --cov=analyzer_lib --cov-report=xml
+
+      - name: Generate coverage badge
+        run: |
+          genbadge coverage -i coverage.xml -o coverage.svg
+
+      - name: Commit coverage badge
+        if: github.ref == 'refs/heads/master'
+        uses: EndBug/add-and-commit@v9
+        with:
+          author_name: 'github-actions[bot]'
+          author_email: 'github-actions[bot]@users.noreply.github.com'
+          message: 'chore: Update coverage badge'
+          add: 'coverage.svg'
diff --git a/README.md b/README.md
@@ -1,6 +1,8 @@
 # Traceroute Data Analyzer and Anomaly Detector
 
 [![CodeFactor](https://www.codefactor.io/repository/github/gozzim/ripe-atlas-traceroute-analysis/badge?s=23796f0031a238400a38e22d0679ee1bc5682d46)](https://www.codefactor.io/repository/github/gozzim/ripe-atlas-traceroute-analysis)
+[![CI](https://github.com/Gozzim/RIPE-Atlas-Traceroute-Analysis/actions/workflows/ci.yml/badge.svg)](https://github.com/Gozzim/RIPE-Atlas-Traceroute-Analysis/actions/workflows/ci.yml)
+[![Coverage Status](coverage.svg)](https://github.com/Gozzim/RIPE-Atlas-Traceroute-Analysis/actions/workflows/ci.yml)
 
 This project provides a comprehensive framework for analyzing RIPE Atlas traceroute data to detect network performance and routing anomalies. It can process large datasets from local files or a ClickHouse database, establish performance baselines, and compare current data against those baselines to identify significant deviations.
 

diff --git a/coverage.svg b/coverage.svg
diff --git a/data/example.bz2 b/data/example.bz2
diff --git a/data/example.json b/data/example.json
diff --git a/data/example2.bz2 b/data/example2.bz2
diff --git a/out/.gitkeep b/out/.gitkeep
diff --git a/pyproject.toml b/pyproject.toml
@@ -16,4 +16,9 @@ ignore = ["E501"]
 quote-style = "double"
 indent-style = "space"
 skip-magic-trailing-comma = false
-line-ending = "lf"
+line-ending = "lf"
+
+[tool.pytest.ini_options]
+markers = [  # custom pytest markers, one "name: description" entry per marker
+    "integration: marks tests as integration tests",
+]
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1 +1,4 @@
 ruff~=0.5.5
+pytest~=8.3.2
+pytest-cov~=5.0.0
+genbadge[all]~=1.1.0
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/analysis/__init__.py b/tests/analysis/__init__.py
diff --git a/tests/analysis/traceroute/__init__.py b/tests/analysis/traceroute/__init__.py
diff --git a/tests/analysis/traceroute/test_parser.py b/tests/analysis/traceroute/test_parser.py
@@ -0,0 +1,257 @@
+import ipaddress
+
+import orjson
+import pytest
+
+from analyzer_lib.analysis.traceroute.parser import (
+    _is_ip_private_or_special,
+    parse_traceroute,
+    reconstruct_path_tuple_from_string,
+)
+
+
+# Fixture for a base valid traceroute record
+@pytest.fixture
+def valid_traceroute_record():
+    """Provide a fresh, well-formed three-hop ICMP traceroute record for each test."""
+    # (responding address, RTT) for hops 1..3; the last hop answers from the destination.
+    responders = [("1.1.1.1", 0.5), ("1.0.0.1", 1.2), ("8.8.8.8", 5.8)]
+    hops = enumerate(responders, start=1)
+    return dict(
+        fw=5050,
+        timestamp=1700000000,
+        endtime=1700000002,
+        prb_id=1001,
+        msm_id=2002,
+        src_addr="1.1.1.1",
+        dst_addr="8.8.8.8",
+        proto="ICMP",
+        destination_ip_responded=True,
+        result=[{"hop": ttl, "result": [{"from": ip, "rtt": rtt}]} for ttl, (ip, rtt) in hops],
+    )
+
+
+# --- Tests for _is_ip_private_or_special ---
+@pytest.mark.parametrize(
+    ("ip_str", "expected"),
+    [
+        # Publicly routable addresses
+        pytest.param("1.1.1.1", False),
+        pytest.param("8.8.8.8", False),
+        pytest.param("208.67.222.222", False),
+        # RFC 1918 private ranges
+        pytest.param("192.168.1.1", True),
+        pytest.param("10.0.0.1", True),
+        pytest.param("172.16.0.1", True),
+        # Loopback
+        pytest.param("127.0.0.1", True),
+        # Link-local
+        pytest.param("169.254.0.1", True),
+        # Multicast
+        pytest.param("224.0.0.1", True),
+        # TEST-NET-1 documentation range (treated as private by the ipaddress library)
+        pytest.param("192.0.2.1", True),
+        # Missing or placeholder values
+        pytest.param(None, False),
+        pytest.param("", False),
+        pytest.param("*", False),
+        # Strings that are not valid IP addresses
+        pytest.param("not an ip", False),
+        pytest.param("1.2.3.4.5", False),
+    ],
+)
+def test_is_ip_private_or_special(ip_str, expected):
+    """
+    Each input must classify as exactly ``expected`` (a bool, compared by identity).
+    """
+    assert _is_ip_private_or_special(ip_str) is expected
+
+
+# --- Tests for parse_traceroute ---
+
+
+def test_parse_traceroute_valid_record(valid_traceroute_record):
+    """A complete, well-formed record yields the expected summary fields."""
+    summary = parse_traceroute(orjson.dumps(valid_traceroute_record), None, None, None, None, None)
+    expected_fields = {
+        "prb_id": 1001,
+        "msm_id": 2002,
+        "dst_addr": "8.8.8.8",
+        "final_rtt_ms": 5.8,
+        "first_hop_rtt_ms": 0.5,
+        "path_len": 3,
+        "timeouts_count": 0,
+        "first_hop_ip": "1.1.1.1",
+        "last_hop_ip": "8.8.8.8",
+        "hop_path_str": "1.1.1.1,1.0.0.1,8.8.8.8",
+    }
+
+    assert summary is not None
+    assert summary["dest_responded"] is True
+    assert {name: summary[name] for name in expected_fields} == expected_fields
+
+
+def test_parse_traceroute_invalid_json():
+    """
+    Input that cannot be decoded as JSON makes the parser return None.
+    """
+    # The closing brace is deliberately missing, so the payload is not valid JSON.
+    truncated_payload = b'{"key": "value"'
+    assert parse_traceroute(truncated_payload, None, None, None, None, None) is None
+
+
+def test_parse_traceroute_missing_fields(valid_traceroute_record):
+    """
+    A record lacking the essential ``prb_id`` field is rejected with None.
+    """
+    valid_traceroute_record.pop("prb_id")
+    payload = orjson.dumps(valid_traceroute_record)
+
+    assert parse_traceroute(payload, None, None, None, None, None) is None
+
+
+@pytest.mark.parametrize(
+    ("filters", "expect_pass"),
+    [
+        # Filter on source probe ID
+        pytest.param({"source_probe_ids": {1001}}, True),
+        pytest.param({"source_probe_ids": {9999}}, False),
+        # Filter on protocol
+        pytest.param({"protocol_filters": {"ICMP"}}, True),
+        pytest.param({"protocol_filters": {"UDP"}}, False),
+        # Filter on source network
+        pytest.param({"source_networks_to_filter": [ipaddress.ip_network("1.1.1.0/24")]}, True),
+        pytest.param({"source_networks_to_filter": [ipaddress.ip_network("2.2.2.0/24")]}, False),
+        # Filter on exact destination IP
+        pytest.param({"dest_ips_to_filter": {"8.8.8.8"}}, True),
+        pytest.param({"dest_ips_to_filter": {"9.9.9.9"}}, False),
+        # Filter on destination network
+        pytest.param({"dest_networks_to_filter": [ipaddress.ip_network("8.8.8.0/24")]}, True),
+        pytest.param({"dest_networks_to_filter": [ipaddress.ip_network("9.9.9.0/24")]}, False),
+    ],
+)
+def test_parse_traceroute_filters(valid_traceroute_record, filters, expect_pass):
+    """
+    Each filter keeps the base record when it matches and drops it otherwise.
+
+    ``filters`` holds the single keyword argument under test; ``expect_pass``
+    says whether parse_traceroute should still return a parsed record.
+    """
+    filter_names = (
+        "source_probe_ids",
+        "source_networks_to_filter",
+        "dest_ips_to_filter",
+        "dest_networks_to_filter",
+        "protocol_filters",
+    )
+    # Filters not under test stay at None;
+    # the parametrized entry then overrides its own slot.
+    call_kwargs = {**dict.fromkeys(filter_names), **filters}
+    outcome = parse_traceroute(orjson.dumps(valid_traceroute_record), **call_kwargs)
+
+    assert (outcome is not None) is expect_pass
+
+
+def test_parse_traceroute_private_destination_skipped(valid_traceroute_record):
+    """
+    With private IPs excluded, a record aimed at a private destination yields None.
+    """
+    valid_traceroute_record.update(dst_addr="192.168.1.1")
+    payload = orjson.dumps(valid_traceroute_record)
+    outcome = parse_traceroute(payload, None, None, None, None, None, include_private_ips_param=False)
+    assert outcome is None
+
+
+def test_parse_traceroute_private_hop_scrubbed(valid_traceroute_record):
+    """
+    With private IPs excluded, a private hop address appears as "PRIVATE" in the path.
+    """
+    second_hop_reply = valid_traceroute_record["result"][1]["result"][0]
+    second_hop_reply["from"] = "10.0.0.1"
+    payload = orjson.dumps(valid_traceroute_record)
+    outcome = parse_traceroute(payload, None, None, None, None, None, include_private_ips_param=False)
+    assert outcome is not None
+    assert outcome["hop_path_str"] == "1.1.1.1,PRIVATE,8.8.8.8"
+
+
+def test_parse_traceroute_with_timeouts(valid_traceroute_record):
+    """
+    A hop whose only reply is a timeout marker is counted and shown as "*" in the path.
+    """
+    second_hop = valid_traceroute_record["result"][1]
+    second_hop["result"] = [{"x": "*"}]
+    outcome = parse_traceroute(orjson.dumps(valid_traceroute_record), None, None, None, None, None)
+
+    assert outcome is not None
+    observed = (outcome["timeouts_count"], outcome["hop_path_str"])
+    assert observed == (1, "1.1.1.1,*,8.8.8.8")
+
+
+def test_parse_traceroute_destination_not_responded(valid_traceroute_record):
+    """
+    When the destination never answers, dest_responded is False and final_rtt_ms is None.
+    """
+    valid_traceroute_record["destination_ip_responded"] = False
+    # Make the last hop reply from an address other than dst_addr.
+    final_reply = valid_traceroute_record["result"][2]["result"][0]
+    final_reply["from"] = "203.0.113.1"
+    outcome = parse_traceroute(orjson.dumps(valid_traceroute_record), None, None, None, None, None)
+    assert outcome is not None
+    assert outcome["dest_responded"] is False
+    assert outcome["final_rtt_ms"] is None
+
+
+def test_parse_traceroute_empty_result(valid_traceroute_record):
+    """
+    A record with no hops still parses, with zero path length and no path string.
+    """
+    valid_traceroute_record.update(result=[])
+    payload = orjson.dumps(valid_traceroute_record)
+    outcome = parse_traceroute(payload, None, None, None, None, None)
+
+    assert outcome is not None
+    assert outcome["path_len"] == 0
+    assert outcome["hop_path_str"] is None
+
+
+@pytest.mark.parametrize(
+    ("input_str", "expected_tuple"),
+    [
+        pytest.param("1.1.1.1,2.2.2.2,3.3.3.3", ("1.1.1.1", "2.2.2.2", "3.3.3.3")),
+        pytest.param("1.1.1.1", ("1.1.1.1",)),
+        pytest.param("", ()),
+        pytest.param(None, ()),
+    ],
+)
+def test_reconstruct_path_tuple_from_string(input_str, expected_tuple):
+    """
+    A comma-separated hop string becomes a tuple of hops; empty or None gives ().
+    """
+    assert reconstruct_path_tuple_from_string(input_str) == expected_tuple
+
+
+def test_parse_traceroute_invalid_src_addr(valid_traceroute_record):
+    """
+    With a network filter supplied (second positional argument), an unparseable src_addr yields None.
+    """
+    valid_traceroute_record.update(src_addr="not an ip")
+    networks = [ipaddress.ip_network("1.1.1.0/24")]
+    payload = orjson.dumps(valid_traceroute_record)
+    assert parse_traceroute(payload, None, networks, None, None, None) is None
+
+
+def test_parse_traceroute_invalid_hop_data(valid_traceroute_record):
+    """
+    A malformed hop entry (a bare string) does not stop the record from parsing.
+    """
+    hops = valid_traceroute_record["result"]
+    hops[1] = "invalid"
+    outcome = parse_traceroute(orjson.dumps(valid_traceroute_record), None, None, None, None, None)
+    assert outcome is not None  # the remaining data is still expected to be parsed
+
+
+def test_reconstruct_path_tuple_from_string_error():
+    """
+    A non-string argument (here an int) results in an empty tuple.
+    """
+    assert reconstruct_path_tuple_from_string(123) == ()