Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Continuous integration: run the test suite against a ClickHouse service
# container, build a coverage report, and refresh the coverage badge on master.
name: CI

on:
  push:
    branches: [ "master" ]
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest
    permissions:
      # Write access is required by the "Commit coverage badge" step below.
      contents: write
    services:
      clickhouse:
        image: clickhouse/clickhouse-server:23.8
        ports:
          - 9000:9000
        # The job's steps only start once this health check succeeds.
        options: >-
          --health-cmd "clickhouse-client --query 'SELECT 1'" --health-interval 10s --health-timeout 5s --health-retries 5
    steps:
      # v4/v5 of the official actions replace the v3 releases, which run on
      # the deprecated Node 16 runtime.
      - uses: actions/checkout@v4

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Install versions of these packages compatible with Python 3.12
          pip install "numpy>=1.26.0" "pandas>=2.1.0" "scipy>=1.11.0"
          # Create a temporary requirements file without the upgraded packages
          grep -vE "numpy|pandas|scipy" requirements.txt > temp_requirements.txt
          # Install the remaining requirements
          pip install -r temp_requirements.txt
          pip install -r requirements-dev.txt
      - name: Run tests and generate coverage report
        env:
          # Address of the ClickHouse service container published on the runner.
          CLICKHOUSE_HOST: 127.0.0.1
        run: |
          pytest --cov=analyzer_lib --cov-report=xml
      - name: Generate coverage badge
        run: |
          genbadge coverage -i coverage.xml -o coverage.svg
      - name: Commit coverage badge
        # Only pushes to master update the badge; pull requests never commit.
        if: github.ref == 'refs/heads/master'
        uses: EndBug/add-and-commit@v9
        with:
          author_name: 'github-actions[bot]'
          author_email: 'github-actions[bot]@users.noreply.github.com'
          message: 'chore: Update coverage badge'
          add: 'coverage.svg'
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Traceroute Data Analyzer and Anomaly Detector

[![CodeFactor](https://www.codefactor.io/repository/github/gozzim/ripe-atlas-traceroute-analysis/badge?s=23796f0031a238400a38e22d0679ee1bc5682d46)](https://www.codefactor.io/repository/github/gozzim/ripe-atlas-traceroute-analysis)
[![CI](https://github.com/Gozzim/RIPE-Atlas-Traceroute-Analysis/actions/workflows/ci.yml/badge.svg)](https://github.com/Gozzim/RIPE-Atlas-Traceroute-Analysis/actions/workflows/ci.yml)
[![Coverage Status](coverage.svg)](https://github.com/Gozzim/RIPE-Atlas-Traceroute-Analysis/actions/workflows/ci.yml)

This project provides a comprehensive framework for analyzing RIPE Atlas traceroute data to detect network performance and routing anomalies. It can process large datasets from local files or a ClickHouse database, establish performance baselines, and compare current data against those baselines to identify significant deviations.

Expand Down
1 change: 1 addition & 0 deletions coverage.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/example.bz2
Binary file not shown.
200 changes: 200 additions & 0 deletions data/example.json

Large diffs are not rendered by default.

Binary file added data/example2.bz2
Binary file not shown.
Empty file added out/.gitkeep
Empty file.
7 changes: 6 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,9 @@ ignore = ["E501"]
quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false
line-ending = "lf"
line-ending = "lf"

[tool.pytest.ini_options]
markers = [
"integration: marks tests as integration tests",
]
3 changes: 3 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
ruff~=0.5.5
pytest~=8.3.2
pytest-cov~=5.0.0
genbadge[all]~=1.1.0
Empty file added tests/__init__.py
Empty file.
Empty file added tests/analysis/__init__.py
Empty file.
Empty file.
257 changes: 257 additions & 0 deletions tests/analysis/traceroute/test_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
import ipaddress

import orjson
import pytest

from analyzer_lib.analysis.traceroute.parser import (
_is_ip_private_or_special,
parse_traceroute,
reconstruct_path_tuple_from_string,
)


# Fixture for a base valid traceroute record
@pytest.fixture
def valid_traceroute_record():
    """
    Builds a fresh, well-formed three-hop ICMP traceroute record.

    Each hop carries exactly one reply and the final hop answers from the
    destination address, so individual tests can mutate a single field.
    """
    # (responding address, round-trip time) for hops 1..3, in path order.
    replies = (("1.1.1.1", 0.5), ("1.0.0.1", 1.2), ("8.8.8.8", 5.8))
    hops = [
        {"hop": number, "result": [{"from": address, "rtt": rtt}]}
        for number, (address, rtt) in enumerate(replies, start=1)
    ]
    return {
        "fw": 5050,
        "timestamp": 1700000000,
        "endtime": 1700000002,
        "prb_id": 1001,
        "msm_id": 2002,
        "src_addr": "1.1.1.1",
        "dst_addr": "8.8.8.8",
        "proto": "ICMP",
        "destination_ip_responded": True,
        "result": hops,
    }


# --- Tests for _is_ip_private_or_special ---
@pytest.mark.parametrize(
    "ip_str, expected",
    [
        # Publicly routable addresses are not flagged.
        ("1.1.1.1", False),
        ("8.8.8.8", False),
        ("208.67.222.222", False),
        # RFC 1918 private ranges.
        ("192.168.1.1", True),
        ("10.0.0.1", True),
        ("172.16.0.1", True),
        # Loopback.
        ("127.0.0.1", True),
        # Link-local.
        ("169.254.0.1", True),
        # Multicast.
        ("224.0.0.1", True),
        # Reserved range (treated as private by the ipaddress library in this env).
        ("192.0.2.1", True),
        # Placeholder / missing values are never flagged.
        (None, False),
        ("", False),
        ("*", False),
        # Strings that are not valid IP addresses are never flagged.
        ("not an ip", False),
        ("1.2.3.4.5", False),
    ],
)
def test_is_ip_private_or_special(ip_str, expected):
    """
    Checks the classification returned by _is_ip_private_or_special for
    public, private, special-purpose, placeholder and malformed inputs.
    """
    verdict = _is_ip_private_or_special(ip_str)
    assert verdict is expected


# --- Tests for parse_traceroute ---


def test_parse_traceroute_valid_record(valid_traceroute_record):
    """
    Parses the unmodified fixture record and checks every summary field.
    """
    payload = orjson.dumps(valid_traceroute_record)
    summary = parse_traceroute(payload, None, None, None, None, None)

    assert summary is not None

    # Fields compared by equality against the values implied by the fixture.
    expected_fields = {
        "prb_id": 1001,
        "msm_id": 2002,
        "dst_addr": "8.8.8.8",
        "final_rtt_ms": 5.8,
        "first_hop_rtt_ms": 0.5,
        "path_len": 3,
        "timeouts_count": 0,
        "first_hop_ip": "1.1.1.1",
        "last_hop_ip": "8.8.8.8",
        "hop_path_str": "1.1.1.1,1.0.0.1,8.8.8.8",
    }
    for field, wanted in expected_fields.items():
        assert summary[field] == wanted

    # The responded flag must be the boolean True itself, not merely truthy.
    assert summary["dest_responded"] is True


def test_parse_traceroute_invalid_json():
    """
    Checks that malformed JSON input yields None.
    """
    truncated = b'{"key": "value"'  # closing brace deliberately missing
    assert parse_traceroute(truncated, None, None, None, None, None) is None


def test_parse_traceroute_missing_fields(valid_traceroute_record):
    """
    Checks that a record lacking the essential "prb_id" field yields None.
    """
    valid_traceroute_record.pop("prb_id")
    payload = orjson.dumps(valid_traceroute_record)
    assert parse_traceroute(payload, None, None, None, None, None) is None


@pytest.mark.parametrize(
    "filters, expect_pass",
    [
        # Probe ID: matching vs. non-matching
        ({"source_probe_ids": {1001}}, True),
        ({"source_probe_ids": {9999}}, False),
        # Protocol: matching vs. non-matching
        ({"protocol_filters": {"ICMP"}}, True),
        ({"protocol_filters": {"UDP"}}, False),
        # Source network: containing vs. not containing src_addr
        ({"source_networks_to_filter": [ipaddress.ip_network("1.1.1.0/24")]}, True),
        ({"source_networks_to_filter": [ipaddress.ip_network("2.2.2.0/24")]}, False),
        # Destination IP: matching vs. non-matching
        ({"dest_ips_to_filter": {"8.8.8.8"}}, True),
        ({"dest_ips_to_filter": {"9.9.9.9"}}, False),
        # Destination network: containing vs. not containing dst_addr
        ({"dest_networks_to_filter": [ipaddress.ip_network("8.8.8.0/24")]}, True),
        ({"dest_networks_to_filter": [ipaddress.ip_network("9.9.9.0/24")]}, False),
    ],
)
def test_parse_traceroute_filters(valid_traceroute_record, filters, expect_pass):
    """
    Applies one filter at a time to the fixture record and checks whether
    parse_traceroute keeps the record (non-None) or drops it (None).
    """
    payload = orjson.dumps(valid_traceroute_record)

    # Every filter defaults to None (disabled); the case under test overrides one.
    filter_names = (
        "source_probe_ids",
        "source_networks_to_filter",
        "dest_ips_to_filter",
        "dest_networks_to_filter",
        "protocol_filters",
    )
    filter_kwargs = {**dict.fromkeys(filter_names), **filters}

    outcome = parse_traceroute(payload, **filter_kwargs)

    was_kept = outcome is not None
    assert was_kept is expect_pass


def test_parse_traceroute_private_destination_skipped(valid_traceroute_record):
    """
    Checks that a record whose destination is a private IP yields None when
    private IPs are excluded.
    """
    valid_traceroute_record.update(dst_addr="192.168.1.1")
    payload = orjson.dumps(valid_traceroute_record)
    outcome = parse_traceroute(
        payload, None, None, None, None, None, include_private_ips_param=False
    )
    assert outcome is None


def test_parse_traceroute_private_hop_scrubbed(valid_traceroute_record):
    """
    Checks that a private IP on the hop path appears as "PRIVATE" in the
    path string when private IPs are excluded.
    """
    second_hop_reply = valid_traceroute_record["result"][1]["result"][0]
    second_hop_reply["from"] = "10.0.0.1"
    payload = orjson.dumps(valid_traceroute_record)
    summary = parse_traceroute(
        payload, None, None, None, None, None, include_private_ips_param=False
    )

    assert summary is not None
    assert summary["hop_path_str"] == "1.1.1.1,PRIVATE,8.8.8.8"


def test_parse_traceroute_with_timeouts(valid_traceroute_record):
    """
    Checks that a timed-out hop is counted and shown as "*" in the path.
    """
    second_hop = valid_traceroute_record["result"][1]
    second_hop["result"] = [{"x": "*"}]  # timeout marker instead of a reply
    payload = orjson.dumps(valid_traceroute_record)
    summary = parse_traceroute(payload, None, None, None, None, None)

    assert summary is not None
    assert summary["timeouts_count"] == 1
    assert summary["hop_path_str"] == "1.1.1.1,*,8.8.8.8"


def test_parse_traceroute_destination_not_responded(valid_traceroute_record):
    """
    Checks a record whose destination never answered: the responded flag is
    False and no final RTT is reported.
    """
    valid_traceroute_record["destination_ip_responded"] = False
    # Make the last hop answer from an address other than the destination.
    final_reply = valid_traceroute_record["result"][2]["result"][0]
    final_reply["from"] = "203.0.113.1"
    payload = orjson.dumps(valid_traceroute_record)
    summary = parse_traceroute(payload, None, None, None, None, None)

    assert summary is not None
    assert summary["dest_responded"] is False
    assert summary["final_rtt_ms"] is None


def test_parse_traceroute_empty_result(valid_traceroute_record):
    """
    Checks that a record with no hops still parses, with a zero-length path
    and no path string.
    """
    valid_traceroute_record.update(result=[])
    payload = orjson.dumps(valid_traceroute_record)
    summary = parse_traceroute(payload, None, None, None, None, None)

    assert summary is not None
    assert summary["path_len"] == 0
    assert summary["hop_path_str"] is None


@pytest.mark.parametrize(
    "input_str, expected_tuple",
    [
        # Multi-hop path
        ("1.1.1.1,2.2.2.2,3.3.3.3", ("1.1.1.1", "2.2.2.2", "3.3.3.3")),
        # Single hop
        ("1.1.1.1", ("1.1.1.1",)),
        # Empty or missing input gives an empty tuple
        ("", ()),
        (None, ()),
    ],
)
def test_reconstruct_path_tuple_from_string(input_str, expected_tuple):
    """
    Checks that reconstruct_path_tuple_from_string turns a comma-separated
    path string into the matching tuple of hops.
    """
    rebuilt = reconstruct_path_tuple_from_string(input_str)
    assert rebuilt == expected_tuple


def test_parse_traceroute_invalid_src_addr(valid_traceroute_record):
    """
    Checks that an unparsable src_addr yields None when a network filter is
    passed as the second positional argument.
    """
    valid_traceroute_record.update(src_addr="not an ip")
    payload = orjson.dumps(valid_traceroute_record)
    network_filter = [ipaddress.ip_network("1.1.1.0/24")]
    outcome = parse_traceroute(payload, None, network_filter, None, None, None)
    assert outcome is None


def test_parse_traceroute_invalid_hop_data(valid_traceroute_record):
    """
    Checks that a non-dict entry in the hop list does not make parsing fail.
    """
    hops = valid_traceroute_record["result"]
    hops[1] = "invalid"
    payload = orjson.dumps(valid_traceroute_record)
    outcome = parse_traceroute(payload, None, None, None, None, None)
    # The remaining data should still be parsed.
    assert outcome is not None


def test_reconstruct_path_tuple_from_string_error():
    """
    Checks that a non-string argument gives an empty tuple.
    """
    rebuilt = reconstruct_path_tuple_from_string(123)
    assert rebuilt == ()
Loading