Skip to content

Use Cases Network Monitoring

Thomas Mangin edited this page Nov 15, 2025 · 1 revision

Network Monitoring and Topology Collection

ExaBGP enables network monitoring and topology collection using BGP-LS, route analysis, and integration with monitoring systems.

Table of Contents

Overview

Network Monitoring Use Cases

ExaBGP supports network monitoring for:

  • Topology discovery: Collect network topology via BGP-LS
  • Route tracking: Monitor BGP route changes and convergence
  • Traffic engineering visibility: Understand network paths and utilization
  • Anomaly detection: Detect routing anomalies and attacks
  • Compliance reporting: Track routing policy enforcement

ExaBGP Role

ExaBGP enables monitoring by:

  1. Passive route collection: Receive BGP updates from network
  2. BGP-LS topology export: Collect link-state information
  3. Metrics export: Expose routing metrics to Prometheus/Grafana
  4. Event notification: Alert on routing changes

Important: ExaBGP collects routing information but does NOT make forwarding decisions. Use collected data for analysis and visibility only.

BGP-LS Topology Collection

Collect Network Topology

Receive BGP-LS updates and build topology map:

#!/usr/bin/env python3
import sys
import json

# Topology database, filled incrementally from received BGP-LS NLRI.
# NODES maps router-id -> node NLRI dict.
# LINKS maps "<local router-id>_<remote router-id>" -> link NLRI dict.
NODES = {}
LINKS = {}


def process_bgp_ls_update(data):
    """Record the BGP-LS nodes and links announced in one decoded message.

    Args:
        data: One ExaBGP JSON message, already decoded into a dict.

    Node NLRI are stored in NODES keyed by their 'router-id'; link NLRI are
    stored in LINKS keyed by "<local router-id>_<remote router-id>". NLRI
    that lack the identifiers needed to build a key are skipped, because
    storing them would collapse unrelated entries onto the same key.

    The function never raises for structurally unexpected input: the
    problem is reported on stderr and the message is ignored, so one odd
    message cannot stop the collector.
    """
    try:
        # NOTE(review): ExaBGP's JSON appears to nest "message" inside
        # "neighbor"; both that layout and the flat one are accepted here.
        # Confirm against the output of the ExaBGP version in use.
        message = data.get('neighbor', {}).get('message') or data.get('message', {})
        announce = message.get('update', {}).get('announce', {})
        bgp_ls = announce.get('bgp-ls bgp-ls', {})

        for nlri_list in bgp_ls.values():
            for nlri in nlri_list:
                nlri_type = nlri.get('ls-nlri-type')

                if nlri_type == 'node':
                    node_id = nlri.get('router-id')
                    if node_id is None:
                        print("# Skipping BGP-LS node without router-id",
                              file=sys.stderr)
                        continue
                    NODES[node_id] = nlri

                elif nlri_type == 'link':
                    local_node = nlri.get('local-node-descriptors', {}).get('router-id')
                    remote_node = nlri.get('remote-node-descriptors', {}).get('router-id')
                    if local_node is None or remote_node is None:
                        print("# Skipping BGP-LS link without both endpoint router-ids",
                              file=sys.stderr)
                        continue
                    LINKS[f"{local_node}_{remote_node}"] = nlri

    except (AttributeError, KeyError, TypeError) as exc:
        # Raised when some level of the message is not the dict/list shape
        # walked above (or an id is unhashable).
        print(f"# Ignoring malformed BGP-LS update: {exc!r}", file=sys.stderr)

# Listen for BGP-LS updates: one JSON message per line on stdin.
while True:
    raw_line = sys.stdin.readline()
    if not raw_line:
        # readline() returns '' only at end of input. Stop here; treating
        # EOF like a blank line would spin forever at full CPU.
        break

    line = raw_line.strip()
    if not line:
        continue

    try:
        data = json.loads(line)
    except ValueError as exc:
        # json.JSONDecodeError is a ValueError; skip lines that are not JSON.
        print(f"# Ignoring non-JSON line: {exc}", file=sys.stderr)
        continue

    if not isinstance(data, dict) or data.get('type') != 'update':
        continue

    process_bgp_ls_update(data)

    # Print topology summary
    print(f"# Nodes: {len(NODES)}, Links: {len(LINKS)}", file=sys.stderr)

Export Topology to Grafana

Export topology for visualization:

#!/usr/bin/env python3
import sys
import json
from prometheus_client import start_http_server, Gauge, Info

# Prometheus metrics published by this exporter.
topology_nodes = Gauge('network_topology_nodes', 'Number of network nodes')
topology_links = Gauge('network_topology_links', 'Number of network links')
# Declared for per-node details; nothing in this example populates it yet.
node_info = Info('network_node', 'Network node information')

# Serve the metrics over HTTP on port 9100. The "Prometheus Exporter"
# example on this page binds the same port.
start_http_server(9100)

# Topology state; the parsing step in the main loop is expected to fill these.
NODES = {}
LINKS = {}

def update_metrics():
    """Publish the current node and link counts to their Prometheus gauges."""
    for gauge, table in ((topology_nodes, NODES), (topology_links, LINKS)):
        gauge.set(len(table))

while True:
    raw_line = sys.stdin.readline()
    if not raw_line:
        # readline() returns '' only at end of input. Stop here; treating
        # EOF like a blank line would spin forever at full CPU.
        break

    line = raw_line.strip()
    if not line:
        continue

    try:
        data = json.loads(line)
        # Process BGP-LS updates
        # ... parse and update NODES/LINKS ...
        update_metrics()
    except (ValueError, AttributeError, KeyError, TypeError) as exc:
        # ValueError covers invalid JSON; the others cover messages whose
        # structure is not what the parsing step expects.
        print(f"# Ignoring unparseable message: {exc!r}", file=sys.stderr)

Route Analysis

Track BGP Route Changes

Monitor route announcements and withdrawals:

#!/usr/bin/env python3
import sys
import json
from datetime import datetime

# Route tracking
# ROUTES: prefix -> attributes of the currently announced route.
# ROUTE_HISTORY: change events in arrival order, oldest first.
ROUTES = {}
ROUTE_HISTORY = []

# Maximum number of events kept in ROUTE_HISTORY.
MAX_ROUTE_HISTORY = 1000


def track_route_change(prefix, action, attributes):
    """Record one route change and update the current routing view.

    Args:
        prefix: The announced or withdrawn prefix.
        action: 'announce' or 'withdraw'. Any other value is still logged
            in ROUTE_HISTORY but leaves ROUTES untouched.
        attributes: Attributes to store for an announced route.

    Each event gets a timezone-aware UTC timestamp in ISO 8601 form.
    ROUTE_HISTORY is trimmed in place to the newest MAX_ROUTE_HISTORY events.
    """
    # Local import: the script's top-level import only provides `datetime`.
    from datetime import timezone

    event = {
        # datetime.utcnow() is deprecated and returns a naive value; use an
        # aware UTC timestamp so the offset is explicit in the output.
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'prefix': prefix,
        'action': action,
        'attributes': attributes,
    }
    ROUTE_HISTORY.append(event)

    # Trim in place so other references to the list keep seeing it.
    overflow = len(ROUTE_HISTORY) - MAX_ROUTE_HISTORY
    if overflow > 0:
        del ROUTE_HISTORY[:overflow]

    # Update current routes
    if action == 'announce':
        ROUTES[prefix] = attributes
    elif action == 'withdraw':
        ROUTES.pop(prefix, None)

def analyze_routing_stability(history=None, window=100, threshold=0.5):
    """Measure route churn over the most recent change events.

    Churn is the fraction of the last `window` events that touch a prefix
    which already changed earlier in the same window. A burst of changes to
    distinct prefixes (for example an initial table load) scores 0.0, while
    one prefix flapping repeatedly scores close to 1.0.

    Counting announces plus withdraws and dividing by the window size does
    not work as a measure: every event is one of the two, so that ratio is
    always 1.0 once the window is full.

    Args:
        history: List of event dicts with a 'prefix' key, oldest first.
            Defaults to the module-level ROUTE_HISTORY.
        window: Number of most recent events to examine.
        threshold: Churn ratio above which a warning is printed to stderr.

    Returns:
        The churn ratio, between 0.0 and 1.0; 0.0 when there are no events.
    """
    events = ROUTE_HISTORY if history is None else history
    # events[-0:] would be the whole list, so guard non-positive windows.
    recent_changes = events[-window:] if window > 0 else []
    if not recent_changes:
        return 0.0

    distinct_prefixes = {event['prefix'] for event in recent_changes}
    repeated_changes = len(recent_changes) - len(distinct_prefixes)
    churn_rate = repeated_changes / len(recent_changes)

    if churn_rate > threshold:  # High churn
        print(f"WARNING: High route churn detected: {churn_rate:.2f}",
              file=sys.stderr, flush=True)

    return churn_rate

# Listen for route updates: one JSON message per line on stdin.

# Number of tracked changes between two stability analyses. A counter is
# used instead of len(ROUTE_HISTORY) % 100: that test is true while the
# history is empty and permanently true once the history is capped at 1000.
ANALYSIS_INTERVAL = 100
changes_since_analysis = 0

while True:
    raw_line = sys.stdin.readline()
    if not raw_line:
        # readline() returns '' only at end of input. Stop here; treating
        # EOF like a blank line would spin forever at full CPU.
        break

    line = raw_line.strip()
    if not line:
        continue

    try:
        data = json.loads(line)
        # NOTE(review): ExaBGP's JSON appears to nest "message" inside
        # "neighbor"; both that layout and the flat one are accepted here.
        # Confirm against the output of the ExaBGP version in use.
        message = data.get('neighbor', {}).get('message') or data.get('message', {})
        update = message.get('update', {})

        # Track announcements: family -> next hop -> list of NLRI entries.
        for routes in update.get('announce', {}).values():
            for next_hop, prefixes in routes.items():
                for prefix_data in prefixes:
                    prefix = prefix_data.get('nlri')
                    if prefix:
                        track_route_change(prefix, 'announce',
                                           {'next_hop': next_hop})
                        changes_since_analysis += 1

        # Track withdrawals: family -> list of NLRI entries.
        for routes in update.get('withdraw', {}).values():
            for prefix_data in routes:
                prefix = prefix_data.get('nlri')
                if prefix:
                    track_route_change(prefix, 'withdraw', {})
                    changes_since_analysis += 1

    except (ValueError, AttributeError, KeyError, TypeError) as exc:
        # ValueError covers invalid JSON; the others cover messages whose
        # structure is not the nesting walked above.
        print(f"# Ignoring unparseable message: {exc!r}",
              file=sys.stderr, flush=True)
        continue

    # Periodic analysis
    if changes_since_analysis >= ANALYSIS_INTERVAL:
        analyze_routing_stability()
        changes_since_analysis = 0

Detect Route Hijacks

Monitor for suspicious route announcements:

#!/usr/bin/env python3
import sys
import json
import ipaddress

# Expected AS ownership (simplified): prefix -> list of legitimate origin ASNs
EXPECTED_OWNERSHIP = {
    '8.8.8.0/24': [15169],  # Google
    '1.1.1.0/24': [13335],  # Cloudflare
    # ... more mappings ...
}


def _origin_asn(as_path):
    """Return the last (origin) element of an AS path, or None if unknown."""
    if isinstance(as_path, dict):
        # NOTE(review): some ExaBGP releases appear to encode as-path as a
        # mapping of segment index -> {'element': ..., 'value': [...]}.
        # Confirm against the JSON your version emits.
        try:
            last_key = max(as_path, key=int)
        except (TypeError, ValueError):
            # Empty mapping or non-numeric keys: origin cannot be determined.
            return None
        segment = as_path[last_key]
        as_path = segment.get('value', []) if isinstance(segment, dict) else segment

    if isinstance(as_path, (list, tuple)) and as_path:
        return as_path[-1]
    return None


def check_route_legitimacy(prefix, as_path):
    """Check an announcement against the expected-origin table.

    Args:
        prefix: Announced prefix in CIDR notation.
        as_path: AS path of the announcement; its last element is taken as
            the origin AS.

    Returns:
        (True, None) when the prefix is not covered by EXPECTED_OWNERSHIP,
        cannot be parsed, or is originated by an expected AS.
        (False, reason) when the prefix equals or is more specific than an
        expected prefix and the origin AS is not in that prefix's list.
        An empty or unrecognised AS path has origin None and is reported.
    """
    try:
        announced = ipaddress.ip_network(prefix)
    except (TypeError, ValueError):
        # Not a prefix we can reason about; nothing to flag.
        return True, None

    origin_as = _origin_asn(as_path)

    for expected_prefix, expected_asns in EXPECTED_OWNERSHIP.items():
        try:
            expected = ipaddress.ip_network(expected_prefix)
        except (TypeError, ValueError):
            # Ignore a malformed table entry; keep checking the others.
            continue

        # subnet_of() raises TypeError across IP versions. Skip such entries
        # explicitly so the remaining ones are still examined.
        if announced.version != expected.version:
            continue

        if announced.subnet_of(expected) and origin_as not in expected_asns:
            return False, f"Unexpected AS {origin_as} for {prefix}"

    return True, None

# Monitor for hijacks: one JSON message per line on stdin.
while True:
    raw_line = sys.stdin.readline()
    if not raw_line:
        # readline() returns '' only at end of input. Stop here; treating
        # EOF like a blank line would spin forever at full CPU.
        break

    line = raw_line.strip()
    if not line:
        continue

    try:
        data = json.loads(line)
        # NOTE(review): ExaBGP's JSON appears to nest "message" inside
        # "neighbor" and to carry path attributes once per update rather
        # than per prefix. Both layouts are accepted here; confirm against
        # the output of the ExaBGP version in use.
        message = data.get('neighbor', {}).get('message') or data.get('message', {})
        update = message.get('update', {})
        shared_attributes = update.get('attribute', {})

        # announce: family -> next hop -> list of NLRI entries.
        for routes in update.get('announce', {}).values():
            for prefixes in routes.values():
                for prefix_data in prefixes:
                    prefix = prefix_data.get('nlri')
                    if not prefix:
                        continue

                    attributes = prefix_data.get('attribute') or shared_attributes
                    as_path = attributes.get('as-path', [])

                    legitimate, reason = check_route_legitimacy(prefix, as_path)
                    if not legitimate:
                        print(f"ALERT: Possible route hijack - {reason}",
                              file=sys.stderr, flush=True)

    except (ValueError, AttributeError, KeyError, TypeError) as exc:
        # ValueError covers invalid JSON; the others cover messages whose
        # structure is not the nesting walked above.
        print(f"# Ignoring unparseable message: {exc!r}",
              file=sys.stderr, flush=True)

Integration with Monitoring Systems

Prometheus Exporter

Export BGP metrics to Prometheus:

#!/usr/bin/env python3
import sys
import json
from prometheus_client import start_http_server, Gauge, Counter

# Prometheus metrics published by this exporter, with their label names.
bgp_peers = Gauge('bgp_peers_total', 'Total BGP peers', ['state'])
bgp_routes = Gauge('bgp_routes_total', 'Total BGP routes', ['family'])
bgp_updates = Counter('bgp_updates_total', 'BGP updates received', ['type', 'family'])
bgp_session_uptime = Gauge('bgp_session_uptime_seconds', 'BGP session uptime', ['peer'])

# Serve the metrics over HTTP on port 9100. The "Export Topology to Grafana"
# example on this page binds the same port.
start_http_server(9100)

# Track state
# PEERS: peer address -> {'state': ..., 'uptime': ...}
# ROUTES: address family -> last route count passed to update_route_metrics()
PEERS = {}
ROUTES = {}

def update_peer_metrics(peer_ip, state, uptime=0):
    """Record a peer's state and refresh the peer gauges.

    Args:
        peer_ip: Address of the peer whose state changed.
        state: New session state, used as the `state` label value.
        uptime: Session uptime in seconds to publish for this peer.

    The per-state peer count is recomputed from PEERS. If this peer was the
    last one in its previous state, that state's gauge is set to 0 so it
    does not keep showing the old count.
    """
    previous_state = PEERS.get(peer_ip, {}).get('state')
    PEERS[peer_ip] = {'state': state, 'uptime': uptime}

    # Update gauges
    peers_per_state = {}
    for peer, info in PEERS.items():
        peer_state = info['state']
        peers_per_state[peer_state] = peers_per_state.get(peer_state, 0) + 1
        bgp_session_uptime.labels(peer=peer).set(info['uptime'])

    for peer_state, count in peers_per_state.items():
        bgp_peers.labels(state=peer_state).set(count)

    # Only this peer changed, so its previous state is the only one whose
    # count can have dropped to zero.
    if previous_state is not None and previous_state not in peers_per_state:
        bgp_peers.labels(state=previous_state).set(0)

def update_route_metrics(family, count):
    """Remember the route count for an address family and publish it."""
    ROUTES.update({family: count})
    family_gauge = bgp_routes.labels(family=family)
    family_gauge.set(count)

# Process BGP updates: one JSON message per line on stdin.
while True:
    raw_line = sys.stdin.readline()
    if not raw_line:
        # readline() returns '' only at end of input. Stop here; treating
        # EOF like a blank line would spin forever at full CPU.
        break

    line = raw_line.strip()
    if not line:
        continue

    try:
        data = json.loads(line)
        msg_type = data.get('type')
        neighbor = data.get('neighbor', {})

        if msg_type == 'state':
            # Peer state change
            peer = neighbor.get('address', {}).get('peer')
            state = neighbor.get('state')
            if peer and state:
                update_peer_metrics(peer, state)

        elif msg_type == 'update':
            # Route update
            # NOTE(review): ExaBGP's JSON appears to nest "message" inside
            # "neighbor"; both that layout and the flat one are accepted.
            # Confirm against the output of the ExaBGP version in use.
            message = neighbor.get('message') or data.get('message', {})
            update = message.get('update', {})

            # Count one update per address family actually present in the
            # message, labelled with that family rather than a fixed 'ipv4'.
            for action in ('announce', 'withdraw'):
                for family in update.get(action, {}):
                    bgp_updates.labels(type=action, family=family).inc()

    except (ValueError, AttributeError, KeyError, TypeError) as exc:
        # ValueError covers invalid JSON; the others cover messages whose
        # structure is not the nesting walked above.
        print(f"# Ignoring unparseable message: {exc!r}", file=sys.stderr)

Syslog Integration

Send routing events to syslog:

#!/usr/bin/env python3
import sys
import json
import syslog

# Tag entries as 'exabgp', include the PID, and log to the daemon facility.
syslog.openlog('exabgp', syslog.LOG_PID, syslog.LOG_DAEMON)

# Process events: one JSON message per line on stdin.
while True:
    raw_line = sys.stdin.readline()
    if not raw_line:
        # readline() returns '' only at end of input. Stop here; treating
        # EOF like a blank line would spin forever at full CPU.
        break

    line = raw_line.strip()
    if not line:
        continue

    try:
        data = json.loads(line)
        msg_type = data.get('type')
        neighbor = data.get('neighbor', {})
        peer = neighbor.get('address', {}).get('peer')

        if msg_type == 'state':
            state = neighbor.get('state')
            syslog.syslog(syslog.LOG_INFO,
                          f"BGP peer {peer} state changed to {state}")

        elif msg_type == 'update':
            syslog.syslog(syslog.LOG_DEBUG,
                          f"BGP update received from {peer}")

    except (ValueError, AttributeError, TypeError) as exc:
        # ValueError covers invalid JSON; the others cover messages that
        # are not the expected nested dicts. Report instead of hiding them.
        syslog.syslog(syslog.LOG_WARNING,
                      f"Ignoring unparseable ExaBGP message: {exc!r}")

Configuration Examples

BGP-LS Collector

Configuration (/etc/exabgp/bgp-ls-collector.conf):

process topology-collector {
    run python3 /etc/exabgp/topology-collect.py;
    encoder json;
}

neighbor 10.0.0.1 {
    router-id 10.1.1.1;
    local-address 10.1.1.1;
    local-as 65001;
    peer-as 65001;

    family {
        bgp-ls bgp-ls;
    }

    api {
        processes [ topology-collector ];
        receive {
            parsed;
            update;
        }
    }
}

Route Monitor

Configuration (/etc/exabgp/route-monitor.conf):

process route-tracker {
    run python3 /etc/exabgp/route-track.py;
    encoder json;
}

neighbor 10.0.0.1 {
    router-id 10.1.1.1;
    local-address 10.1.1.1;
    local-as 65001;
    peer-as 65000;

    family {
        ipv4 unicast;
        ipv6 unicast;
    }

    api {
        processes [ route-tracker ];
        receive {
            parsed;
            update;
            neighbor-changes;
        }
    }
}

See Also

Address Families

Related Use Cases

Operations

API


👻 Ghost written by Claude (Anthropic AI)

Clone this wiki locally