Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
426 changes: 426 additions & 0 deletions docs/hole-punch-routers-qemu.md

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions hole-punch/images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Usage: --test-select "~linux" or --test-ignore "!~linux"
test-aliases:
- alias: "failing"
value: "rust-v0.56 x rust-v0.56"
value: ""

# Router configurations - Multiple router types for testing different NAT implementations
routers:
Expand All @@ -14,6 +14,11 @@ routers:
type: local
path: images/linux
dockerfile: Dockerfile
- id: openbsd
source:
type: local
path: images/openbsd
dockerfile: Dockerfile

# Relay configurations - Multiple relay types for testing different implementations
relays:
Expand All @@ -22,9 +27,7 @@ relays:
type: local
path: images/rust/v0.56
dockerfile: Dockerfile.relay
# webrtc-direct removed: broken in this version (SRTP key length mismatch, see #20).
# Fixed upstream in webrtc-rs/webrtc#677. Re-enable when rust-libp2p cuts a new release with the fix.
transports: [quic-v1, tcp, ws]
transports: [quic-v1, tcp]
secureChannels: [noise, tls]
muxers: [yamux, mplex]

Expand All @@ -35,8 +38,6 @@ implementations:
type: local
path: images/rust/v0.56
dockerfile: Dockerfile.peer
# webrtc-direct removed: broken in this version (SRTP key length mismatch, see #20).
# Fixed upstream in webrtc-rs/webrtc#677. Re-enable when rust-libp2p cuts a new release with the fix.
transports: [quic-v1, tcp, ws]
transports: [quic-v1, tcp]
secureChannels: [noise, tls]
muxers: [yamux, mplex]
111 changes: 111 additions & 0 deletions hole-punch/images/openbsd/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Stage 1 (installer): netboots the OpenBSD installer under QEMU and installs
# the system onto a virtual hard disk.
FROM debian:13-slim AS installer

# Build-time tooling for downloading files, preparing the disk, and running QEMU
# (wget + CA certificates for HTTPS downloads, qemu-utils for qemu-img,
# python3 for the HTTP server that serves the sets). The apt package lists are
# removed in the same layer so they do not persist in it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        dosfstools \
        python3 \
        qemu-system-x86 \
        qemu-utils \
        wget \
    && rm -rf /var/lib/apt/lists/*

# /tftp is the root served to the VM; /disks holds the virtual hard disk
RUN mkdir /tftp /disks

# Fetch the OpenBSD 7.8 amd64 PXE boot loader and ramdisk kernel into /tftp.
# The symlinks expose them under the names used at boot time:
# "bsd" -> bsd.rd and "auto_install" -> pxeboot.
WORKDIR /tftp
RUN mirror=https://cdn.openbsd.org/pub/OpenBSD/7.8/amd64 \
    && wget "$mirror/pxeboot" \
    && wget "$mirror/bsd.rd" \
    && ln -s bsd.rd bsd \
    && ln -s pxeboot auto_install

# Download OpenBSD sets to local mirror directory
# The installer fetches from http://10.0.2.2/pub/OpenBSD/7.8/amd64/
#
# After downloading, every fetched file that is listed in the SHA256 manifest
# is verified against it, so a truncated or corrupted download fails the build
# here instead of surfacing later inside the unattended install.
# --ignore-missing skips manifest entries for files that were not downloaded.
# NOTE: the manifest comes from the same mirror, so this detects corruption,
# not a malicious mirror.
RUN mkdir -p /tftp/pub/OpenBSD/7.8/amd64 \
    && cd /tftp/pub/OpenBSD/7.8/amd64 \
    && for f in base78.tgz comp78.tgz man78.tgz bsd bsd.mp bsd.rd SHA256 BUILDINFO; do \
           wget "https://cdn.openbsd.org/pub/OpenBSD/7.8/amd64/$f" || exit 1; \
       done \
    && sha256sum -c --ignore-missing SHA256

# Files placed under /tftp/etc: boot.conf and random.seed
RUN mkdir -p /tftp/etc
COPY boot.conf random.seed /tftp/etc/

# Autoinstall answers file (must be present in the build context)
COPY install.conf /tftp/install.conf

# Assemble the site78.tgz set. Its contents are extracted to / on the target,
# so the config files are shipped inside the tarball itself, alongside
# install.site for dynamic post-install tasks.
# Shipping files in the set is more robust than generating them from
# install.site heredocs: set extraction always happens, whereas a script must
# be executable and can fail silently.
RUN mkdir -p /tmp/site/etc
COPY pf.conf rc.local rc.conf.local /tmp/site/etc/
COPY install.site /tmp/site/install.site
# Mark the scripts executable, add the first-boot sentinel, pack the set into
# the local mirror, register its checksum in SHA256, and write the directory
# listing to index.txt.
RUN chmod +x /tmp/site/install.site /tmp/site/etc/rc.local \
    && touch /tmp/site/etc/firstboot_halt \
    && tar -czf /tftp/pub/OpenBSD/7.8/amd64/site78.tgz -C /tmp/site . \
    && cd /tftp/pub/OpenBSD/7.8/amd64 \
    && digest="$(sha256sum site78.tgz | cut -d' ' -f1)" \
    && printf 'SHA256 (%s) = %s\n' site78.tgz "$digest" >> SHA256 \
    && ls -l > index.txt

# Blank 4G qcow2 disk image that the installer writes to
RUN qemu-img create -f qcow2 /disks/hdd.qcow2 4G

# Netboot the installer under QEMU and let it run unattended.
# - User-mode networking with QEMU's built-in TFTP server rooted at /tftp;
#   the PXE boot file is /auto_install
# - A Python HTTP server on port 80 serves /tftp (sets and install.conf);
#   the guest reaches it at http://10.0.2.2/
# - The target disk is attached through virtio
# - No graphics, serial on stdio; -no-reboot makes QEMU exit when the
#   installer reboots, which ends this build step
RUN python3 -m http.server --directory /tftp 80 & \
    sleep 2 \
    && qemu-system-x86_64 \
        -m 1G \
        -boot order=n,menu=off \
        -netdev user,id=net0,tftp=/tftp,bootfile=/auto_install \
        -device e1000,netdev=net0 \
        -drive file=/disks/hdd.qcow2,if=virtio,format=qcow2,cache=none,discard=unmap \
        -nographic \
        -serial mon:stdio \
        -monitor null \
        -no-reboot

# Second boot: boot the freshly installed disk once at build time so that
# rc.firsttime (SSH key generation, ldconfig, sysmerge, etc.) is consumed here
# and runtime boots are fast.
# The /etc/firstboot_halt sentinel shipped in site78.tgz is meant to make
# rc.local run `halt -p` once rc.firsttime completes (rc.local is not shown
# here; confirm against that file). Powering off makes QEMU exit, which ends
# this step; -no-reboot additionally turns any guest reboot into an exit.
# -nic none: this boot runs without any network device.
RUN qemu-system-x86_64 \
-m 1G \
-nic none \
-drive file=/disks/hdd.qcow2,if=virtio,format=qcow2,cache=none,discard=unmap \
-nographic \
-serial mon:stdio \
-monitor null \
-no-reboot

# Stage 2 (final image): runs the installed OpenBSD disk under QEMU
FROM debian:13-slim

# Runtime tooling: QEMU itself, bridge/TAP management (bridge-utils, iproute2),
# ISO generation for the config drive (genisoimage), and procps.
# The apt package lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bridge-utils \
        genisoimage \
        iproute2 \
        procps \
        qemu-system-x86 \
        qemu-utils \
    && rm -rf /var/lib/apt/lists/*

# Bring over the disk image produced by the installer stage
COPY --from=installer /disks/hdd.qcow2 /hdd.qcow2

# The entrypoint sets up bridges/taps, generates the OpenBSD network
# configuration, and launches QEMU
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# The container counts as healthy once /tmp/healthy exists; the entrypoint
# creates that file when it sees the VM's readiness marker on the console.
HEALTHCHECK --interval=2s --timeout=2s --start-period=120s --retries=1 \
    CMD ["test", "-f", "/tmp/healthy"]

ENTRYPOINT ["/entrypoint.sh"]
2 changes: 2 additions & 0 deletions hole-punch/images/openbsd/boot.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
set tty com0
stty com0 115200
240 changes: 240 additions & 0 deletions hole-punch/images/openbsd/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
#!/bin/bash
# Entrypoint for the OpenBSD QEMU router container.
# Builds Linux bridges and TAP interfaces, writes a config-drive ISO holding
# the dynamic network settings, and starts QEMU with NICs whose MAC addresses
# match the Docker-assigned interfaces.

set -euo pipefail

banner_rule="========================================"
printf '%s\n' "$banner_rule" "OpenBSD QEMU NAT Router" "$banner_rule"

# --- Required environment variables (passed by docker-compose) ---
# Default each one to empty so the check below is safe under `set -u`.
: "${WAN_IP:=}" "${WAN_SUBNET:=}" "${LAN_IP:=}" "${LAN_SUBNET:=}"

for required_var in WAN_IP WAN_SUBNET LAN_IP LAN_SUBNET; do
    if [ -z "${!required_var}" ]; then
        echo "ERROR: Missing required environment variables"
        echo "Required: WAN_IP, WAN_SUBNET, LAN_IP, LAN_SUBNET"
        exit 1
    fi
done

# Interface names assigned by Docker Compose via interface_name:
WAN_IF="wan0"
LAN_IF="lan0"

printf '%s\n' \
    "Configuration:" \
    " WAN Interface: $WAN_IF IP: $WAN_IP Subnet: $WAN_SUBNET" \
    " LAN Interface: $LAN_IF IP: $LAN_IP Subnet: $LAN_SUBNET"

# --- Helper: CIDR prefix to dotted netmask ---
# Usage: cidr_to_mask <prefix-length>
# Prints the dotted-quad netmask for the given prefix length, without a
# trailing newline.
cidr_to_mask() {
    local prefix_len=$1
    # Shift the all-ones word left so the low (32 - prefix) bits become zero;
    # only the low 32 bits are read back, one octet at a time.
    local bits=$(( 0xffffffff << (32 - prefix_len) ))
    local shift
    local -a octets=()
    for shift in 24 16 8 0; do
        octets+=( $(( (bits >> shift) & 255 )) )
    done
    printf '%d.%d.%d.%d' "${octets[@]}"
}

# --- Helper: compute broadcast from IP and CIDR ---
# Usage: compute_broadcast <dotted-quad-ip> <prefix-length>
# Prints the broadcast address (the IP with all host bits set), without a
# trailing newline.
compute_broadcast() {
    local addr=$1 prefix_len=$2
    # Split the address on dots into its four octets.
    local IFS=.
    # shellcheck disable=SC2206
    local -a octet=( $addr )
    local addr_int=$(( (${octet[0]} << 24) + (${octet[1]} << 16) + (${octet[2]} << 8) + ${octet[3]} ))
    local host_bits=$(( (1 << (32 - prefix_len)) - 1 ))
    local bcast_int=$(( addr_int | host_bits ))
    local shift dotted=""
    for shift in 24 16 8 0; do
        dotted+="${dotted:+.}$(( (bcast_int >> shift) & 255 ))"
    done
    printf '%s' "$dotted"
}

# --- Extract CIDR prefix from subnet ---
# Validate first: each subnet must look like <address>/<prefix> with a prefix
# of 0-32. Without this check a malformed value only fails later, inside the
# shell arithmetic of the helper functions, with an obscure error.
for subnet_var in WAN_SUBNET LAN_SUBNET; do
    subnet_value="${!subnet_var}"
    prefix_len="${subnet_value#*/}"
    if [[ "$subnet_value" != */* ]] || [[ ! "$prefix_len" =~ ^([0-9]|[12][0-9]|3[0-2])$ ]]; then
        echo "ERROR: $subnet_var must be in CIDR notation with a prefix of 0-32 (e.g. 10.0.0.0/24), got: $subnet_value"
        exit 1
    fi
done

WAN_CIDR="${WAN_SUBNET#*/}"
LAN_CIDR="${LAN_SUBNET#*/}"

# Dotted netmask and broadcast address for each side
WAN_MASK=$(cidr_to_mask "$WAN_CIDR")
LAN_MASK=$(cidr_to_mask "$LAN_CIDR")

WAN_BCAST=$(compute_broadcast "$WAN_IP" "$WAN_CIDR")
LAN_BCAST=$(compute_broadcast "$LAN_IP" "$LAN_CIDR")

echo " WAN Mask: $WAN_MASK Broadcast: $WAN_BCAST"
echo " LAN Mask: $LAN_MASK Broadcast: $LAN_BCAST"

# --- Determine the WAN gateway ---
# Preferred source: the default route on the WAN interface (third field of
# the first matching route line).
WAN_GW=$(ip route show default dev "$WAN_IF" 2>/dev/null | awk 'NR == 1 { print $3 }')
if [ -n "$WAN_GW" ]; then
    echo " WAN Gateway: $WAN_GW"
else
    # Fallback: first usable IP in WAN subnet (Docker bridge gateway),
    # i.e. the subnet's network address with the last octet incremented.
    saved_ifs="$IFS"
    IFS='.'
    # shellcheck disable=SC2086
    set -- ${WAN_SUBNET%/*}
    IFS="$saved_ifs"
    WAN_GW="$1.$2.$3.$(( $4 + 1 ))"
    echo " WAN Gateway (fallback): $WAN_GW"
fi

# --- Capture MAC addresses BEFORE flushing IPs ---
# The VM's NICs are later given these same MACs.
# Prints the link-layer address of the interface named in $1.
read_mac() {
    ip link show "$1" | awk '/ether/{print $2}'
}
WAN_MAC=$(read_mac "$WAN_IF")
LAN_MAC=$(read_mac "$LAN_IF")
echo " WAN MAC: $WAN_MAC"
echo " LAN MAC: $LAN_MAC"

# --- Create /dev/net/tun if needed ---
# Character device major 10, minor 200.
if ! [ -c /dev/net/tun ]; then
    mkdir -p /dev/net
    mknod /dev/net/tun c 10 200
    echo "Created /dev/net/tun"
fi

# --- Create TAP interfaces ---
# tap0 carries the VM's WAN NIC, tap1 its LAN NIC.
for tap_dev in tap0 tap1; do
    ip tuntap add "$tap_dev" mode tap
done
for tap_dev in tap0 tap1; do
    ip link set "$tap_dev" up
done
echo "Created TAP interfaces"

# --- Create bridges and wire up ---
# join_bridge <bridge> <docker-interface> <tap>
# Creates the bridge, enslaves the Docker interface and the TAP device to it,
# flushes the IP from the Docker interface (the OpenBSD VM takes over that
# address), then brings the Docker interface and the bridge up.
join_bridge() {
    local bridge=$1 docker_if=$2 tap_dev=$3
    ip link add "$bridge" type bridge
    ip link set "$docker_if" master "$bridge"
    ip link set "$tap_dev" master "$bridge"
    ip addr flush dev "$docker_if"
    ip link set "$docker_if" up
    ip link set "$bridge" up
}

# WAN: wan0 <-> br-wan <-> tap0
join_bridge br-wan "$WAN_IF" tap0
# LAN: lan0 <-> br-lan <-> tap1
join_bridge br-lan "$LAN_IF" tap1

echo "Bridges configured: br-wan ($WAN_IF + tap0), br-lan ($LAN_IF + tap1)"

# --- Ensure transparent L2 forwarding for QEMU bridges ---
# Docker loads br_netfilter which causes bridged frames to traverse iptables
# FORWARD chain. Disable this for our bridges to prevent packet drops.
#
# All writes in this section are best-effort: the knobs may be missing, or
# /sys and /proc may be mounted read-only in the container. A failed write
# must not abort the script under `set -e`, so each one is guarded; the
# per-bridge loop reports failures as warnings on stderr.
for br in br-wan br-lan; do
    for f in nf_call_iptables nf_call_ip6tables nf_call_arptables; do
        knob="/sys/class/net/$br/bridge/$f"
        if [ -f "$knob" ]; then
            { echo 0 > "$knob"; } 2>/dev/null \
                || echo "WARNING: could not write $knob" >&2
        fi
    done
done

# Also try namespace-wide sysctl (exists only if br_netfilter is loaded)
for f in /proc/sys/net/bridge/bridge-nf-call-iptables \
         /proc/sys/net/bridge/bridge-nf-call-ip6tables \
         /proc/sys/net/bridge/bridge-nf-call-arptables; do
    if [ -f "$f" ]; then
        { echo 0 > "$f"; } 2>/dev/null || true
    fi
done

# Disable reverse path filtering on ALL interfaces (including newly created bridges/taps)
for f in /proc/sys/net/ipv4/conf/*/rp_filter; do
    { echo 0 > "$f"; } 2>/dev/null || true
done

echo "Bridge netfilter disabled, rp_filter disabled on all interfaces"

# --- Create config drive ISO ---
# The files are staged in a temporary directory, packed into /config.iso
# (volume label CONFIG), and the staging directory is removed afterwards.
CONFIG_DIR=$(mktemp -d)

# OpenBSD hostname.if format: inet <address> <netmask> <broadcast>
# vio0 is the WAN side, vio1 the LAN side; mygate holds the default gateway.
printf 'inet %s %s %s\n' "$WAN_IP" "$WAN_MASK" "$WAN_BCAST" > "$CONFIG_DIR/hostname.vio0"
printf 'inet %s %s %s\n' "$LAN_IP" "$LAN_MASK" "$LAN_BCAST" > "$CONFIG_DIR/hostname.vio1"
printf '%s\n' "$WAN_GW" > "$CONFIG_DIR/mygate"

echo "Config drive contents:"
for cfg_file in hostname.vio0 hostname.vio1 mygate; do
    echo " $cfg_file: $(cat "$CONFIG_DIR/$cfg_file")"
done

genisoimage -quiet -r -V CONFIG -o /config.iso "$CONFIG_DIR"
rm -rf "$CONFIG_DIR"
echo "Config drive ISO created"

# --- Detect KVM ---
# ENABLE_KVM holds the extra QEMU flag, or is empty when /dev/kvm is not a
# character device in this container.
if [ ! -c /dev/kvm ]; then
    ENABLE_KVM=""
    echo "KVM not available, using software emulation"
else
    ENABLE_KVM="-enable-kvm"
    echo "KVM acceleration enabled"
fi

echo "========================================"
echo "Launching OpenBSD QEMU VM..."
echo "========================================"

# --- Reset per-run state ---
# A restarted container keeps its filesystem, so clear the previous run's
# health marker and console log; otherwise the old readiness marker could be
# matched before the VM has booted. Creating the log up front also guarantees
# that `tail -f` below finds the file: previously tail could start before QEMU
# had created it and exit at once, silently disabling the log mirror.
rm -f /tmp/healthy
: > /tmp/console.log

# --- Launch QEMU ---
# - virtio disk with installed OpenBSD
# - CD-ROM config drive (mounted by rc.local)
# - Two virtio NICs bridged to Docker networks via TAP, with matching MACs
# - Serial output to file for readiness detection; tail mirrors to docker logs
# ENABLE_KVM is deliberately unquoted so that an empty value adds no argument.
# shellcheck disable=SC2086
qemu-system-x86_64 \
    -m 512M \
    $ENABLE_KVM \
    -drive file=/hdd.qcow2,if=virtio,format=qcow2 \
    -cdrom /config.iso \
    -netdev tap,id=net0,ifname=tap0,script=no,downscript=no \
    -device virtio-net-pci,netdev=net0,mac="$WAN_MAC" \
    -netdev tap,id=net1,ifname=tap1,script=no,downscript=no \
    -device virtio-net-pci,netdev=net1,mac="$LAN_MAC" \
    -nographic \
    -serial file:/tmp/console.log &
QEMU_PID=$!

# Mirror console output to docker logs
tail -f /tmp/console.log 2>/dev/null &
TAIL_PID=$!

# Signal handler: pass SIGTERM/SIGINT on to QEMU for a graceful shutdown.
# Sends TERM to QEMU, waits for it to exit, then stops the console mirror.
# Every step tolerates a process that has already gone away.
cleanup_qemu() {
    printf 'Forwarding signal to QEMU (PID %s)...\n' "$QEMU_PID"
    kill -s TERM "$QEMU_PID" 2>/dev/null || :
    wait "$QEMU_PID" 2>/dev/null || :
    kill "$TAIL_PID" 2>/dev/null || :
}
trap cleanup_qemu TERM INT

# Poll for VM readiness (rc.local prints marker when networking + PF are up)
# The console log is checked once per second, for up to BOOT_TIMEOUT
# iterations. On success /tmp/healthy is created, which is the file the image
# HEALTHCHECK tests.
BOOT_TIMEOUT=120
ELAPSED=0
READY_MARKER="rc.local: Network configuration complete."
echo "Waiting for OpenBSD VM to finish booting (timeout: ${BOOT_TIMEOUT}s)..."
while [ "$ELAPSED" -lt "$BOOT_TIMEOUT" ]; do
    # Check if QEMU crashed
    if ! kill -0 "$QEMU_PID" 2>/dev/null; then
        echo "ERROR: QEMU process exited unexpectedly"
        exit 1
    fi
    # Check for readiness marker. -F matches it as a literal string, so the
    # dots in the marker are not treated as regex wildcards.
    if grep -qF -- "$READY_MARKER" /tmp/console.log 2>/dev/null; then
        echo "OpenBSD VM is ready (boot time: ${ELAPSED}s)"
        touch /tmp/healthy
        break
    fi
    sleep 1
    ELAPSED=$((ELAPSED + 1))
done

# The loop only reaches BOOT_TIMEOUT without a break when the marker never
# appeared: stop the VM and fail.
if [ "$ELAPSED" -ge "$BOOT_TIMEOUT" ]; then
    echo "ERROR: OpenBSD VM did not become ready within ${BOOT_TIMEOUT}s"
    kill -TERM "$QEMU_PID" 2>/dev/null || true
    exit 1
fi

# Keep container alive until QEMU exits
wait "$QEMU_PID"
Loading
Loading