Skip to content

Commit 44e87d6

Browse files
Distro CLI: Add proxy device infrastructure for testing
- Add Docker-based proxy device container simulating FBOSS services - Add btrfs snapshot support for service root directories - Add systemd service templates for FBOSS services - Add device initialization and entrypoint scripts - Update device_test.py with proxy device integration tests This enables end-to-end testing of device update functionality.
1 parent eb9d7b6 commit 44e87d6

File tree

15 files changed

+454
-7
lines changed

15 files changed

+454
-7
lines changed

fboss-image/distro_cli/tests/device_test.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,20 +100,14 @@ def tearDownClass(cls):
100100
shutil.rmtree(cls.container_temp_dir, ignore_errors=True)
101101

102102
def setup_image_command_test(self):
103-
"""Set up PXE boot infrastructure for image command tests.
104-
105-
Waits for the container's run_distro_infra.sh script to create the cache
106-
directory and copy iPXE boot files.
107-
"""
103+
"""Wait for container to create PXE boot infrastructure."""
108104
cache_dir = self.container_persistent_dir / "cache"
109105

110-
# Wait for cache directory to be created by container
111106
waitfor(
112107
cache_dir.exists,
113108
lambda: self.fail("Timed out waiting for cache directory to be created"),
114109
)
115110

116-
# Wait for all iPXE files to be created by container
117111
for filename in self.IPXE_FILES:
118112
cache_file = cache_dir / filename
119113
waitfor(
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Proxy FBOSS device image for integration tests: CentOS Stream 9 running
# systemd as init, an SSH server that accepts passwordless root logins,
# btrfs tooling, and stub FBOSS service scripts under /opt/fboss/bin.
FROM quay.io/centos/centos:stream9
2+
3+
# Install systemd and clean up
4+
RUN dnf install -y systemd systemd-libs && \
5+
dnf clean all && \
6+
rm -rf /var/cache/dnf && \
7+
# Remove unnecessary systemd units
8+
rm -f /etc/systemd/system/*.wants/* \
9+
/lib/systemd/system/multi-user.target.wants/* \
10+
/lib/systemd/system/local-fs.target.wants/* \
11+
/lib/systemd/system/sockets.target.wants/*udev* \
12+
/lib/systemd/system/sockets.target.wants/*initctl* \
13+
/lib/systemd/system/basic.target.wants/* \
14+
/lib/systemd/system/anaconda.target.wants/*
15+
16+
# Install EPEL for btrfs-progs
17+
RUN dnf install -y epel-release && dnf clean all && rm -rf /var/cache/dnf
18+
19+
# Install SSH server and btrfs tools
20+
RUN dnf install -y \
21+
openssh-server openssh-clients \
22+
btrfs-progs \
23+
tar zstd \
24+
procps-ng \
25+
rsync \
26+
&& dnf clean all && rm -rf /var/cache/dnf
27+
28+
# Configure SSH
29+
RUN ssh-keygen -A && \
30+
mkdir -p /root/.ssh && \
31+
chmod 700 /root/.ssh
32+
33+
# Allow passwordless root login since it is only used in integration tests
# The first sed blanks root's password field in /etc/shadow; the remaining
# ones switch sshd to accept root logins with an empty password.
# NOTE(review): UsePAM is presumably turned off so PAM does not reject the
# empty password - confirm.
34+
RUN sed -i 's/^root:[^:]*:/root::/' /etc/shadow && \
35+
sed -i 's/#PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config && \
36+
sed -i 's/#PermitEmptyPasswords.*/PermitEmptyPasswords yes/' /etc/ssh/sshd_config && \
37+
sed -i 's/#PasswordAuthentication.*/PasswordAuthentication yes/' /etc/ssh/sshd_config && \
38+
sed -i 's/^UsePAM yes/UsePAM no/' /etc/ssh/sshd_config.d/50-redhat.conf
39+
40+
# Create FBOSS directory structure
41+
RUN mkdir -p /opt/fboss/bin /opt/fboss/lib /updates
42+
43+
# Copy service template and create all service scripts from it
# Every service is an identical copy of the template, installed under the
# service's own file name.
44+
COPY parts/services/service_template.sh /tmp/service_template.sh
45+
RUN for svc in wedge_agent fsdb qsfp_service platform_manager sensor_service fan_service data_corral_service; do \
46+
cp /tmp/service_template.sh /opt/fboss/bin/$svc; \
47+
done && \
48+
rm /tmp/service_template.sh
49+
50+
# Copy setup scripts
51+
COPY parts/systemd/ /etc/systemd/system/
52+
COPY parts/setup_btrfs.sh /usr/local/bin/
53+
COPY parts/entrypoint.sh /usr/local/bin/
54+
55+
RUN chmod +x /opt/fboss/bin/* /usr/local/bin/*.sh
56+
57+
# Enable services, SSH, and device init
# The list of FBOSS services here matches the list used when creating the
# service scripts above.
58+
RUN systemctl enable sshd && \
59+
systemctl enable device-init && \
60+
systemctl enable wedge_agent && \
61+
systemctl enable fsdb && \
62+
systemctl enable qsfp_service && \
63+
systemctl enable platform_manager && \
64+
systemctl enable sensor_service && \
65+
systemctl enable fan_service && \
66+
systemctl enable data_corral_service
67+
68+
EXPOSE 22
69+
70+
# Use systemd as init
71+
CMD ["/sbin/init"]
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
# Proxy Device
2+
3+
A Docker container that simulates a FBOSS device for testing the `fboss-image` CLI commands, particularly the `device update` functionality.
4+
5+
## Purpose
6+
7+
The `device update` command creates btrfs subvolumes, installs artifacts, and restarts services. Testing this against real hardware is slow and impractical during development.
8+
9+
This container provides a lightweight simulation that:
10+
- Runs systemd as init (like a real device)
11+
- Creates a btrfs filesystem on a loopback file
12+
- Runs proxy FBOSS services in per-service btrfs subvolumes
13+
- Accepts SSH connections for CLI commands
14+
15+
## Architecture
16+
17+
```
18+
┌─────────────────────────────────────────────────────────────┐
19+
│ proxy_device │
20+
│ │
21+
│ ┌─────────────────────────────────────────────────────┐ │
22+
│ │ systemd (PID 1) │ │
23+
│ │ │ │
24+
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
25+
│ │ │ wedge_agent │ │ fsdb │ │ qsfp_service│ │ │
26+
│ │ │ (subvol) │ │ (subvol) │ │ (subvol) │ │ │
27+
│ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │
28+
│ │ │ │
29+
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
30+
│ │ │platform_mgr │ │ sensor_svc │ │ fan_service │ │ │
31+
│ │ │ (subvol) │ │ (subvol) │ │ (subvol) │ │ │
32+
│ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │
33+
│ │ │ │
34+
│ │ ┌─────────────┐ │ │
35+
│ │ │data_corral │ sshd (port 22) │ │
36+
│ │ │ (subvol) │ │ │
37+
│ │ └─────────────┘ │ │
38+
│ └─────────────────────────────────────────────────────┘ │
39+
│ │
40+
│ /mnt/btrfs/ │
41+
│ ├── distro-base/ (base subvolume) │
42+
│ └── updates/ │
43+
│ ├── wedge_agent-<ts>/ (service subvolume) │
44+
│ ├── fsdb-<ts>/ │
45+
│ ├── qsfp_service-<ts>/ │
46+
│ └── ... │
47+
└─────────────────────────────────────────────────────────────┘
48+
```
49+
50+
### Directory Structure Within Subvolumes
51+
52+
Each btrfs subvolume contains the FBOSS directory structure:
53+
54+
```
55+
/opt/fboss/
56+
├── bin/
57+
│ └── <service> # Service script (replaced by update)
58+
└── lib/ # Shared libraries
59+
```
60+
61+
- **Service scripts** (`/opt/fboss/bin/`) are stub scripts that write their version to `/var/run/<service>.version` and loop forever.
62+
- Updates replace these scripts with new versions.
63+
- Tests verify updates by checking the version file contents.
64+
65+
## Key Components
66+
67+
### Dockerfile
68+
Builds a CentOS Stream 9 image with:
69+
- systemd as init
70+
- btrfs-progs for filesystem operations
71+
- SSH server with passwordless root access
72+
- FBOSS service scripts at `/opt/fboss/bin/`
73+
74+
### parts/setup_btrfs.sh
75+
Runs at first boot via `device-init.service`:
76+
1. Creates a 1GB loopback file at `/var/btrfs.img`
77+
2. Formats it as btrfs and mounts at `/mnt/btrfs`
78+
3. Copies the container's root filesystem (including the FBOSS directory structure) onto it and snapshots that copy as the `distro-base` subvolume
79+
4. Snapshots base into per-service subvolumes under `/updates/`
80+
5. Creates systemd drop-ins setting `RootDirectory=` for each service
81+
82+
### parts/services/<service>
83+
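Each service script (wedge_agent, fsdb, etc.) is an identical copy of `parts/services/service_template.sh`, installed under the service's name. It is a simple stub that: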
84+
1. Writes its VERSION to `/var/run/<service>.version`
85+
2. Logs startup to `/var/log/<service>.log`
86+
3. Loops forever with `sleep 60`
87+
88+
The initial version is "1.0.0". Updates replace the script with a new version.
89+
90+
### Version Verification
91+
92+
Each service writes its version to `/var/run/<service>.version` **inside its btrfs subvolume**.
93+
94+
To verify from outside the service:
95+
```bash
96+
# Find the service's subvolume and check version
97+
SUBVOL=$(ls -d /mnt/btrfs/updates/wedge_agent-* | head -1)
98+
cat $SUBVOL/var/run/wedge_agent.version
99+
# Output: 1.0.0 (before update) or 2.0.0 (after update)
100+
```
101+
102+
Integration tests use this flow:
103+
1. Start container → services run with VERSION="1.0.0"
104+
2. Deploy update with VERSION="2.0.0" artifacts
105+
3. Restart service
106+
4. Verify version file changed to "2.0.0"
107+
108+
## Usage
109+
110+
```bash
111+
# Build the container image
112+
./build.sh
113+
114+
# Run standalone (for debugging)
115+
docker run -d --privileged --cgroupns=host --name proxy-device \
116+
fboss_proxy_device /sbin/init
117+
118+
# SSH into it (passwordless root login)
119+
ssh root@<container-ip>
120+
121+
# Check services
122+
docker exec proxy-device systemctl status wedge_agent
123+
124+
# Check subvolumes
125+
docker exec proxy-device ls /mnt/btrfs/updates/
126+
127+
# Check service version
128+
docker exec proxy-device bash -c 'cat /mnt/btrfs/updates/wedge_agent-*/var/run/wedge_agent.version'
129+
```
130+
131+
## Testing Updates
132+
133+
The `update` command replaces service scripts and restarts services.
134+
135+
**How it works:**
136+
137+
1. Service scripts start with VERSION="1.0.0" at `/opt/fboss/bin/<service>`
138+
2. Systemd runs these scripts: `ExecStart=/opt/fboss/bin/<service>`
139+
3. Updates replace scripts with new versions (e.g., VERSION="2.0.0")
140+
4. Service restarts and writes new version to `/var/run/<service>.version`
141+
142+
Tests verify updates by checking:
143+
- Version file contains expected version
144+
- Service is running (via systemctl status)
145+
- Log file shows new startup entry
146+
147+
## Requirements
148+
149+
- Docker with `--privileged` support (for systemd and loopback mounts)
150+
- `--cgroupns=host` for proper cgroup management
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
# Build the fboss_proxy_device image from the directory containing this
# script, so the build context is the same whatever the caller's working
# directory is.
set -e

script_dir=$(dirname "$0")
cd "$script_dir"

# Request BuildKit for the build below.
export DOCKER_BUILDKIT=1
docker build -t fboss_proxy_device .
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/bash
# Entrypoint script for device container
# This is called by systemd after boot
#
# Always delegates to setup_btrfs.sh instead of guarding on the existence of
# /var/btrfs.img. setup_btrfs.sh already exits early when /mnt/btrfs is
# mounted, removes the nologin files on every run, and discards an image left
# behind by a failed or earlier run. Guarding on the image file skipped all of
# that: a half-finished setup was never retried, and after a restart the
# image existed but was never mounted again.

set -e

/usr/local/bin/setup_btrfs.sh

echo "Device container ready"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash
# Stub FBOSS service. The service name is the file name this script was
# started as; the script records VERSION in a version file, appends one
# startup line to a log file, then sleeps in a loop forever.
VERSION="1.0.0"
SERVICE_NAME="$(basename "$0")"
VERSION_FILE="/var/run/${SERVICE_NAME}.version"
LOG_FILE="/var/log/${SERVICE_NAME}.log"

# Make sure both target directories exist before writing.
mkdir -p "${VERSION_FILE%/*}" "${LOG_FILE%/*}"

# The version file is overwritten on every start; the log is append-only.
printf '%s\n' "$VERSION" >"$VERSION_FILE"
printf '%s: %s v%s started (pid %s)\n' \
  "$(date)" "$SERVICE_NAME" "$VERSION" "$$" >>"$LOG_FILE"

# Idle forever so the service stays running.
until false; do
  sleep 60
done
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
#!/bin/bash
# Set up a btrfs loopback filesystem holding a base snapshot of the
# container's root filesystem plus one snapshot subvolume per FBOSS service.
#
# Steps:
#   1. Remove the nologin files so SSH logins are accepted.
#   2. Exit early if the btrfs mount point is already mounted.
#   3. Detach stale loop devices, recreate the image file, format and mount it.
#   4. rsync the root filesystem onto btrfs and snapshot it as distro-base.
#   5. Snapshot distro-base once per service and write a systemd drop-in that
#      sets RootDirectory= to that snapshot.
set -euo pipefail

# Remove nologin file to allow SSH login (systemd creates this during boot)
rm -f /run/nologin /var/run/nologin

readonly BTRFS_IMG=/var/btrfs.img
readonly BTRFS_MOUNT=/mnt/btrfs
readonly DISTRO_BASE=/distro-base
readonly -a SERVICES=(
  wedge_agent
  fsdb
  qsfp_service
  platform_manager
  sensor_service
  fan_service
  data_corral_service
)

#######################################
# Make <root>/var/run a real directory.
# By default /var/run is a symlink to ../run, which would resolve outside the
# subvolume contents; services need a real directory to write version files
# when running with RootDirectory isolation.
# The symlink is only removed when it actually is a symlink: running
# `rm -f` on an existing directory fails and would abort the script under
# `set -e`.
# Arguments: $1 - root of the tree to fix
#######################################
ensure_real_var_run() {
  local root=$1
  if [[ -L "$root/var/run" ]]; then
    rm -f -- "$root/var/run"
  fi
  mkdir -p -- "$root/var/run"
}

# Skip btrfs setup if already mounted (but nologin removal above always runs)
if mountpoint -q "$BTRFS_MOUNT" 2>/dev/null; then
  echo "btrfs already mounted, skipping"
  exit 0
fi

# Clean up any stale loop devices from previous container runs
# (loop devices are shared with host and persist after container exit)
while IFS= read -r dev; do
  [[ -n "$dev" ]] || continue
  # Best effort: a device that cannot be detached is left alone.
  losetup -d "$dev" 2>/dev/null || true
done < <(losetup -j "$BTRFS_IMG" 2>/dev/null | cut -d: -f1)

# Clean up any existing file from previous failed runs
rm -f -- "$BTRFS_IMG"

# Create loopback file (1GB to accommodate full root filesystem copy)
dd if=/dev/zero of="$BTRFS_IMG" bs=1M count=1024

# Set up loop device first, then format and mount
mkdir -p "$BTRFS_MOUNT"
LOOP_DEV=$(losetup -f --show "$BTRFS_IMG")
mkfs.btrfs "$LOOP_DEV"
mount "$LOOP_DEV" "$BTRFS_MOUNT"

# Copy the container's root filesystem to btrfs (mimics ONIE extracting rootfs)
# Exclude special filesystems and the btrfs image itself to avoid recursion.
# The patterns are quoted so the shell never expands them itself.
echo "Copying root filesystem to btrfs..."
rsync -aAX \
  --exclude='/dev/*' \
  --exclude='/proc/*' \
  --exclude='/sys/*' \
  --exclude='/run/*' \
  --exclude='/mnt/*' \
  --exclude='/tmp/*' \
  --exclude=/var/btrfs.img \
  / "$BTRFS_MOUNT/"

# Create base snapshot from the root copy (mimics real device installation)
# This is what install.sh.tmpl does: btrfs subvolume snapshot ${demo_mnt} ${demo_mnt}/distro-base
echo "Creating base snapshot..."
btrfs subvolume snapshot "$BTRFS_MOUNT" "$BTRFS_MOUNT/distro-base"

# Fix /var/run in the base so every later snapshot inherits a real directory
ensure_real_var_run "$BTRFS_MOUNT/distro-base"

# Create symlinks for easy access (remove existing dirs first)
rm -rf -- "$DISTRO_BASE" /updates
ln -sf "$BTRFS_MOUNT/distro-base" "$DISTRO_BASE"

# Create updates directory on btrfs
mkdir -p "$BTRFS_MOUNT/updates"
ln -sf "$BTRFS_MOUNT/updates" /updates

echo "Base snapshot created at $DISTRO_BASE (snapshot of root filesystem)"

# Create initial per-service subvolumes (mimics first update)
# This allows integration tests to verify services run in subvolumes
echo "Creating initial service subvolumes..."
TIMESTAMP=$(date +%Y%m%d_%H%M%S)

for svc in "${SERVICES[@]}"; do
  SUBVOL_PATH="$BTRFS_MOUNT/updates/${svc}-${TIMESTAMP}"
  echo "  Creating subvolume for $svc at $SUBVOL_PATH"
  btrfs subvolume snapshot "$BTRFS_MOUNT/distro-base" "$SUBVOL_PATH"

  # Keep version files inside the subvolume. The snapshot normally already
  # has a real /var/run directory (inherited from distro-base), so this must
  # tolerate both a symlink and a directory.
  ensure_real_var_run "$SUBVOL_PATH"

  # Create systemd drop-in to run service in its subvolume
  DROPIN_DIR="/etc/systemd/system/${svc}.service.d"
  mkdir -p "$DROPIN_DIR"
  cat >"$DROPIN_DIR/root-override.conf" <<EOF
[Service]
RootDirectory=$SUBVOL_PATH
EOF
done

# Reload systemd to pick up the drop-ins
systemctl daemon-reload

echo "btrfs setup complete: Base=$DISTRO_BASE, Services in subvolumes"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# systemd unit for the Data Corral Service.
# It is ordered after, and requires, device-init.service, and runs
# /opt/fboss/bin/data_corral_service. With Restart=always and RestartSec=5,
# systemd restarts the process 5 seconds after it exits.
[Unit]
2+
Description=Data Corral Service
3+
After=network.target device-init.service
4+
Requires=device-init.service
5+
6+
[Service]
7+
Type=simple
8+
ExecStart=/opt/fboss/bin/data_corral_service
9+
Restart=always
10+
RestartSec=5
11+
12+
[Install]
13+
WantedBy=multi-user.target

0 commit comments

Comments
 (0)