Skip to content

Commit db26f38

Browse files
sjmiller609 and rgarcia authored
Network manager (#7)
* Network manager * Fix tests * an instance test with network passes, but needs more work * Test time * Simplify to default network only * Remove network logic from configdisk.go * Random IP distribution * Address concurrency and locking * Fix json parsing * Add a way to run just one test * Get network allocation before deleting VMM * Delete network working but delete looks messy * Don't need to manage DNS yet * Don't use CAP_SYS_ADMIN * Inheritable CAP_NET_ADMIN * Check for standby resume in network test * Cleanup taps on standby * Fix path to ch snap config * WIP: proved that network not working after restore * add stainless github action (#8) * Enable network capabilities on make dev * Address review comments * fix make dev capabilities * Fix network init * fixing host setup initialization for partially-initialized state * Improve error message with stale bridge config * cleanup orphaned taps and naming convention hype-* * Put our iptables rules at the top * Discover host uplink * 400 instead of 500 on name conflict * Adjust test for new tap name * Working on networking test * Fix network test * Delete redundant test * Addressing PR review comments * including ip and mac * Change default subnet to 10.100.0.0/16 * Detect and informatively error on network conflict * Derive gateway IP instead of config separately from subnet * Add high level explainer to README --------- Co-authored-by: Rafael <[email protected]>
1 parent f78b592 commit db26f38

40 files changed

+2806
-172
lines changed

.air.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ tmp_dir = "tmp"
55
[build]
66
args_bin = []
77
bin = "./tmp/main"
8-
cmd = "go build -tags containers_image_openpgp -o ./tmp/main ./cmd/api"
8+
cmd = "go build -tags containers_image_openpgp -o ./tmp/main ./cmd/api && sudo setcap cap_net_admin,cap_net_bind_service=+eip ./tmp/main"
99
delay = 1000
1010
exclude_dir = ["assets", "tmp", "vendor", "testdata", "bin", "scripts", "data", "kernel"]
1111
exclude_file = []
@@ -20,6 +20,7 @@ tmp_dir = "tmp"
2020
log = "build-errors.log"
2121
poll = false
2222
poll_interval = 0
23+
post_cmd = []
2324
rerun = false
2425
rerun_delay = 500
2526
send_interrupt = false
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
name: Stainless SDK preview on PRs
2+
3+
on:
4+
pull_request:
5+
types:
6+
- opened
7+
- synchronize
8+
- reopened
9+
- closed
10+
11+
concurrency:
12+
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
13+
cancel-in-progress: true
14+
15+
env:
16+
STAINLESS_ORG: ${{ vars.STAINLESS_ORG }}
17+
STAINLESS_PROJECT: ${{ vars.STAINLESS_PROJECT }}
18+
OAS_PATH: openapi.yaml
19+
CONFIG_PATH: stainless.yaml
20+
21+
jobs:
22+
preview:
23+
if: github.event.action != 'closed'
24+
runs-on: ubuntu-latest
25+
permissions:
26+
contents: read
27+
pull-requests: write
28+
steps:
29+
- name: Checkout repository
30+
uses: actions/checkout@v4
31+
with:
32+
fetch-depth: 2
33+
34+
- name: Run preview builds
35+
uses: stainless-api/upload-openapi-spec-action/preview@v1
36+
with:
37+
stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
38+
org: ${{ env.STAINLESS_ORG }}
39+
project: ${{ env.STAINLESS_PROJECT }}
40+
oas_path: ${{ env.OAS_PATH }}
41+
config_path: ${{ env.CONFIG_PATH }}
42+
make_comment: true
43+
github_token: ${{ secrets.GITHUB_TOKEN }}
44+
45+
merge:
46+
if: github.event.action == 'closed' && github.event.pull_request.merged == true
47+
runs-on: ubuntu-latest
48+
permissions:
49+
contents: read
50+
pull-requests: write
51+
steps:
52+
- name: Checkout repository
53+
uses: actions/checkout@v4
54+
with:
55+
fetch-depth: 2
56+
57+
- name: Run merge build
58+
uses: stainless-api/upload-openapi-spec-action/merge@v1
59+
with:
60+
stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
61+
org: ${{ env.STAINLESS_ORG }}
62+
project: ${{ env.STAINLESS_PROJECT }}
63+
oas_path: ${{ env.OAS_PATH }}
64+
make_comment: true
65+
github_token: ${{ secrets.GITHUB_TOKEN }}

Makefile

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,42 @@ dev: $(AIR)
115115
$(AIR) -c .air.toml
116116

117117
# Run tests
118+
# Compile test binaries and grant network capabilities (runs as user, not root)
119+
# Usage: make test - runs all tests
120+
# make test TEST=TestCreateInstanceWithNetwork - runs specific test
118121
test: ensure-ch-binaries lib/system/exec_agent/exec-agent
119-
go test -tags containers_image_openpgp -v -timeout 30s ./...
122+
@echo "Building test binaries..."
123+
@mkdir -p $(BIN_DIR)/tests
124+
@for pkg in $$(go list -tags containers_image_openpgp ./...); do \
125+
pkg_name=$$(basename $$pkg); \
126+
go test -c -tags containers_image_openpgp -o $(BIN_DIR)/tests/$$pkg_name.test $$pkg 2>/dev/null || true; \
127+
done
128+
@echo "Granting capabilities to test binaries..."
129+
@for test in $(BIN_DIR)/tests/*.test; do \
130+
if [ -f "$$test" ]; then \
131+
sudo setcap 'cap_net_admin,cap_net_bind_service=+eip' $$test 2>/dev/null || true; \
132+
fi; \
133+
done
134+
@echo "Running tests as current user with capabilities..."
135+
@if [ -n "$(TEST)" ]; then \
136+
echo "Running specific test: $(TEST)"; \
137+
for test in $(BIN_DIR)/tests/*.test; do \
138+
if [ -f "$$test" ]; then \
139+
echo ""; \
140+
echo "Checking $$(basename $$test) for $(TEST)..."; \
141+
$$test -test.run=$(TEST) -test.v -test.timeout=60s 2>&1 | grep -q "PASS\|FAIL" && \
142+
$$test -test.run=$(TEST) -test.v -test.timeout=60s || true; \
143+
fi; \
144+
done; \
145+
else \
146+
for test in $(BIN_DIR)/tests/*.test; do \
147+
if [ -f "$$test" ]; then \
148+
echo ""; \
149+
echo "Running $$(basename $$test)..."; \
150+
$$test -test.v -test.parallel=10 -test.timeout=60s || exit 1; \
151+
fi; \
152+
done; \
153+
fi
120154

121155
# Generate JWT token for testing
122156
# Usage: make gen-jwt [USER_ID=test-user]
@@ -131,4 +165,3 @@ clean:
131165
rm -f lib/exec/exec.pb.go
132166
rm -f lib/exec/exec_grpc.pb.go
133167
rm -f lib/system/exec_agent/exec-agent
134-

README.md

Lines changed: 102 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,17 @@ Run containerized workloads in VMs, powered by [Cloud Hypervisor](https://github
88

99
### Prerequisites
1010

11-
**Go 1.25.4+**, **KVM**, **erofs-utils**
11+
**Go 1.25.4+**, **KVM**, **erofs-utils**, **dnsmasq**
1212

1313
```bash
14+
# Verify prerequisites
1415
mkfs.erofs --version
16+
dnsmasq --version
17+
```
18+
19+
**Install on Debian/Ubuntu:**
20+
```bash
21+
sudo apt-get install erofs-utils dnsmasq
1522
```
1623

1724
**KVM Access:** User must be in `kvm` group for VM access:
@@ -20,13 +27,100 @@ sudo usermod -aG kvm $USER
2027
# Log out and back in, or use: newgrp kvm
2128
```
2229

30+
**Network Capabilities:**
31+
32+
Before running or testing Hypeman, ensure IPv4 forwarding is enabled:
33+
34+
```bash
35+
# Enable IPv4 forwarding (temporary - until reboot)
36+
sudo sysctl -w net.ipv4.ip_forward=1
37+
38+
# Enable IPv4 forwarding (persistent across reboots)
39+
echo 'net.ipv4.ip_forward=1' | sudo tee -a /etc/sysctl.conf
40+
sudo sysctl -p
41+
```
42+
43+
**Why:** IPv4 forwarding is required to route traffic between the VM network and the external network.
44+
45+
The hypeman binary needs network administration capabilities to create bridges and TAP devices:
46+
```bash
47+
# After building, grant network capabilities
48+
sudo setcap 'cap_net_admin,cap_net_bind_service=+eip' /path/to/hypeman
49+
50+
# For development builds
51+
sudo setcap 'cap_net_admin,cap_net_bind_service=+eip' ./bin/hypeman
52+
53+
# Verify capabilities
54+
getcap ./bin/hypeman
55+
```
56+
57+
**Note:** The `i` (inheritable) flag allows child processes spawned by hypeman (like `ip` and `iptables` commands) to inherit capabilities via the ambient capability set.
58+
59+
**Note:** These capabilities must be reapplied after each rebuild. For production deployments, set capabilities on the installed binary. For local testing, this is handled automatically in `make test`.
60+
2361
### Configuration
2462

2563
#### Environment variables
2664

65+
Hypeman can be configured using the following environment variables:
66+
67+
| Variable | Description | Default |
68+
|----------|-------------|---------|
69+
| `PORT` | HTTP server port | `8080` |
70+
| `DATA_DIR` | Directory for storing VM images, volumes, and other data | `/var/lib/hypeman` |
71+
| `BRIDGE_NAME` | Name of the network bridge for VM networking | `vmbr0` |
72+
| `SUBNET_CIDR` | CIDR notation for the VM network subnet (gateway derived automatically) | `10.100.0.0/16` |
73+
| `UPLINK_INTERFACE` | Host network interface to use for VM internet access | _(auto-detect)_ |
74+
| `JWT_SECRET` | Secret key for JWT authentication (required for production) | _(empty)_ |
75+
| `DNS_SERVER` | DNS server IP address for VMs | `1.1.1.1` |
76+
| `MAX_CONCURRENT_BUILDS` | Maximum number of concurrent image builds | `1` |
77+
| `MAX_OVERLAY_SIZE` | Maximum size for overlay filesystem | `100GB` |
78+
79+
**Important: Subnet Configuration**
80+
81+
The default subnet `10.100.0.0/16` is chosen to avoid common conflicts. Hypeman will detect conflicts with existing routes on startup and fail with guidance.
82+
83+
If you need a different subnet, set `SUBNET_CIDR` in your environment. The gateway is automatically derived as the first IP in the subnet (e.g., `10.100.0.0/16` → `10.100.0.1`).
84+
85+
**Alternative subnets if needed:**
86+
- `172.30.0.0/16` - Private range between common Docker (172.17.x.x) and AWS (172.31.x.x) ranges
87+
- `10.200.0.0/16` - Another private range option
88+
89+
**Example:**
90+
```bash
91+
# In your .env file
92+
SUBNET_CIDR=172.30.0.0/16
93+
```
94+
95+
**Finding the uplink interface (`UPLINK_INTERFACE`)**
96+
97+
`UPLINK_INTERFACE` tells Hypeman which host interface to use for routing VM traffic to the outside world (for iptables MASQUERADE rules). On many hosts this is `eth0`, but laptops and more complex setups often use Wi‑Fi or other names.
98+
99+
**Quick way to discover it:**
100+
```bash
101+
# Ask the kernel which interface is used to reach the internet
102+
ip route get 1.1.1.1
103+
```
104+
Look for the `dev` field in the output, for example:
105+
```text
106+
1.1.1.1 via 192.168.12.1 dev wlp2s0 src 192.168.12.98
107+
```
108+
In this case, `wlp2s0` is the uplink interface, so you would set:
109+
```bash
110+
UPLINK_INTERFACE=wlp2s0
111+
```
112+
113+
You can also inspect all routes:
114+
```bash
115+
ip route show
116+
```
117+
Pick the interface used by the default route (usually the line starting with `default`). Avoid using local bridges like `docker0`, `br-...`, `virbr0`, or `vmbr0` as the uplink; those are typically internal virtual networks, not your actual internet-facing interface.
118+
119+
**Setup:**
120+
27121
```bash
28122
cp .env.example .env
29-
# Edit .env and set JWT_SECRET
123+
# Edit .env and set JWT_SECRET and other configuration values
30124
```
31125

32126
#### Data directory
@@ -54,29 +148,27 @@ make build
54148
```
55149
### Running the Server
56150

57-
1. Copy the example environment file and modify the values:
58-
```bash
59-
cp .env.example .env
60-
# Edit .env and set JWT_SECRET and other configuration values
61-
```
62-
63-
2. Generate a JWT token for testing (optional):
151+
1. Generate a JWT token for testing (optional):
64152
```bash
65153
make gen-jwt
66154
```
67155

68-
3. Start the server with hot-reload for development:
156+
2. Start the server with hot-reload for development:
69157
```bash
70158
make dev
71159
```
72160
The server will start on port 8080 (configurable via `PORT` environment variable).
73161

74162
### Testing
75163

164+
Network tests require elevated permissions to create bridges and TAP devices.
165+
76166
```bash
77167
make test
78168
```
79169

170+
The test command compiles test binaries, grants capabilities via `sudo setcap`, then runs tests as the current user (not root). You may be prompted for your sudo password during the capability grant step.
171+
80172
### Code Generation
81173

82174
After modifying `openapi.yaml`, regenerate the Go code:

cmd/api/api/api.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"github.com/onkernel/hypeman/cmd/api/config"
55
"github.com/onkernel/hypeman/lib/images"
66
"github.com/onkernel/hypeman/lib/instances"
7+
"github.com/onkernel/hypeman/lib/network"
78
"github.com/onkernel/hypeman/lib/oapi"
89
"github.com/onkernel/hypeman/lib/volumes"
910
)
@@ -14,6 +15,7 @@ type ApiService struct {
1415
ImageManager images.Manager
1516
InstanceManager instances.Manager
1617
VolumeManager volumes.Manager
18+
NetworkManager network.Manager
1719
}
1820

1921
var _ oapi.StrictServerInterface = (*ApiService)(nil)
@@ -24,12 +26,14 @@ func New(
2426
imageManager images.Manager,
2527
instanceManager instances.Manager,
2628
volumeManager volumes.Manager,
29+
networkManager network.Manager,
2730
) *ApiService {
2831
return &ApiService{
2932
Config: config,
3033
ImageManager: imageManager,
3134
InstanceManager: instanceManager,
3235
VolumeManager: volumeManager,
36+
NetworkManager: networkManager,
3337
}
3438
}
3539

cmd/api/api/api_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"github.com/onkernel/hypeman/cmd/api/config"
1111
"github.com/onkernel/hypeman/lib/images"
1212
"github.com/onkernel/hypeman/lib/instances"
13+
"github.com/onkernel/hypeman/lib/network"
1314
"github.com/onkernel/hypeman/lib/paths"
1415
"github.com/onkernel/hypeman/lib/system"
1516
"github.com/onkernel/hypeman/lib/volumes"
@@ -28,8 +29,9 @@ func newTestService(t *testing.T) *ApiService {
2829
}
2930

3031
systemMgr := system.NewManager(p)
32+
networkMgr := network.NewManager(p, cfg)
3133
maxOverlaySize := int64(100 * 1024 * 1024 * 1024) // 100GB for tests
32-
instanceMgr := instances.NewManager(p, imageMgr, systemMgr, maxOverlaySize)
34+
instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, maxOverlaySize)
3335
volumeMgr := volumes.NewManager(p)
3436

3537
// Register cleanup for orphaned Cloud Hypervisor processes

cmd/api/api/exec_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,17 @@ func TestExecInstanceNonTTY(t *testing.T) {
7676

7777
// Create instance
7878
t.Log("Creating instance...")
79+
networkDisabled := false
7980
instResp, err := svc.CreateInstance(ctx(), oapi.CreateInstanceRequestObject{
8081
Body: &oapi.CreateInstanceRequest{
8182
Name: "exec-test",
8283
Image: "docker.io/library/nginx:alpine",
84+
Network: &struct {
85+
Enabled *bool `json:"enabled,omitempty"`
86+
Name *string `json:"name,omitempty"`
87+
}{
88+
Enabled: &networkDisabled,
89+
},
8390
},
8491
})
8592
require.NoError(t, err)

0 commit comments

Comments
 (0)