Skip to content

Commit c95c45d

Browse files
committed
Merge remote-tracking branch 'origin/main' into qemu
2 parents d223fee + 8328365 commit c95c45d

File tree

10 files changed

+449
-57
lines changed

10 files changed

+449
-57
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,6 @@ lib/system/exec_agent/exec-agent
2424
# Envoy binaries
2525
lib/ingress/binaries/**
2626
dist/**
27+
28+
# UTM VM - downloaded ISO files
29+
scripts/utm/images/

DEVELOPMENT.md

Lines changed: 60 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ This document covers development setup, configuration, and contributing to Hypem
44

55
## Prerequisites
66

7+
> **macOS Users:** Hypeman requires KVM, which is only available on Linux. See [scripts/utm/README.md](scripts/utm/README.md) for instructions on setting up a Linux VM with nested virtualization on Apple Silicon Macs.
8+
79
**Go 1.25.4+**, **KVM**, **erofs-utils**, **dnsmasq**
810

911
```bash
@@ -13,17 +15,19 @@ dnsmasq --version
1315
```
1416

1517
**Install on Debian/Ubuntu:**
18+
1619
```bash
1720
sudo apt-get install erofs-utils dnsmasq
1821
```
1922

2023
**KVM Access:** User must be in `kvm` group for VM access:
24+
2125
```bash
2226
sudo usermod -aG kvm $USER
2327
# Log out and back in, or use: newgrp kvm
2428
```
2529

26-
**Network Capabilities:**
30+
**Network Capabilities:**
2731

2832
Before running or testing Hypeman, ensure IPv4 forwarding is enabled:
2933

@@ -39,6 +43,7 @@ sudo sysctl -p
3943
**Why:** Required for routing traffic between VM network and external network.
4044

4145
The hypeman binary needs network administration capabilities to create bridges and TAP devices:
46+
4247
```bash
4348
# After building, grant network capabilities
4449
sudo setcap 'cap_net_admin,cap_net_bind_service=+eip' /path/to/hypeman
@@ -78,34 +83,34 @@ root hard nofile 65536
7883

7984
Hypeman can be configured using the following environment variables:
8085

81-
| Variable | Description | Default |
82-
|----------|-------------|---------|
83-
| `PORT` | HTTP server port | `8080` |
84-
| `DATA_DIR` | Directory for storing VM images, volumes, and other data | `/var/lib/hypeman` |
85-
| `BRIDGE_NAME` | Name of the network bridge for VM networking | `vmbr0` |
86-
| `SUBNET_CIDR` | CIDR notation for the VM network subnet (gateway derived automatically) | `10.100.0.0/16` |
87-
| `UPLINK_INTERFACE` | Host network interface to use for VM internet access | _(auto-detect)_ |
88-
| `JWT_SECRET` | Secret key for JWT authentication (required for production) | _(empty)_ |
89-
| `DNS_SERVER` | DNS server IP address for VMs | `1.1.1.1` |
90-
| `MAX_CONCURRENT_BUILDS` | Maximum number of concurrent image builds | `1` |
91-
| `MAX_OVERLAY_SIZE` | Maximum size for overlay filesystem | `100GB` |
92-
| `ENV` | Deployment environment (filters telemetry, e.g. your name for dev) | `unset` |
93-
| `OTEL_ENABLED` | Enable OpenTelemetry traces/metrics | `false` |
94-
| `OTEL_ENDPOINT` | OTLP gRPC endpoint | `127.0.0.1:4317` |
95-
| `OTEL_SERVICE_INSTANCE_ID` | Instance ID for telemetry (differentiates multiple servers) | hostname |
96-
| `LOG_LEVEL` | Default log level (debug, info, warn, error) | `info` |
97-
| `LOG_LEVEL_<SUBSYSTEM>` | Per-subsystem log level (API, IMAGES, INSTANCES, NETWORK, VOLUMES, VMM, SYSTEM, EXEC, CADDY) | inherits default |
98-
| `CADDY_LISTEN_ADDRESS` | Address for Caddy ingress listeners | `0.0.0.0` |
99-
| `CADDY_ADMIN_ADDRESS` | Address for Caddy admin API | `127.0.0.1` |
100-
| `CADDY_ADMIN_PORT` | Port for Caddy admin API | `2019` |
101-
| `CADDY_STOP_ON_SHUTDOWN` | Stop Caddy when hypeman shuts down (set to `true` for dev) | `false` |
102-
| `ACME_EMAIL` | Email for ACME certificate registration (required for TLS ingresses) | _(empty)_ |
103-
| `ACME_DNS_PROVIDER` | DNS provider for ACME challenges: `cloudflare` | _(empty)_ |
104-
| `ACME_CA` | ACME CA URL (empty = Let's Encrypt production) | _(empty)_ |
105-
| `TLS_ALLOWED_DOMAINS` | Comma-separated allowed domains for TLS (e.g., `*.example.com,api.other.com`) | _(empty)_ |
106-
| `DNS_PROPAGATION_TIMEOUT` | Max time to wait for DNS propagation (e.g., `2m`) | _(empty)_ |
107-
| `DNS_RESOLVERS` | Comma-separated DNS resolvers for propagation checking | _(empty)_ |
108-
| `CLOUDFLARE_API_TOKEN` | Cloudflare API token (when using `cloudflare` provider) | _(empty)_ |
86+
| Variable | Description | Default |
87+
| -------------------------- | -------------------------------------------------------------------------------------------- | ------------------ |
88+
| `PORT` | HTTP server port | `8080` |
89+
| `DATA_DIR` | Directory for storing VM images, volumes, and other data | `/var/lib/hypeman` |
90+
| `BRIDGE_NAME` | Name of the network bridge for VM networking | `vmbr0` |
91+
| `SUBNET_CIDR` | CIDR notation for the VM network subnet (gateway derived automatically) | `10.100.0.0/16` |
92+
| `UPLINK_INTERFACE` | Host network interface to use for VM internet access | _(auto-detect)_ |
93+
| `JWT_SECRET` | Secret key for JWT authentication (required for production) | _(empty)_ |
94+
| `DNS_SERVER` | DNS server IP address for VMs | `1.1.1.1` |
95+
| `MAX_CONCURRENT_BUILDS` | Maximum number of concurrent image builds | `1` |
96+
| `MAX_OVERLAY_SIZE` | Maximum size for overlay filesystem | `100GB` |
97+
| `ENV` | Deployment environment (filters telemetry, e.g. your name for dev) | `unset` |
98+
| `OTEL_ENABLED` | Enable OpenTelemetry traces/metrics | `false` |
99+
| `OTEL_ENDPOINT` | OTLP gRPC endpoint | `127.0.0.1:4317` |
100+
| `OTEL_SERVICE_INSTANCE_ID` | Instance ID for telemetry (differentiates multiple servers) | hostname |
101+
| `LOG_LEVEL` | Default log level (debug, info, warn, error) | `info` |
102+
| `LOG_LEVEL_<SUBSYSTEM>` | Per-subsystem log level (API, IMAGES, INSTANCES, NETWORK, VOLUMES, VMM, SYSTEM, EXEC, CADDY) | inherits default |
103+
| `CADDY_LISTEN_ADDRESS` | Address for Caddy ingress listeners | `0.0.0.0` |
104+
| `CADDY_ADMIN_ADDRESS` | Address for Caddy admin API | `127.0.0.1` |
105+
| `CADDY_ADMIN_PORT` | Port for Caddy admin API | `2019` |
106+
| `CADDY_STOP_ON_SHUTDOWN` | Stop Caddy when hypeman shuts down (set to `true` for dev) | `false` |
107+
| `ACME_EMAIL` | Email for ACME certificate registration (required for TLS ingresses) | _(empty)_ |
108+
| `ACME_DNS_PROVIDER` | DNS provider for ACME challenges: `cloudflare` | _(empty)_ |
109+
| `ACME_CA` | ACME CA URL (empty = Let's Encrypt production) | _(empty)_ |
110+
| `TLS_ALLOWED_DOMAINS` | Comma-separated allowed domains for TLS (e.g., `*.example.com,api.other.com`) | _(empty)_ |
111+
| `DNS_PROPAGATION_TIMEOUT` | Max time to wait for DNS propagation (e.g., `2m`) | _(empty)_ |
112+
| `DNS_RESOLVERS` | Comma-separated DNS resolvers for propagation checking | _(empty)_ |
113+
| `CLOUDFLARE_API_TOKEN` | Cloudflare API token (when using `cloudflare` provider) | _(empty)_ |
109114

110115
**Important: Subnet Configuration**
111116

@@ -114,10 +119,12 @@ The default subnet `10.100.0.0/16` is chosen to avoid common conflicts. Hypeman
114119
If you need a different subnet, set `SUBNET_CIDR` in your environment. The gateway is automatically derived as the first IP in the subnet (e.g., `10.100.0.0/16` → `10.100.0.1`).
115120

116121
**Alternative subnets if needed:**
122+
117123
- `172.30.0.0/16` - Private range between common Docker (172.17.x.x) and cloud provider (172.31.x.x) ranges
118124
- `10.200.0.0/16` - Another private range option
119125

120126
**Example:**
127+
121128
```bash
122129
# In your .env file
123130
SUBNET_CIDR=172.30.0.0/16
@@ -128,23 +135,30 @@ SUBNET_CIDR=172.30.0.0/16
128135
`UPLINK_INTERFACE` tells Hypeman which host interface to use for routing VM traffic to the outside world (for iptables MASQUERADE rules). On many hosts this is `eth0`, but laptops and more complex setups often use Wi‑Fi or other names.
129136

130137
**Quick way to discover it:**
138+
131139
```bash
132140
# Ask the kernel which interface is used to reach the internet
133141
ip route get 1.1.1.1
134142
```
143+
135144
Look for the `dev` field in the output, for example:
145+
136146
```text
137147
1.1.1.1 via 192.168.12.1 dev wlp2s0 src 192.168.12.98
138148
```
149+
139150
In this case, `wlp2s0` is the uplink interface, so you would set:
151+
140152
```bash
141153
UPLINK_INTERFACE=wlp2s0
142154
```
143155

144156
You can also inspect all routes:
157+
145158
```bash
146159
ip route show
147160
```
161+
148162
Pick the interface used by the default route (usually the line starting with `default`). Avoid using local bridges like `docker0`, `br-...`, `virbr0`, or `vmbr0` as the uplink; those are typically internal virtual networks, not your actual internet-facing interface.
149163

150164
### TLS Ingress (HTTPS)
@@ -154,6 +168,7 @@ Hypeman uses Caddy with automatic ACME certificates for TLS termination. Certifi
154168
To enable TLS ingresses:
155169

156170
1. Configure ACME credentials in your `.env`:
171+
157172
```bash
158173
# Required for any TLS ingress
159174
@@ -164,6 +179,7 @@ CLOUDFLARE_API_TOKEN=your-api-token
164179
```
165180

166181
2. Create an ingress with TLS enabled:
182+
167183
```bash
168184
curl -X POST http://localhost:8080/v1/ingresses \
169185
-H "Content-Type: application/json" \
@@ -199,6 +215,7 @@ sudo chown $USER:$USER /var/lib/hypeman
199215
### Dockerhub login
200216

201217
Requires Docker Hub authentication to avoid rate limits when running the tests:
218+
202219
```bash
203220
docker login
204221
```
@@ -214,14 +231,17 @@ make build
214231
## Running the Server
215232

216233
1. Generate a JWT token for testing (optional):
234+
217235
```bash
218236
make gen-jwt
219237
```
220238

221239
2. Start the server with hot-reload for development:
240+
222241
```bash
223242
make dev
224243
```
244+
225245
The server will start on port 8080 (configurable via `PORT` environment variable).
226246

227247
### Local OpenTelemetry (optional)
@@ -232,15 +252,19 @@ To collect traces and metrics locally, run the Grafana LGTM stack (Loki, Grafana
232252
# Start Grafana LGTM (UI at http://localhost:3000, login: admin/admin)
233253
# Note, if you are developing on a shared server, you can use the same LGTM stack as your peer(s)
234254
# You will be able to sort your metrics, traces, and logs using the ENV configuration (see below)
255+
BIND=127.0.0.1
256+
# YOLO=1 # Uncomment to expose ports externally
257+
if [ -n "$YOLO" ]; then BIND=0.0.0.0; fi
258+
235259
docker run -d --name lgtm \
236-
-p 127.0.0.1:3000:3000 \
237-
-p 127.0.0.1:4317:4317 \
238-
-p 127.0.0.1:4318:4318 \
239-
-p 127.0.0.1:9090:9090 \
240-
-p 127.0.0.1:4040:4040 \
260+
-p $BIND:3000:3000 \
261+
-p $BIND:4317:4317 \
262+
-p $BIND:4318:4318 \
263+
-p $BIND:9090:9090 \
264+
-p $BIND:4040:4040 \
241265
grafana/otel-lgtm:latest
242266

243-
# If developing on a remote server, forward the port to your local machine:
267+
# If developing on a remote server, forward the port to your local machine (or set YOLO=1 above to expose the ports externally):
244268
# ssh -L 3001:localhost:3000 your-server (then open http://localhost:3001)
245269

246270
# Enable OTel in .env (set ENV to your name to filter your telemetry)
@@ -254,6 +278,7 @@ make dev
254278
Open http://localhost:3000 to view traces (Tempo), metrics (Mimir), and logs (Loki) in Grafana.
255279

256280
**Import the Hypeman dashboard:**
281+
257282
1. Go to Dashboards → New → Import
258283
2. Upload `dashboards/hypeman.json` or paste its contents
259284
3. Select the Prometheus datasource and click Import

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ require (
1212
github.com/ghodss/yaml v1.0.0
1313
github.com/go-chi/chi/v5 v5.2.3
1414
github.com/golang-jwt/jwt/v5 v5.3.0
15+
github.com/golang/protobuf v1.5.4
1516
github.com/google/go-containerregistry v0.20.6
1617
github.com/google/wire v0.7.0
1718
github.com/gorilla/websocket v1.5.3
@@ -43,7 +44,6 @@ require (
4344
golang.org/x/sync v0.17.0
4445
golang.org/x/sys v0.38.0
4546
google.golang.org/grpc v1.77.0
46-
google.golang.org/protobuf v1.36.10
4747
gvisor.dev/gvisor v0.0.0-20251125014920-fc40e232ff54
4848
)
4949

@@ -114,6 +114,7 @@ require (
114114
golang.org/x/tools v0.37.0 // indirect
115115
google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 // indirect
116116
google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 // indirect
117+
google.golang.org/protobuf v1.36.10 // indirect
117118
gopkg.in/yaml.v2 v2.4.0 // indirect
118119
gopkg.in/yaml.v3 v3.0.1 // indirect
119120
gotest.tools/v3 v3.5.2 // indirect

lib/images/oci.go

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@ import (
44
"context"
55
"fmt"
66
"os"
7+
"runtime"
78
"strings"
89

910
"github.com/google/go-containerregistry/pkg/authn"
1011
"github.com/google/go-containerregistry/pkg/name"
12+
gcr "github.com/google/go-containerregistry/pkg/v1"
1113
"github.com/google/go-containerregistry/pkg/v1/empty"
1214
"github.com/google/go-containerregistry/pkg/v1/layout"
1315
"github.com/google/go-containerregistry/pkg/v1/remote"
@@ -60,24 +62,42 @@ func newOCIClient(cacheDir string) (*ociClient, error) {
6062
return &ociClient{cacheDir: cacheDir}, nil
6163
}
6264

65+
// currentPlatform returns the platform for the current host
66+
func currentPlatform() gcr.Platform {
67+
return gcr.Platform{
68+
Architecture: runtime.GOARCH,
69+
OS: runtime.GOOS,
70+
}
71+
}
72+
6373
// inspectManifest synchronously inspects a remote image to get its digest
6474
// without pulling the image. This is used for upfront digest discovery.
75+
// For multi-arch images, it returns the platform-specific manifest digest
76+
// (matching the current host platform) rather than the manifest index digest.
6577
func (c *ociClient) inspectManifest(ctx context.Context, imageRef string) (string, error) {
6678
ref, err := name.ParseReference(imageRef)
6779
if err != nil {
6880
return "", fmt.Errorf("parse image reference: %w", err)
6981
}
7082

71-
// Use system authentication (reads from ~/.docker/config.json, etc.)
72-
// Default retry: only on network errors, max ~1.3s total
73-
descriptor, err := remote.Head(ref,
83+
// Use remote.Image with platform filtering to get the platform-specific digest.
84+
// For multi-arch images, this resolves the manifest index to the correct platform.
85+
// This matches what pullToOCILayout does to ensure cache key consistency.
86+
// Note: remote.Image is lazy - it only fetches the manifest, not layer blobs.
87+
img, err := remote.Image(ref,
7488
remote.WithContext(ctx),
75-
remote.WithAuthFromKeychain(authn.DefaultKeychain))
89+
remote.WithAuthFromKeychain(authn.DefaultKeychain),
90+
remote.WithPlatform(currentPlatform()))
7691
if err != nil {
7792
return "", fmt.Errorf("fetch manifest: %w", wrapRegistryError(err))
7893
}
7994

80-
return descriptor.Digest.String(), nil
95+
digest, err := img.Digest()
96+
if err != nil {
97+
return "", fmt.Errorf("get image digest: %w", err)
98+
}
99+
100+
return digest.String(), nil
81101
}
82102

83103
// pullResult contains the metadata and digest from pulling an image
@@ -126,9 +146,11 @@ func (c *ociClient) pullToOCILayout(ctx context.Context, imageRef, layoutTag str
126146

127147
// Use system authentication (reads from ~/.docker/config.json, etc.)
128148
// Default retry: only on network errors, max ~1.3s total
149+
// WithPlatform ensures we pull the correct architecture for multi-arch images
129150
img, err := remote.Image(ref,
130151
remote.WithContext(ctx),
131-
remote.WithAuthFromKeychain(authn.DefaultKeychain))
152+
remote.WithAuthFromKeychain(authn.DefaultKeychain),
153+
remote.WithPlatform(currentPlatform()))
132154
if err != nil {
133155
// Rate limits fail here immediately (429 is not retried by default)
134156
return fmt.Errorf("fetch image manifest: %w", wrapRegistryError(err))

lib/system/versions.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,5 +74,8 @@ func GetArch() string {
7474
if arch == "amd64" {
7575
return "x86_64"
7676
}
77+
if arch == "arm64" {
78+
return "aarch64"
79+
}
7780
return arch
7881
}

lib/vmm/binaries.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ func ExtractBinary(p *paths.Paths, version CHVersion) (string, error) {
3030
arch := runtime.GOARCH
3131
if arch == "amd64" {
3232
arch = "x86_64"
33+
} else if arch == "arm64" {
34+
arch = "aarch64"
3335
}
3436

3537
embeddedPath := fmt.Sprintf("binaries/cloud-hypervisor/%s/%s/cloud-hypervisor", version, arch)

0 commit comments

Comments
 (0)