Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions cmd/oc/internal/commands/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,6 @@ var sandboxCreateCmd = &cobra.Command{

printer.Print(sandbox, func() {
fmt.Printf("Created sandbox %s (status: %s)\n", sandbox.ID, sandbox.Status)
if sandbox.ConnectURL != "" {
fmt.Printf("Connect URL: %s\n", sandbox.ConnectURL)
}
})
return nil
},
Expand Down Expand Up @@ -106,9 +103,6 @@ var sandboxGetCmd = &cobra.Command{
fmt.Printf("Memory: %dMB\n", sandbox.MemoryMB)
fmt.Printf("Started: %s\n", sandbox.StartedAt.Format(time.RFC3339))
fmt.Printf("Ends: %s\n", sandbox.EndAt.Format(time.RFC3339))
if sandbox.ConnectURL != "" {
fmt.Printf("Connect: %s\n", sandbox.ConnectURL)
}
})
return nil
},
Expand Down
16 changes: 16 additions & 0 deletions cmd/oc/internal/commands/shell.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"os"
"os/signal"
"syscall"
"time"

"github.com/gorilla/websocket"
"github.com/opensandbox/opensandbox/cmd/oc/internal/client"
Expand Down Expand Up @@ -91,6 +92,21 @@ Examples:

done := make(chan struct{})

// WebSocket keepalive: send ping every 30s to prevent idle timeout
// (Cloudflare drops idle WebSocket connections after 100s)
go func() {
ticker := time.NewTicker(30 * time.Second)
defer ticker.Stop()
for {
select {
case <-ticker.C:
conn.WriteControl(websocket.PingMessage, nil, time.Now().Add(5*time.Second))
case <-done:
return
}
}
}()

// Read from WebSocket → stdout
go func() {
defer close(done)
Expand Down
2 changes: 2 additions & 0 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,8 @@ func main() {
KeyName: cfg.EC2KeyName,
IAMInstanceProfile: cfg.EC2IAMInstanceProfile,
SecretsARN: cfg.SecretsARN,
CFAPIToken: cfg.CFAPIToken,
CFZoneID: cfg.CFZoneID,
})
if err != nil {
log.Fatalf("opensandbox: failed to create EC2 pool: %v", err)
Expand Down
17 changes: 17 additions & 0 deletions cmd/worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,23 @@ func main() {
// Start HTTP server for direct SDK access
httpServer := worker.NewHTTPServer(mgr, ptyMgr, jwtIssuer, sandboxDBMgr, sbProxy, sbRouter, cfg.SandboxDomain)
httpAddr := fmt.Sprintf(":%d", cfg.Port)

// Serve HTTPS on :443 if Cloudflare Origin Certificate is present
originCert := "/etc/opensandbox/origin-cert.pem"
originKey := "/etc/opensandbox/origin-key.pem"
_, certErr := os.Stat(originCert)
_, keyErr := os.Stat(originKey)
if certErr == nil && keyErr == nil {
tlsServer := worker.NewHTTPServer(mgr, ptyMgr, jwtIssuer, sandboxDBMgr, sbProxy, sbRouter, cfg.SandboxDomain)
log.Printf("opensandbox-worker: starting HTTPS server on :443 (Cloudflare origin cert)")
go func() {
if err := tlsServer.StartTLS(":443", originCert, originKey); err != nil {
log.Printf("HTTPS server error: %v", err)
}
}()
}

// Always serve plain HTTP for internal VPC access (control plane proxy)
log.Printf("opensandbox-worker: starting HTTP server on %s", httpAddr)
go func() {
if err := httpServer.Start(httpAddr); err != nil {
Expand Down
101 changes: 93 additions & 8 deletions deploy/ec2/setup-instance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -220,37 +220,114 @@ echo "==> Installing identity service..."
sudo tee /usr/local/bin/opensandbox-worker-identity.sh > /dev/null << 'IDENT'
#!/usr/bin/env bash
set -euo pipefail
TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" \

CF_API_TOKEN="${OPENSANDBOX_CF_API_TOKEN:-}"
CF_ZONE_ID="${OPENSANDBOX_CF_ZONE_ID:-}"
WORKER_DOMAIN="workers.opencomputer.dev"

# Query EC2 instance metadata (IMDSv2)
IMDS_TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" \
-H "X-aws-ec2-metadata-token-ttl-seconds: 300")
INSTANCE_ID=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" \
INSTANCE_ID=$(curl -s -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \
http://169.254.169.254/latest/meta-data/instance-id)
PRIVATE_IP=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" \
PRIVATE_IP=$(curl -s -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \
http://169.254.169.254/latest/meta-data/local-ipv4)
PUBLIC_IP=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" \
PUBLIC_IP=$(curl -s -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \
http://169.254.169.254/latest/meta-data/public-ipv4 || echo "")

SHORT_ID=$(echo "$INSTANCE_ID" | sed 's/^i-//' | cut -c1-8)
WORKER_ID="w-use2-${SHORT_ID}"
WORKER_HOSTNAME="${WORKER_ID}.${WORKER_DOMAIN}"
IP="${PUBLIC_IP:-$PRIVATE_IP}"

mkdir -p /etc/opensandbox

# Register proxied A record in Cloudflare (Cloudflare terminates TLS)
if [ -n "$CF_API_TOKEN" ] && [ -n "$CF_ZONE_ID" ] && [ -n "$IP" ]; then
echo "opensandbox-identity: registering DNS ${WORKER_HOSTNAME} -> ${IP} (Cloudflare proxied)"

RESP=$(curl -s -X POST \
"https://api.cloudflare.com/client/v4/zones/${CF_ZONE_ID}/dns_records" \
-H "Authorization: Bearer ${CF_API_TOKEN}" \
-H "Content-Type: application/json" \
--data "{
\"type\": \"A\",
\"name\": \"${WORKER_HOSTNAME}\",
\"content\": \"${IP}\",
\"ttl\": 1,
\"proxied\": true
}")

RECORD_ID=$(echo "$RESP" | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4)
SUCCESS=$(echo "$RESP" | grep -o '"success":true')

if [ -n "$SUCCESS" ] && [ -n "$RECORD_ID" ]; then
echo "opensandbox-identity: DNS registered (record_id=${RECORD_ID})"
HTTP_ADDR="https://${WORKER_HOSTNAME}"
else
echo "opensandbox-identity: WARNING: DNS registration failed: ${RESP}"
HTTP_ADDR="http://${IP}:8080"
RECORD_ID=""
fi

# Save record ID for cleanup on shutdown
cat > /etc/opensandbox/worker-dns.env << EOF
CF_API_TOKEN=${CF_API_TOKEN}
CF_ZONE_ID=${CF_ZONE_ID}
CF_RECORD_ID=${RECORD_ID}
WORKER_HOSTNAME=${WORKER_HOSTNAME}
EOF
else
echo "opensandbox-identity: WARNING: no CF credentials, using raw IP (no TLS)"
HTTP_ADDR="http://${IP}:8080"
cat > /etc/opensandbox/worker-dns.env << EOF
CF_API_TOKEN=
CF_ZONE_ID=
CF_RECORD_ID=
WORKER_HOSTNAME=${WORKER_HOSTNAME}
EOF
fi

cat > /etc/opensandbox/worker-identity.env << EOF
OPENSANDBOX_WORKER_ID=${WORKER_ID}
OPENSANDBOX_HTTP_ADDR=http://${PUBLIC_IP:-$PRIVATE_IP}:8080
OPENSANDBOX_HTTP_ADDR=${HTTP_ADDR}
OPENSANDBOX_GRPC_ADVERTISE=${PRIVATE_IP}:9090
EOF
echo "opensandbox-identity: ${WORKER_ID} private=${PRIVATE_IP} public=${PUBLIC_IP:-none}"

echo "opensandbox-identity: ${WORKER_ID} private=${PRIVATE_IP} public=${PUBLIC_IP:-none} addr=${HTTP_ADDR}"
IDENT
sudo chmod +x /usr/local/bin/opensandbox-worker-identity.sh

# DNS cleanup script — deletes the worker's Cloudflare A record on shutdown.
# Installed as a standalone script so a systemd unit can call it as a stop hook.
# The heredoc delimiter is quoted, so nothing below is expanded at install time;
# all variables are resolved when the installed script runs.
sudo tee /usr/local/bin/opensandbox-worker-dns-cleanup.sh > /dev/null << 'CLEANUP'
#!/usr/bin/env bash
set -euo pipefail

# Nothing to clean up if the identity step never wrote its state file.
source /etc/opensandbox/worker-dns.env 2>/dev/null || exit 0

# Use ${VAR:-} so a missing key is treated like an empty one instead of
# aborting with an "unbound variable" error under `set -u`.
[ -z "${CF_API_TOKEN:-}" ] && exit 0
[ -z "${CF_ZONE_ID:-}" ] && exit 0
[ -z "${CF_RECORD_ID:-}" ] && exit 0

echo "opensandbox-dns-cleanup: removing ${WORKER_HOSTNAME:-} (record_id=${CF_RECORD_ID})"
# --fail makes HTTP 4xx/5xx responses exit non-zero so the warning below is
# actually reached when the API rejects the request; --max-time bounds the call
# so an unreachable API cannot stall shutdown.
curl -s --fail --max-time 10 -X DELETE \
  "https://api.cloudflare.com/client/v4/zones/${CF_ZONE_ID}/dns_records/${CF_RECORD_ID}" \
  -H "Authorization: Bearer ${CF_API_TOKEN}" \
  > /dev/null 2>&1 || echo "opensandbox-dns-cleanup: WARNING: failed to remove DNS record"
CLEANUP
sudo chmod +x /usr/local/bin/opensandbox-worker-dns-cleanup.sh

sudo tee /etc/systemd/system/opensandbox-identity.service > /dev/null << 'SVC'
[Unit]
Description=OpenSandbox Worker Identity (from EC2 IMDS)
Description=OpenSandbox Worker Identity + Cloudflare DNS
After=network-online.target
Wants=network-online.target
Before=opensandbox-worker.service

[Service]
Type=oneshot
RemainAfterExit=yes
EnvironmentFile=-/etc/opensandbox/cloudflare.env
ExecStart=/usr/local/bin/opensandbox-worker-identity.sh
ExecStop=/usr/local/bin/opensandbox-worker-dns-cleanup.sh

[Install]
WantedBy=multi-user.target
Expand All @@ -277,7 +354,7 @@ Environment=OPENSANDBOX_MODE=worker
Environment=OPENSANDBOX_PORT=8080
Environment=OPENSANDBOX_REGION=use2
Environment=OPENSANDBOX_DATA_DIR=/data
Environment=OPENSANDBOX_SANDBOX_DOMAIN=workers.opensandbox.ai
Environment=OPENSANDBOX_SANDBOX_DOMAIN=workers.opencomputer.dev
Environment=OPENSANDBOX_FIRECRACKER_BIN=/usr/local/bin/firecracker
Environment=OPENSANDBOX_KERNEL_PATH=/data/firecracker/vmlinux-arm64
Environment=OPENSANDBOX_IMAGES_DIR=/data/firecracker/images
Expand All @@ -297,6 +374,14 @@ SVC

sudo mkdir -p /etc/opensandbox /data/sandboxes /data/firecracker/images /data/checkpoints

# Cloudflare credentials for automatic DNS + TLS registration.
# Set these to enable HTTPS worker hostnames via Cloudflare proxy.
# The blank template is only written when the file does not exist yet, so
# re-running this script does not wipe credentials that were already filled in.
if ! sudo test -e /etc/opensandbox/cloudflare.env; then
  sudo tee /etc/opensandbox/cloudflare.env > /dev/null << 'CFENV'
OPENSANDBOX_CF_API_TOKEN=
OPENSANDBOX_CF_ZONE_ID=
CFENV
fi
# The file is meant to hold an API token: restrict it to root.
sudo chmod 600 /etc/opensandbox/cloudflare.env
echo " NOTE: Set CF credentials in /etc/opensandbox/cloudflare.env to enable DNS registration"

# -------------------------------------------------------------------
# Enable services
# -------------------------------------------------------------------
Expand Down
10 changes: 10 additions & 0 deletions internal/compute/ec2.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ type EC2PoolConfig struct {
KeyName string
IAMInstanceProfile string // IAM instance profile name (for Secrets Manager + S3 access)
SecretsARN string // Secrets Manager ARN passed to worker env
CFAPIToken string // Cloudflare API token for worker DNS registration
CFZoneID string // Cloudflare zone ID for opencomputer.dev
}

// EC2Pool implements compute.Pool using AWS EC2 instances.
Expand Down Expand Up @@ -278,6 +280,14 @@ func (p *EC2Pool) buildUserData(opts MachineOpts) string {
}
sb.WriteString("ENVEOF\n\n")

// Write Cloudflare credentials for automatic DNS + TLS registration
if p.cfg.CFAPIToken != "" && p.cfg.CFZoneID != "" {
sb.WriteString("cat > /etc/opensandbox/cloudflare.env << 'CFEOF'\n")
sb.WriteString(fmt.Sprintf("OPENSANDBOX_CF_API_TOKEN=%s\n", p.cfg.CFAPIToken))
sb.WriteString(fmt.Sprintf("OPENSANDBOX_CF_ZONE_ID=%s\n", p.cfg.CFZoneID))
sb.WriteString("CFEOF\n\n")
}

// Mount NVMe with XFS project quotas
sb.WriteString("# Mount NVMe instance storage with XFS project quotas\n")
sb.WriteString("if [ -b /dev/nvme1n1 ]; then\n")
Expand Down
1 change: 1 addition & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ type Config struct {
// The secret should be a JSON object with keys matching env var names (e.g. OPENSANDBOX_JWT_SECRET).
// Env vars take precedence over secret values (for local overrides).
SecretsARN string

}

// Load reads configuration from environment variables with sensible defaults.
Expand Down
17 changes: 16 additions & 1 deletion internal/controlplane/redis_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"log"
"strings"
"sync"
"time"

Expand All @@ -23,13 +24,27 @@ type WorkerEntry struct {
MachineID string `json:"machine_id,omitempty"` // EC2 instance ID
Region string `json:"region"`
GRPCAddr string `json:"grpc_addr"`
HTTPAddr string `json:"http_addr"`
HTTPAddr string `json:"http_addr"` // Public address (may be HTTPS with hostname)
Capacity int `json:"capacity"`
Current int `json:"current"`
CPUPct float64 `json:"cpu_pct"`
MemPct float64 `json:"mem_pct"`
}

// InternalHTTPAddr returns the private HTTP address for internal routing (VPC).
//
// The address is derived from GRPCAddr by dropping everything from the last
// ':' onward and appending the HTTP port 8080, e.g. "10.0.1.5:9090" becomes
// "http://10.0.1.5:8080". A GRPCAddr without a ':' is used as the host as-is.
//
// HTTPAddr is returned instead when GRPCAddr is empty or has no host part
// (e.g. ":9090"), so callers never receive a host-less URL such as
// "http://:8080".
func (w *WorkerEntry) InternalHTTPAddr() string {
	host := w.GRPCAddr
	if i := strings.LastIndex(host, ":"); i >= 0 {
		host = host[:i]
	}
	if host == "" {
		// No usable private host: fall back to the advertised address.
		return w.HTTPAddr
	}
	return "http://" + host + ":8080"
}

// RedisWorkerRegistry maintains an in-memory cache of worker state
// backed by Redis pub/sub for real-time updates and periodic SCAN for reconciliation.
// It also maintains a persistent gRPC connection pool to workers.
Expand Down
14 changes: 10 additions & 4 deletions internal/firecracker/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,16 +141,22 @@ func (c *FirecrackerClient) CreateSnapshot(snapshotPath, memFilePath string) err

// LoadSnapshot restores a VM from a snapshot.
// If resumeVM is true, the VM starts running immediately after load.
func (c *FirecrackerClient) LoadSnapshot(snapshotPath, memFilePath string, resumeVM bool) error {
// clockDeltaUs is the microseconds elapsed since the snapshot was taken; Firecracker
// advances the guest clock by this amount so the VM wakes with the correct wall time.
// Pass 0 to skip clock correction (e.g. for legacy snapshots without a recorded time).
func (c *FirecrackerClient) LoadSnapshot(snapshotPath, memFilePath string, resumeVM bool, clockDeltaUs int64) error {
body := map[string]interface{}{
"snapshot_path": snapshotPath,
"snapshot_path": snapshotPath,
"mem_backend": map[string]string{
"backend_path": memFilePath,
"backend_type": "File",
"backend_path": memFilePath,
"backend_type": "File",
},
"enable_diff_snapshots": false,
"resume_vm": resumeVM,
}
if clockDeltaUs > 0 {
body["clock_delta_us"] = clockDeltaUs
}
return c.put("/snapshot/load", body)
}

Expand Down
Loading