diff --git a/Makefile b/Makefile index 9aa65ba3..1ec12dd1 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,20 @@ binary_name = podman-bootc +binary_proxy= vsock-proxy output_dir = bin build_tags = exclude_graphdriver_btrfs,btrfs_noversion,exclude_graphdriver_devicemapper,containers_image_openpgp,remote +registry = quay.io/containers +vm_image_name = bootc-vm +vm_image_tag = latest +vm_image = $(registry)/$(vm_image_name):$(vm_image_tag) + all: out_dir docs go build -tags $(build_tags) $(GOOPTS) -o $(output_dir)/$(binary_name) +.PHONY: proxy +proxy: out_dir + go build -o ${output_dir}/$(binary_proxy) ./proxy + out_dir: mkdir -p $(output_dir) @@ -18,6 +28,10 @@ integration_tests: e2e_test: all ginkgo -tags $(build_tags) ./test/... +image: proxy + podman build -t $(vm_image) --device /dev/kvm \ + -f containerfiles/vm/Containerfile . + .PHONY: docs docs: make -C docs diff --git a/cmd/install.go b/cmd/install.go new file mode 100644 index 00000000..5bfa40af --- /dev/null +++ b/cmd/install.go @@ -0,0 +1,178 @@ +package cmd + +import ( + "context" + "fmt" + "os" + filepath "path/filepath" + + "github.com/containers/podman-bootc/pkg/podman" + "github.com/containers/podman-bootc/pkg/vm" + "github.com/containers/podman-bootc/pkg/vm/domain" + "github.com/containers/podman/v5/pkg/bindings" + "github.com/spf13/cobra" + log "github.com/sirupsen/logrus" +) + +type installCmd struct { + image string + bootcCmdLine []string + artifactsDir string + diskPath string + ctx context.Context + socket string + podmanSocketDir string + libvirtDir string + outputImage string + containerStorage string + configPath string + outputPath string + installVM *vm.InstallVM +} + +func filterCmdlineArgs(args []string) ([]string, error) { + sepIndex := -1 + for i, arg := range args { + if arg == "--" { + sepIndex = i + break + } + } + if sepIndex == -1 { + return nil, fmt.Errorf("no command line specified") + } + + return args[sepIndex+1:], nil +} + +func NewInstallCommand() *cobra.Command { + c := 
installCmd{} + cmd := &cobra.Command{ + Use: "install", + Short: "Install the OS Containers", + Long: "Run bootc install to build the OS Containers. Specify the bootc cmdline after the '--'", + RunE: c.doInstall, + } + cacheDir, err := os.UserCacheDir() + if err != nil { + cacheDir = "" + } + cacheDir = filepath.Join(cacheDir, "bootc") + cmd.PersistentFlags().StringVar(&c.image, "bootc-image", "", "bootc-vm container image") + cmd.PersistentFlags().StringVar(&c.artifactsDir, "dir", cacheDir, "directory where the artifacts are extracted") + cmd.PersistentFlags().StringVar(&c.outputPath, "output-dir", "", "directory to store the output results") + cmd.PersistentFlags().StringVar(&c.outputImage, "output-image", "", "path of the image to use for the installation") + cmd.PersistentFlags().StringVar(&c.configPath, "config-dir", "", "path where to find the config.toml") + cmd.PersistentFlags().StringVar(&c.containerStorage, "container-storage", podman.DefaultContainerStorage(), "Container storage to use") + cmd.PersistentFlags().StringVar(&c.socket, "podman-socket", podman.DefaultPodmanSocket(), "path to the podman socket") + if args, err := filterCmdlineArgs(os.Args); err == nil { + c.bootcCmdLine = args + } + + return cmd +} + +func init() { + RootCmd.AddCommand(NewInstallCommand()) +} + +func (c *installCmd) validateArgs() error { + if c.image == "" { + return fmt.Errorf("the bootc-image cannot be empty") + } + if c.artifactsDir == "" { + return fmt.Errorf("the artifacts directory path cannot be empty") + } + if c.outputImage == "" { + return fmt.Errorf("the output-image needs to be set") + } + if c.outputPath == "" { + return fmt.Errorf("the output-path needs to be set") + } + if c.configPath == "" { + return fmt.Errorf("the config-dir needs to be set") + } + if c.containerStorage == "" { + return fmt.Errorf("the container storage cannot be empty") + } + if c.socket == "" { + return fmt.Errorf("the socket for podman cannot be empty") + } + if len(c.bootcCmdLine) == 0 
{ + return fmt.Errorf("the bootc commandline needs to be specified after the '--'") + } + var err error + c.ctx, err = bindings.NewConnection(context.Background(), "unix://"+c.socket) + if err != nil { + return fmt.Errorf("failed to connect to podman at %s: %v", c.socket, err) + } + + return nil +} + +func (c *installCmd) installBuildVM(kernel, initrd string) error { + image := filepath.Join(c.outputPath, c.outputImage) + outputImageFormat, err := domain.GetDiskInfo(image) + if err != nil { + return err + } + c.installVM = vm.NewInstallVM(filepath.Join(c.libvirtDir, "virtqemud-sock"), vm.InstallOptions{ + OutputFormat: outputImageFormat, + OutputImage: filepath.Join(vm.OutputDir, c.outputImage), // Path relative to the container filesystem + Root: false, + Kernel: kernel, + Initrd: initrd, + }) + if err := c.installVM.Run(); err != nil { + return err + } + + return nil +} + +func (c *installCmd) doInstall(_ *cobra.Command, _ []string) error { + if err := c.validateArgs(); err != nil { + return err + } + c.libvirtDir = filepath.Join(c.artifactsDir, "libvirt") + if _, err := os.Stat(c.libvirtDir); os.IsNotExist(err) { + if err := os.Mkdir(c.libvirtDir, 0755); err != nil { + return err + } + } + c.podmanSocketDir = filepath.Join(c.artifactsDir, "podman") + if _, err := os.Stat(c.podmanSocketDir); os.IsNotExist(err) { + if err := os.Mkdir(c.podmanSocketDir, 0755); err != nil { + return err + } + } + remoteSocket := filepath.Join(c.podmanSocketDir, "podman-vm.sock") + vmCont := podman.NewVMContainer(c.image, c.socket, &podman.RunVMContainerOptions{ + ContainerStoragePath: c.containerStorage, + ConfigDir: c.configPath, + OutputDir: c.outputPath, + SocketDir: c.podmanSocketDir, + LibvirtSocketDir: c.libvirtDir, + }) + if err := vmCont.Run(); err != nil { + return err + } + defer vmCont.Stop() + + kernel, initrd, err := vmCont.GetBootArtifacts() + if err != nil { + return err + } + log.Debugf("Boot artifacts kernel: %s and initrd: %s", kernel, initrd) + + if err := 
c.installBuildVM(kernel, initrd); err != nil { + return err + } + defer c.installVM.Stop() + + if err := podman.RunPodmanCmd(remoteSocket, c.image, c.bootcCmdLine); err != nil { + return err + } + + return nil +} diff --git a/containerfiles/vm/Containerfile b/containerfiles/vm/Containerfile new file mode 100644 index 00000000..c12454e3 --- /dev/null +++ b/containerfiles/vm/Containerfile @@ -0,0 +1,28 @@ +FROM quay.io/fedora/fedora:42 + +RUN dnf install -y \ + libvirt-client \ + libvirt-daemon \ + libvirt-daemon-driver-qemu \ + libvirt-daemon-driver-storage-core \ + qemu-kvm \ + socat \ + virt-install \ + virtiofsd \ + && dnf clean all + +RUN mkdir -p /home/qemu && chown -R qemu:qemu /home/qemu +RUN mkdir -p /etc/libvirt /vm_files + +COPY containerfiles/vm/entrypoint.sh /entrypoint.sh +COPY ./bin/vsock-proxy /usr/local/bin/vsock-proxy +COPY containerfiles/vm/files /vm_files +COPY containerfiles/vm/qemu.conf /etc/libvirt/qemu.conf +COPY containerfiles/vm/virtqemud.conf /etc/libvirt/virtqemud.conf +COPY containerfiles/vm/virtiofsd-wrapper /usr/local/bin/virtiofsd-wrapper + +EXPOSE 5959 + +RUN dnf install -y socat + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/containerfiles/vm/entrypoint.sh b/containerfiles/vm/entrypoint.sh new file mode 100755 index 00000000..1e6899d2 --- /dev/null +++ b/containerfiles/vm/entrypoint.sh @@ -0,0 +1,34 @@ +#!/usr/bin/bash + +set -xe + +BOOTC_ROOT=/bootc-data + +# Inject the binaries, systemd and configuration files in the bootc image +mkdir -p ${BOOTC_ROOT}/etc/sysusers.d +mkdir -p ${BOOTC_ROOT}/usr/lib/containers/storage +cp /vm_files/bootc.conf ${BOOTC_ROOT}/etc/sysusers.d/bootc.conf +cp /vm_files/podman-vsock-proxy.service ${BOOTC_ROOT}/etc/systemd/system/podman-vsock-proxy.service +cp /vm_files/mount-vfsd-targets.service ${BOOTC_ROOT}/etc/systemd/system/mount-vfsd-targets.service +cp /vm_files/mount-vfsd-targets.sh ${BOOTC_ROOT}/usr/local/bin/mount-vfsd-targets.sh +cp /vm_files/container-storage.conf 
${BOOTC_ROOT}/etc/containers/storage.conf +cp /vm_files/selinux-config ${BOOTC_ROOT}/etc/selinux/config +cp /vm_files/sudoers-bootc ${BOOTC_ROOT}/etc/sudoers.d/bootc +cp /usr/local/bin/vsock-proxy ${BOOTC_ROOT}/usr/local/bin/vsock-proxy + +# Enable systemd services +chroot ${BOOTC_ROOT} systemctl enable mount-vfsd-targets +chroot ${BOOTC_ROOT} systemctl enable podman.socket +chroot ${BOOTC_ROOT} systemctl enable podman-vsock-proxy.service +# Create an empty password for the bootc user +entry='bootc::20266::::::' +echo $entry >> ${BOOTC_ROOT}/etc/shadow + +# Start proxy the VM port 1234 to unix socket +vsock-proxy --log-level debug -s /run/podman/podman-vm.sock -p 1234 --cid 3 \ + --listen-mode unixToVsock &> /var/log/vsock-proxy.log & + +# Finally, start libvirt +/usr/sbin/virtlogd & +/usr/bin/virtstoraged & +/usr/sbin/virtqemud -v -t 0 diff --git a/containerfiles/vm/files/bootc.conf b/containerfiles/vm/files/bootc.conf new file mode 100644 index 00000000..f019192e --- /dev/null +++ b/containerfiles/vm/files/bootc.conf @@ -0,0 +1 @@ +u bootc - "Bootc User" /home/bootc /bin/bash diff --git a/containerfiles/vm/files/container-storage.conf b/containerfiles/vm/files/container-storage.conf new file mode 100644 index 00000000..8204e087 --- /dev/null +++ b/containerfiles/vm/files/container-storage.conf @@ -0,0 +1,14 @@ +[storage] + +driver = "overlay" +runroot = "/run/containers/storage" +graphroot = "/var/lib/containers/storage" + +[storage.options] +additionalimagestores = [ +"/usr/lib/containers/storage", +"/usr/lib/bootc/container_storage", +] +pull_options = {enable_partial_images = "true", use_hard_links = "false", ostree_repos=""} +[storage.options.overlay] +mountopt = "nodev,metacopy=on" diff --git a/containerfiles/vm/files/mount-vfsd-targets.service b/containerfiles/vm/files/mount-vfsd-targets.service new file mode 100644 index 00000000..a1b37426 --- /dev/null +++ b/containerfiles/vm/files/mount-vfsd-targets.service @@ -0,0 +1,12 @@ +[Unit] +Description=Mount all 
virtiofs targets +After=local-fs.target +ConditionPathExists=/sys/fs/virtiofs + +[Service] +Type=oneshot +ExecStart=/usr/local/bin/mount-vfsd-targets.sh +RemainAfterExit=true + +[Install] +WantedBy=multi-user.target diff --git a/containerfiles/vm/files/mount-vfsd-targets.sh b/containerfiles/vm/files/mount-vfsd-targets.sh new file mode 100755 index 00000000..8b650d87 --- /dev/null +++ b/containerfiles/vm/files/mount-vfsd-targets.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -xe +mkdir -p /usr/lib/bootc/config +mkdir -p /usr/lib/bootc/container_storage +mkdir -p /usr/lib/bootc/output +mount -t virtiofs config /usr/lib/bootc/config +mount -t virtiofs storage /usr/lib/bootc/container_storage +mount -t virtiofs output /usr/lib/bootc/output diff --git a/containerfiles/vm/files/podman-vsock-proxy.service b/containerfiles/vm/files/podman-vsock-proxy.service new file mode 100644 index 00000000..17f47fb1 --- /dev/null +++ b/containerfiles/vm/files/podman-vsock-proxy.service @@ -0,0 +1,15 @@ +[Unit] +Description=Proxy vsock (PORT: 1234) to Unix podman socket +After=network.target +Requires=network.target + +[Service] +Type=simple +ExecStart=/usr/local/bin/vsock-proxy --log-level debug --cid 3 --port 1234 \ + --socket /var/run/podman/podman.sock --listen-mode vsockToUnix +Restart=always +RestartSec=3 + +[Install] +WantedBy=multi-user.target + diff --git a/containerfiles/vm/files/selinux-config b/containerfiles/vm/files/selinux-config new file mode 100644 index 00000000..cacc201e --- /dev/null +++ b/containerfiles/vm/files/selinux-config @@ -0,0 +1 @@ +SELINUX=disabled diff --git a/containerfiles/vm/files/sudoers-bootc b/containerfiles/vm/files/sudoers-bootc new file mode 100644 index 00000000..64892707 --- /dev/null +++ b/containerfiles/vm/files/sudoers-bootc @@ -0,0 +1 @@ +bootc ALL=(ALL) NOPASSWD: ALL diff --git a/containerfiles/vm/qemu.conf b/containerfiles/vm/qemu.conf new file mode 100644 index 00000000..4e4c59e3 --- /dev/null +++ b/containerfiles/vm/qemu.conf @@ -0,0 +1,10 @@ 
+stdio_handler = "logd" +vnc_listen = "0.0.0.0" +vnc_tls = 0 +vnc_sasl = 0 +user = "qemu" +group = "qemu" +dynamic_ownership = 1 +remember_owner = 0 +namespaces = [ ] +cgroup_controllers = [ ] diff --git a/containerfiles/vm/virtiofsd-wrapper b/containerfiles/vm/virtiofsd-wrapper new file mode 100755 index 00000000..a7f633ab --- /dev/null +++ b/containerfiles/vm/virtiofsd-wrapper @@ -0,0 +1,6 @@ +#!/bin/bash +exec /usr/libexec/virtiofsd \ + --sandbox=none \ + --cache=auto --modcaps=-mknod \ + --log-level debug \ + "$@" diff --git a/containerfiles/vm/virtqemud.conf b/containerfiles/vm/virtqemud.conf new file mode 100644 index 00000000..39432813 --- /dev/null +++ b/containerfiles/vm/virtqemud.conf @@ -0,0 +1,3 @@ +listen_tls = 0 +listen_tcp = 0 +log_outputs = "1:stderr" diff --git a/go.mod b/go.mod index ca386fc5..89f7e3d8 100644 --- a/go.mod +++ b/go.mod @@ -8,10 +8,13 @@ require ( github.com/containers/gvisor-tap-vsock v0.7.3 github.com/containers/podman/v5 v5.0.1 github.com/distribution/reference v0.5.0 + github.com/docker/docker v25.0.3+incompatible github.com/docker/go-units v0.5.0 github.com/gofrs/flock v0.8.1 + github.com/mdlayher/vsock v1.2.1 github.com/onsi/ginkgo/v2 v2.17.1 github.com/onsi/gomega v1.32.0 + github.com/opencontainers/runtime-spec v1.2.0 github.com/sirupsen/logrus v1.9.3 github.com/spf13/cobra v1.8.0 golang.org/x/crypto v0.28.0 @@ -20,6 +23,8 @@ require ( libvirt.org/go/libvirt v1.10002.0 ) +require github.com/mdlayher/socket v0.4.1 // indirect + require ( dario.cat/mergo v1.0.0 // indirect github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect @@ -59,7 +64,6 @@ require ( github.com/digitalocean/go-qemu v0.0.0-20230711162256-2e3d0186973e // indirect github.com/disiqueira/gotree/v3 v3.0.2 // indirect github.com/docker/distribution v2.8.3+incompatible // indirect - github.com/docker/docker v25.0.3+incompatible // indirect github.com/docker/docker-credential-helpers v0.8.1 // indirect github.com/docker/go-connections v0.5.0 // 
indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -135,7 +139,6 @@ require ( github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.0 // indirect github.com/opencontainers/runc v1.1.12 // indirect - github.com/opencontainers/runtime-spec v1.2.0 // indirect github.com/opencontainers/runtime-tools v0.9.1-0.20230914150019-408c51e934dc // indirect github.com/opencontainers/selinux v1.11.0 // indirect github.com/openshift/imagebuilder v1.2.6 // indirect @@ -188,6 +191,7 @@ require ( gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + libvirt.org/go/libvirtxml v1.9008.0 sigs.k8s.io/yaml v1.4.0 // indirect tags.cncf.io/container-device-interface v0.6.2 // indirect ) diff --git a/go.sum b/go.sum index 1212979d..0a72931f 100644 --- a/go.sum +++ b/go.sum @@ -361,6 +361,10 @@ github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U= +github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA= +github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ= +github.com/mdlayher/vsock v1.2.1/go.mod h1:NRfCibel++DgeMD8z/hP+PPTjlNJsdPOmxcnENvE+SE= github.com/miekg/pkcs11 v1.1.1 h1:Ugu9pdy6vAYku5DEpVWVFPYnzV+bxB+iRdbuFSu7TvU= github.com/miekg/pkcs11 v1.1.1/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= github.com/mistifyio/go-zfs/v3 v3.0.1 h1:YaoXgBePoMA12+S1u/ddkv+QqxcfiZK4prI6HPnkFiU= @@ -760,6 +764,8 @@ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod 
h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= libvirt.org/go/libvirt v1.10002.0 h1:ZFQsv1G8HE8SYhLBqaOuxze6+f00x96khLwn54aWJnI= libvirt.org/go/libvirt v1.10002.0/go.mod h1:1WiFE8EjZfq+FCVog+rvr1yatKbKZ9FaFMZgEqxEJqQ= +libvirt.org/go/libvirtxml v1.9008.0 h1:xo2U9SqUsufTFtbyjiqs6oDdF329cvtRdqttWN7eojk= +libvirt.org/go/libvirtxml v1.9008.0/go.mod h1:7Oq2BLDstLr/XtoQD8Fr3mfDNrzlI3utYKySXF2xkng= nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/pkg/podman/podman.go b/pkg/podman/podman.go new file mode 100644 index 00000000..74252162 --- /dev/null +++ b/pkg/podman/podman.go @@ -0,0 +1,436 @@ +package podman + +import ( + "bytes" + "context" + "fmt" + "io" + "os" + "os/user" + "path/filepath" + "strings" + "time" + + _ "embed" + + "github.com/containers/podman-bootc/pkg/utils" + "github.com/containers/podman-bootc/pkg/vm" + ocispec "github.com/opencontainers/runtime-spec/specs-go" + log "github.com/sirupsen/logrus" + + "github.com/containers/podman/v5/libpod/define" + "github.com/containers/podman/v5/pkg/api/handlers" + "github.com/containers/podman/v5/pkg/bindings" + "github.com/containers/podman/v5/pkg/bindings/containers" + "github.com/containers/podman/v5/pkg/bindings/images" + "github.com/containers/podman/v5/pkg/specgen" + "github.com/docker/docker/api/types" +) + +type RunVMContainerOptions struct { + ContainerStoragePath string + ConfigDir string + OutputDir string + SocketDir string + LibvirtSocketDir string +} + +func detectLocalPodman() string { + return "" +} + +type VMContainer struct { + contID string + image string + socketPath string + opts *RunVMContainerOptions +} + +func ExecInContainer(ctx context.Context, containerID string, cmd []string) (string, error) { + 
execCreateOptions := &handlers.ExecCreateConfig{ + ExecConfig: types.ExecConfig{ + Tty: true, + AttachStdin: true, + AttachStderr: true, + AttachStdout: true, + Cmd: cmd, + }, + } + execID, err := containers.ExecCreate(ctx, containerID, execCreateOptions) + if err != nil { + return "", fmt.Errorf("exec create failed: %w", err) + } + // Prepare streams + var stdoutBuf, stderrBuf bytes.Buffer + var stdout io.Writer = &stdoutBuf + var stderr io.Writer = &stderrBuf + // Start exec and attach + err = containers.ExecStartAndAttach(ctx, execID, &containers.ExecStartAndAttachOptions{ + OutputStream: &stdout, + ErrorStream: &stderr, + AttachOutput: utils.Ptr(true), + AttachError: utils.Ptr(true), + }) + if err != nil { + return "", fmt.Errorf("exec start failed: %w", err) + } + + // Handle output and errors + if stderrBuf.Len() > 0 { + return "", fmt.Errorf("stderr: %s", stderrBuf.String()) + } + + return stdoutBuf.String(), nil +} + +func (c *VMContainer) GetBootArtifacts() (string, string, error) { + ctx, err := connectPodman(c.socketPath) + if err != nil { + return "", "", fmt.Errorf("Failed to connect to Podman service: %v", err) + } + isRunning, err := isContainerRunning(ctx, c.contID) + if err != nil { + return "", "", err + } + if !isRunning { + return "", "", fmt.Errorf("the VM container isn't running") + } + findKernel := []string{"find", "/bootc-data/usr/lib/modules/", "-name", "vmlinuz", "-type", "f"} + findInitrd := []string{"find", "/bootc-data/usr/lib/modules/", "-name", "initramfs.img", "-type", "f"} + out, err := ExecInContainer(ctx, c.contID, findKernel) + if err != nil { + return "", "", err + } + kernel := strings.Trim(out, "\r\n") + out, err = ExecInContainer(ctx, c.contID, findInitrd) + if err != nil { + return "", "", err + } + initrd := strings.Trim(out, "\r\n") + + return kernel, initrd, nil +} + +func NewVMContainer(image, socketPath string, opts *RunVMContainerOptions) *VMContainer { + return &VMContainer{ + image: image, + socketPath: socketPath, 
+ opts: opts, + } +} + +func (c *VMContainer) Stop() error { + ctx, err := connectPodman(c.socketPath) + if err != nil { + return fmt.Errorf("Failed to connect to Podman service: %v", err) + } + if err := containers.Stop(ctx, c.contID, &containers.StopOptions{}); err != nil { + return fmt.Errorf("failed to stop the bootc container: %v", err) + } + if _, err := containers.Remove(ctx, c.contID, &containers.RemoveOptions{}); err != nil { + return fmt.Errorf("failed to stop the bootc container: %v", err) + } + + return nil +} + +func (c *VMContainer) Run() error { + ctx, err := connectPodman(c.socketPath) + if err != nil { + return fmt.Errorf("Failed to connect to Podman service: %v", err) + } + + c.contID, err = createVMContainer(ctx, c.image, c.opts) + if err != nil { + return err + } + + if err := containers.Start(ctx, c.contID, &containers.StartOptions{}); err != nil { + return fmt.Errorf("failed to start the bootc container: %v", err) + } + + isRunning, err := isContainerRunning(ctx, c.contID) + if err != nil { + return err + } + if !isRunning { + return fmt.Errorf("the VM container %s isn't running", c.contID) + } + return err +} + +func isContainerRunning(ctx context.Context, name string) (bool, error) { + inspectData, err := containers.Inspect(ctx, name, nil) + if err != nil { + return false, fmt.Errorf("failed to inspect container: %w", err) + } + + // Check if it's running + return inspectData.State.Running, nil +} + +func pullImage(ctx context.Context, image string) error { + if _, err := images.Pull(ctx, image, &images.PullOptions{}); err != nil { + return fmt.Errorf("failed to pull image %s: %w", image, err) + } + + return nil +} + +func createVMContainer(ctx context.Context, image string, opts *RunVMContainerOptions) (string, error) { + if err := pullImage(ctx, image); err != nil { + return "", err + } + specGen := &specgen.SpecGenerator{ + ContainerBasicConfig: specgen.ContainerBasicConfig{ + Command: []string{"/entrypoint.sh"}, + Stdin: utils.Ptr(true), 
+ }, + ContainerStorageConfig: specgen.ContainerStorageConfig{ + Image: vm.VMImage, + ImageVolumes: []*specgen.ImageVolume{ + { + Destination: vm.BootcDir, + Source: image, + ReadWrite: true, + }, + }, + Devices: []ocispec.LinuxDevice{ + { + Path: "/dev/kvm", + Type: "char", + }, + { + Path: "/dev/vhost-net", + Type: "char", + }, + { + Path: "/dev/vhost-vsock", + Type: "char", + }, + { + Path: "/dev/vhost-vsock", + Type: "char", + }, + }, + Mounts: []ocispec.Mount{ + { + Destination: vm.ContainerStoragePath, + Source: opts.ContainerStoragePath, + Type: "bind", + }, + { + Destination: vm.OutputDir, + Source: opts.OutputDir, + Type: "bind", + }, + { + Destination: vm.ConfigDir, + Source: opts.ConfigDir, + Type: "bind", + }, + { + Destination: vm.SocketDir, + Source: opts.SocketDir, + Type: "bind", + }, + { + Destination: vm.LibvirtSocketDir, + Source: opts.LibvirtSocketDir, + Type: "bind", + }, + }, + }, + ContainerSecurityConfig: specgen.ContainerSecurityConfig{ + Privileged: utils.Ptr(true), + SelinuxOpts: []string{"type:unconfined_t"}, + }, + ContainerCgroupConfig: specgen.ContainerCgroupConfig{}, + ContainerNetworkConfig: specgen.ContainerNetworkConfig{ + PublishExposedPorts: utils.Ptr(true), + Expose: map[uint16]string{uint16(vm.VNCPort): "tcp"}, + }, + } + if err := specGen.Validate(); err != nil { + return "", err + } + response, err := containers.CreateWithSpec(ctx, specGen, &containers.CreateOptions{}) + if err != nil { + return "", err + } + + log.Debugf("Run VM container ID: %s", response.ID) + + return response.ID, nil +} + +func connectPodman(socketPath string) (context.Context, error) { + const ( + retryInterval = 5 * time.Second + timeout = 5 * time.Minute + ) + + deadline := time.Now().Add(timeout) + + var ctx context.Context + var err error + + for time.Now().Before(deadline) { + ctx, err = bindings.NewConnection(context.Background(), fmt.Sprintf("unix:%s", socketPath)) + if err == nil { + log.Debugf("Connected to Podman successfully!") + return ctx, 
nil + } + + log.Debugf("Failed to connect to Podman. Retrying in %s seconds...", retryInterval.String()) + time.Sleep(retryInterval) + } + + return nil, fmt.Errorf("Unable to connect to Podman after %v: %v", timeout, err) +} + +func createBootcContainer(ctx context.Context, image string, bootcCmdLine []string) (string, error) { + log.Debugf("Create bootc container with cmdline: %v", bootcCmdLine) + specGen := &specgen.SpecGenerator{ + ContainerBasicConfig: specgen.ContainerBasicConfig{ + Command: bootcCmdLine, + Stdin: utils.Ptr(true), + PidNS: specgen.Namespace{ + NSMode: specgen.Host, + }, + }, + ContainerStorageConfig: specgen.ContainerStorageConfig{ + Image: image, + Mounts: []ocispec.Mount{ + { + Destination: "/var/lib/containers", + Source: "/var/lib/containers", + Type: "bind", + }, + { + Destination: "/var/lib/containers/storage", + Source: vm.ContainerStoragePath, + Type: "bind", + }, + { + Destination: "/dev", + Source: "/dev", + Type: "bind", + }, + { + Destination: "/output", + Source: vm.OutputDir, + Type: "bind", + }, + { + Destination: "/config", + Source: vm.ConfigDir, + Type: "bind", + }, + }, + }, + ContainerSecurityConfig: specgen.ContainerSecurityConfig{ + Privileged: utils.Ptr(true), + SelinuxOpts: []string{"type:unconfined_t"}, + }, + ContainerCgroupConfig: specgen.ContainerCgroupConfig{}, + } + if err := specGen.Validate(); err != nil { + return "", err + } + response, err := containers.CreateWithSpec(ctx, specGen, &containers.CreateOptions{}) + if err != nil { + return "", err + } + + return response.ID, nil +} + +func fetchLogsAfterExit(ctx context.Context, containerID string) error { + stdoutCh := make(chan string) + stderrCh := make(chan string) + + // Start log streaming + go func() { + logOpts := new(containers.LogOptions).WithFollow(true).WithStdout(true).WithStderr(true) + + err := containers.Logs(ctx, containerID, logOpts, stdoutCh, stderrCh) + if err != nil { + log.Errorf("Error streaming logs: %v\n", err) + } + close(stdoutCh) + 
close(stderrCh) + }() + + go func() { + for line := range stdoutCh { + fmt.Fprintf(os.Stdout, "%s", line) + } + }() + go func() { + for line := range stderrCh { + fmt.Fprintf(os.Stderr, "%s", line) + } + }() + + exitCode, err := containers.Wait(ctx, containerID, new(containers.WaitOptions). + WithCondition([]define.ContainerStatus{define.ContainerStateExited})) + if err != nil { + return fmt.Errorf("failed to wait for container: %w", err) + } + if exitCode != 0 { + return fmt.Errorf("bootc command failed: %d", exitCode) + } + + return nil +} + +func RunPodmanCmd(socketPath string, image string, bootcCmdLine []string) error { + ctx, err := connectPodman(socketPath) + if err != nil { + return fmt.Errorf("Failed to connect to Podman service: %v", err) + } + + name, err := createBootcContainer(ctx, image, bootcCmdLine) + if err != nil { + return fmt.Errorf("failed to create the bootc container: %v", err) + } + + if err := containers.Start(ctx, name, &containers.StartOptions{}); err != nil { + return fmt.Errorf("failed to start the bootc container: %v", err) + } + + if err := fetchLogsAfterExit(ctx, name); err != nil { + return fmt.Errorf("failed executing bootc: %v", err) + } + + return nil +} + +func DefaultPodmanSocket() string { + if envSock := os.Getenv("DOCKER_HOST"); envSock != "" { + return envSock + } + runtimeDir := os.Getenv("XDG_RUNTIME_DIR") + if runtimeDir != "" { + return filepath.Join(runtimeDir, "podman", "podman.sock") + } + usr, err := user.Current() + if err == nil && usr.Uid != "0" { + return "/run/user/" + usr.Uid + "/podman/podman.sock" + } + + return "/run/podman/podman.sock" +} + +func DefaultContainerStorage() string { + usr, err := user.Current() + if err == nil && usr.Uid != "0" { + homeDir := os.Getenv("HOME") + if homeDir != "" { + return filepath.Join(homeDir, ".local/share/containers/storage") + } + } + + return "/var/lib/containers/storage" +} diff --git a/pkg/utils/pointer.go b/pkg/utils/pointer.go new file mode 100644 index 
00000000..947538cd --- /dev/null +++ b/pkg/utils/pointer.go @@ -0,0 +1,5 @@ +package utils + +func Ptr[T any](v T) *T { + return &v +} diff --git a/pkg/vm/domain/domain.go b/pkg/vm/domain/domain.go new file mode 100644 index 00000000..647df85b --- /dev/null +++ b/pkg/vm/domain/domain.go @@ -0,0 +1,279 @@ +package domain + +import ( + "encoding/json" + "fmt" + "io" + "os/exec" + + "github.com/sirupsen/logrus" + "libvirt.org/go/libvirtxml" +) + +type DomainOption func(d *libvirtxml.Domain) + +const ( + MemoryMemfd = "memfd" + MemoryAccessModeShared = "shared" +) + +type DiskDriverType string + +func (d DiskDriverType) String() string { + return string(d) +} + +const ( + DiskDriverQCOW2 DiskDriverType = "qcow2" + DiskDriverRaw DiskDriverType = "raw" +) + +type DiskBus string + +func (b DiskBus) String() string { + return string(b) +} + +const ( + DiskBusSCSI DiskBus = "scsi" + DiskBusVirtio DiskBus = "virtio" +) + +func NewDomain(opts ...DomainOption) *libvirtxml.Domain { + domain := &libvirtxml.Domain{} + for _, f := range opts { + f(domain) + } + + return domain +} + +func WithName(name string) DomainOption { + return func(d *libvirtxml.Domain) { + d.Name = name + } +} + +func WithMemory(memory uint) DomainOption { + return func(d *libvirtxml.Domain) { + d.Memory = &libvirtxml.DomainMemory{ + Value: memory, + Unit: "MiB", + } + } +} + +func WithMemoryBackingForVirtiofs() DomainOption { + return func(d *libvirtxml.Domain) { + d.MemoryBacking = &libvirtxml.DomainMemoryBacking{ + MemorySource: &libvirtxml.DomainMemorySource{Type: MemoryMemfd}, + MemoryAccess: &libvirtxml.DomainMemoryAccess{Mode: MemoryAccessModeShared}, + } + } +} + +func WithCPUHostModel() DomainOption { + return func(d *libvirtxml.Domain) { + d.CPU = &libvirtxml.DomainCPU{ + Mode: "host-model", + } + } +} + +func WithVCPUs(cpus uint) DomainOption { + return func(d *libvirtxml.Domain) { + d.VCPU = &libvirtxml.DomainVCPU{Value: cpus} + } +} + +func allocateDevices(d *libvirtxml.Domain) { + if d.Devices 
== nil { + d.Devices = &libvirtxml.DomainDeviceList{} + } +} + +func WithFilesystem(source, target, vfsdBin string) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Filesystems = append(d.Devices.Filesystems, libvirtxml.DomainFilesystem{ + Driver: &libvirtxml.DomainFilesystemDriver{ + Type: "virtiofs", + }, + Source: &libvirtxml.DomainFilesystemSource{ + Mount: &libvirtxml.DomainFilesystemSourceMount{ + Dir: source, + }, + }, + Target: &libvirtxml.DomainFilesystemTarget{ + Dir: target, + }, + Binary: &libvirtxml.DomainFilesystemBinary{ + Path: vfsdBin, + }, + }) + } +} + +func WithDisk(path, serial, dev string, diskType DiskDriverType, bus DiskBus) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Disks = append(d.Devices.Disks, libvirtxml.DomainDisk{ + Device: "disk", + Driver: &libvirtxml.DomainDiskDriver{ + Name: "qemu", + Type: diskType.String(), + }, + Source: &libvirtxml.DomainDiskSource{ + File: &libvirtxml.DomainDiskSourceFile{ + File: path, + }, + }, + Target: &libvirtxml.DomainDiskTarget{ + Bus: bus.String(), + Dev: dev, + }, + Serial: serial, + }) + } +} + +func WithSerialConsole() DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Consoles = append(d.Devices.Consoles, libvirtxml.DomainConsole{ + Source: &libvirtxml.DomainChardevSource{Pty: &libvirtxml.DomainChardevSourcePty{}}, + Target: &libvirtxml.DomainConsoleTarget{ + Type: "serial", + }, + }) + + } +} + +func WithInterface(mac, model string) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Interfaces = append(d.Devices.Interfaces, libvirtxml.DomainInterface{ + Source: &libvirtxml.DomainInterfaceSource{ + User: &libvirtxml.DomainInterfaceSourceUser{}, + }, + MAC: &libvirtxml.DomainInterfaceMAC{ + Address: mac, + }, + Model: &libvirtxml.DomainInterfaceModel{ + Type: model, + }, + }) + } +} + +func WithVSOCK(cid uint) DomainOption { + return func(d 
*libvirtxml.Domain) { + allocateDevices(d) + d.Devices.VSock = &libvirtxml.DomainVSock{ + Model: "virtio", + CID: &libvirtxml.DomainVSockCID{ + Address: fmt.Sprintf("%d", cid), + }, + } + } +} + +func WithUUID(uuid string) DomainOption { + return func(d *libvirtxml.Domain) { + d.UUID = uuid + } +} + +func WithKVM() DomainOption { + return func(d *libvirtxml.Domain) { + d.Type = "kvm" + } +} + +func WithOS() DomainOption { + // TODO: fix this for multiarch + return func(d *libvirtxml.Domain) { + d.OS = &libvirtxml.DomainOS{ + Type: &libvirtxml.DomainOSType{ + Arch: "x86_64", + Machine: "q35", + Type: "hvm", + }, + } + } +} + +func WithDirectBoot(kernel, initrd, cmdline string) DomainOption { + return func(d *libvirtxml.Domain) { + d.OS.Kernel = kernel + d.OS.Initrd = initrd + d.OS.Cmdline = cmdline + } +} + +func WithVNC(port int) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Graphics = append(d.Devices.Graphics, libvirtxml.DomainGraphic{ + VNC: &libvirtxml.DomainGraphicVNC{ + Port: port, + Listen: "0.0.0.0", + }, + }) + d.Devices.Videos = append(d.Devices.Videos, libvirtxml.DomainVideo{ + Model: libvirtxml.DomainVideoModel{ + Type: "vga", + }, + }) + } +} + +func WIthFeatures() DomainOption { + return func(d *libvirtxml.Domain) { + d.Features = &libvirtxml.DomainFeatureList{ + ACPI: &libvirtxml.DomainFeature{}, + APIC: &libvirtxml.DomainFeatureAPIC{}, + } + } +} + +type diskInfo struct { + Format string `json:"format"` + BackingFile string `json:"backing-filename"` + ActualSize int64 `json:"actual-size"` + VirtualSize int64 `json:"virtual-size"` +} + +func GetDiskInfo(imagePath string) (DiskDriverType, error) { + path, err := exec.LookPath("qemu-img") + if err != nil { + return "", fmt.Errorf("qemu-img not found: %v\n", err) + } + + args := []string{"info", imagePath, "--output", "json"} + cmd := exec.Command(path, args...) 
+ logrus.Debugf("Execute: %s", cmd.String()) + stderr, err := cmd.StderrPipe() + if err != nil { + return "", fmt.Errorf("failed to get stderr for qemu-img command: %v", err) + } + out, err := cmd.Output() + if err != nil { + errout, _ := io.ReadAll(stderr) + return "", fmt.Errorf("failed to invoke qemu-img on %s: %v: %s", imagePath, err, errout) + } + info := &diskInfo{} + err = json.Unmarshal(out, info) + if err != nil { + return "", fmt.Errorf("failed to parse disk info: %v", err) + } + switch info.Format { + case "qcow2": + return DiskDriverQCOW2, nil + case "raw": + return DiskDriverRaw, nil + default: + return "", fmt.Errorf("Unsupported format: %s", info.Format) + } +} diff --git a/pkg/vm/installvm.go b/pkg/vm/installvm.go new file mode 100644 index 00000000..e647311c --- /dev/null +++ b/pkg/vm/installvm.go @@ -0,0 +1,187 @@ +package vm + +import ( + "fmt" + "os" + "time" + + "math/rand" + + "github.com/containers/podman-bootc/pkg/vm/domain" + "github.com/google/uuid" + "github.com/opencontainers/runtime-tools/filepath" + "github.com/sirupsen/logrus" + "libvirt.org/go/libvirt" + "libvirt.org/go/libvirtxml" +) + +const ( + CIDInstallVM = 3 + VSOCKPort = 1234 +) + +const VMImage = "quay.io/containers/bootc-vm:latest" + +const ( + mac = "52:54:00:0b:dd:1e" + imodel = "e1000" +) + +const ( + ContainerStoragePath = "/usr/lib/bootc/container_storage" + ConfigDir = "/usr/lib/bootc/config" + OutputDir = "/usr/lib/bootc/output" + SocketDir = "/run/podman" + LibvirtSocketDir = "/run/libvirt" + BootcDir = "/bootc-data" +) +const ( + RootTarget = "root" + StorageVirtiofsTarget = "storage" + ConfigVirtiofsTarget = "config" + OutputVirtiofsTarget = "output" +) + +const cmdline = "console=ttyS0 rootfstype=virtiofs root=root rw init=/sbin/init panic=1" + +const VNCPort int = 5959 +// The virtiofs wrapper helps to launch virtiofs with the correct flags inside a container +const VsfdWrapperPath = "/usr/local/bin/virtiofsd-wrapper" + +type InstallOptions struct { + OutputImage 
// letterBytes is the alphabet RandomString draws from: the ASCII lowercase
// letters, the ASCII uppercase letters and the decimal digits (62 characters).
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

// RandomString returns a string of n characters, each picked from letterBytes
// with math/rand. A non-positive n yields the empty string.
//
// The result is meant for generating names, not secrets: math/rand is not a
// cryptographic source.
func RandomString(n int) string {
	if n <= 0 {
		return ""
	}
	b := make([]byte, n)
	for i := range b {
		b[i] = letterBytes[rand.Intn(len(letterBytes))]
	}
	return string(b)
}
fmt.Errorf("error checking file: %w", err) + } + + if time.Since(start) > timeout { + break + } + time.Sleep(interval) + } + + return fmt.Errorf("timeout waiting for file: " + path) +} + +func (vm *InstallVM) Run() error { + if err := waitForSocket(vm.socket, 2*time.Minute, 1*time.Second); err != nil { + return err + } + domainXML, err := vm.newDomain().Marshal() + if err != nil { + return err + } + logrus.Debugf("XML: %s", domainXML) + conn, err := libvirt.NewConnect(vm.libvirtURI) + if err != nil { + return err + } + _, err = conn.DomainDefineXMLFlags(domainXML, libvirt.DOMAIN_DEFINE_VALIDATE) + if err != nil { + return fmt.Errorf("unable to define virtual machine domain: %w", err) + } + dom, err := conn.LookupDomainByName(vm.domain) + if err != nil { + return err + } + defer dom.Free() + err = dom.Create() + if err != nil { + return fmt.Errorf("Failed to start domain: %v", err) + } + logrus.Debugf("Domain %s started successfully.", vm.domain) + + return nil +} + +func (vm *InstallVM) Stop() error { + conn, err := libvirt.NewConnect(vm.libvirtURI) + if err != nil { + return err + } + dom, err := conn.LookupDomainByName(vm.domain) + if err != nil { + return err + } + defer dom.Free() + if err := dom.Destroy(); err != nil { + logrus.Warningf("Failed to destroy the domain %s, maybe already stopped: %v", vm.domain, err) + } + if err := dom.Undefine(); err != nil { + return fmt.Errorf("Undefine failed: %v", err) + } + logrus.Debugf("Domain %s stopped and deleted successfully", vm.domain) + + return nil +} diff --git a/pkg/vsock/proxy.go b/pkg/vsock/proxy.go new file mode 100644 index 00000000..bb43a6c1 --- /dev/null +++ b/pkg/vsock/proxy.go @@ -0,0 +1,185 @@ +package vsock + +import ( + "context" + "fmt" + "io" + "net" + "os" + + "github.com/mdlayher/vsock" + log "github.com/sirupsen/logrus" +) + +type Proxy struct { + cid uint32 + port uint32 + socket string + done chan struct{} + start func(socket string, port, cid uint32, done chan struct{}) error +} + +func 
NewProxyUnixSocketToVsock(port, cid uint32, socket string) *Proxy { + p := &Proxy{ + cid: cid, + port: port, + socket: socket, + done: make(chan struct{}), + start: startUnixToVsock, + } + return p +} + +func NewProxyVSockToUnixSocket(port uint32, socket string) *Proxy { + p := &Proxy{ + port: port, + socket: socket, + done: make(chan struct{}), + start: startVsockToUnix, + } + return p +} + +func (proxy *Proxy) GetSocket() string { + return proxy.socket +} + +func (proxy *Proxy) Stop() { + select { + case <-proxy.done: + // already closed + default: + close(proxy.done) + } + os.Remove(proxy.socket) + log.Debugf("Stopped proxy") +} + +func (p *Proxy) Start() error { + return p.start(p.socket, p.port, p.cid, p.done) +} + +func startUnixToVsock(socket string, port, cid uint32, done chan struct{}) error { + _ = os.Remove(socket) + + unixListener, err := net.Listen("unix", socket) + if err != nil { + return fmt.Errorf("Failed to listen on unix socket: %v", err) + } + go func() { + defer unixListener.Close() + + for { + select { + case <-done: + return + default: + unixConn, err := unixListener.Accept() + if err != nil { + log.Warnf("Accept error: %v", err) + continue + } + log.Debugf("Accepted connection from %s to port %d and cid", socket, port, cid) + + go handleConnectionToVsock(unixConn, port, cid, done) + } + } + }() + + log.Debugf("Started proxy at: %s", socket) + + return nil +} + +func handleConnectionToVsock(unixConn net.Conn, port, cid uint32, done chan struct{}) { + defer unixConn.Close() + vsockConn, err := vsock.Dial(cid, port, nil) + if err != nil { + log.Printf("vsock connect error (cid: %d, port: %d): %v", cid, port, err) + return + } + defer vsockConn.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + errCh := make(chan error, 2) + go proxy(ctx, vsockConn, unixConn, errCh, done) + go proxy(ctx, unixConn, vsockConn, errCh, done) + + // Wait for the first error or cancellation + select { + case <-done: + case err := 
// proxy copies everything readable from src into dst and then reports the
// result of the copy on errCh (nil when src reached EOF cleanly).
//
// It blocks until the copy ends, which happens when src hits EOF or either
// connection is closed. The result is only ever sent on errCh, never received
// from it, so the caller that waits on errCh is guaranteed to observe the
// first finished direction. If nobody can take the result because ctx was
// cancelled or done was closed, the result is dropped instead of blocking.
func proxy(ctx context.Context, src, dst net.Conn, errCh chan error, done chan struct{}) {
	_, err := io.Copy(dst, src)
	select {
	case errCh <- err:
	case <-ctx.Done():
	case <-done:
	}
}
unixToVsock mode = "unixToVsock" + vsockToUnix mode = "vsockToUnix" +) + +func (m *mode) String() string { + return string(*m) +} + +func (m *mode) Set(val string) error { + switch val { + case string(vsockToUnix), string(unixToVsock): + *m = mode(val) + return nil + default: + return fmt.Errorf("invalid mode: %s (must be '%s' or '%s')", val, unixToVsock, vsockToUnix) + } +} + +func (m *mode) Type() string { + return "mode" +} + +type rootCmd struct { + proxy *vsock.Proxy + logLevel string + listenMode mode + cid uint32 + port uint32 + socket string +} + +func NewRootCmd() *cobra.Command { + c := rootCmd{} + cmd := &cobra.Command{ + Use: "proxy", + Short: "Proxy connections between VSOCK and UNIX socket", + Long: "Proxy the connection between VSOCK and UNIX socket based on the direction", + PersistentPreRunE: c.preExec, + RunE: func(cmd *cobra.Command, _ []string) error { + return c.run() + }, + } + + cmd.PersistentFlags().Uint32VarP(&c.cid, "cid", "c", 0, "CID allocated by the VM") + cmd.PersistentFlags().Uint32VarP(&c.port, "port", "p", 0, "Port for the VSOCK on the VM") + cmd.PersistentFlags().StringVarP(&c.socket, "socket", "s", "", "Socket for the proxy") + cmd.PersistentFlags().StringVarP(&c.logLevel, "log-level", "", "", "Set log level") + cmd.PersistentFlags().VarP(&c.listenMode, "listen-mode", "l", + fmt.Sprintf("Direction for the listentin proxy, values: %s or %s", unixToVsock, vsockToUnix)) + cmd.MarkPersistentFlagRequired("port") + cmd.MarkPersistentFlagRequired("socket") + cmd.MarkPersistentFlagRequired("listen-mode") + + return cmd +} + +func (c *rootCmd) preExec(cmd *cobra.Command, args []string) error { + if c.logLevel != "" { + level, err := log.ParseLevel(c.logLevel) + if err != nil { + return err + } + log.SetLevel(level) + } else { + log.SetLevel(log.InfoLevel) + } + socket, _ := cmd.Flags().GetString("socket") + if socket == "" { + return fmt.Errorf("the socket needs to be set") + } + + return nil +} + +func (c *rootCmd) validateArgs() error { 
+ if c.port == 0 { + return fmt.Errorf("the port cannot be 0") + } + if c.listenMode == unixToVsock && c.cid == 0 { + return fmt.Errorf("the cid cannot be 0 when the listen mode is unixToVsock") + } + + return nil +} + +func (c *rootCmd) run() error { + if err := c.validateArgs(); err != nil { + return err + } + switch c.listenMode { + case vsockToUnix: + c.proxy = vsock.NewProxyVSockToUnixSocket(c.port, c.socket) + case unixToVsock: + c.proxy = vsock.NewProxyUnixSocketToVsock(c.port, c.cid, c.socket) + } + + if err := c.proxy.Start(); err != nil { + return err + } + defer c.proxy.Stop() + + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer stop() + <-ctx.Done() + + return nil +} + +func Execute() { + if err := NewRootCmd().Execute(); err != nil { + os.Exit(1) + } +} diff --git a/proxy/proxy.go b/proxy/proxy.go new file mode 100644 index 00000000..1eed0ded --- /dev/null +++ b/proxy/proxy.go @@ -0,0 +1,7 @@ +package main + +import "github.com/containers/podman-bootc/proxy/cmd" + +func main() { + cmd.Execute() +}