diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d918deb..bc535d9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,7 +1,7 @@ name: Main CI on: push: - branches: + branches: - main paths: - 'images/**' diff --git a/.github/workflows/matrix-gen-ci.yml b/.github/workflows/matrix-gen-ci.yml index 87b87a5..95b6a9b 100644 --- a/.github/workflows/matrix-gen-ci.yml +++ b/.github/workflows/matrix-gen-ci.yml @@ -10,7 +10,7 @@ jobs: steps: - name: Checkout repo ⤵️ uses: actions/checkout@v4 - + - name: Build matrix_gen working-directory: ./matrix_gen run: go build . diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index cb393a1..682aa10 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -1,7 +1,7 @@ name: PR CI on: pull_request: - branches: + branches: - main paths: - 'images/**' diff --git a/.github/workflows/reusable_build_images.yml b/.github/workflows/reusable_build_images.yml index dd92ed6..a34c8a4 100644 --- a/.github/workflows/reusable_build_images.yml +++ b/.github/workflows/reusable_build_images.yml @@ -1,6 +1,6 @@ name: Build and Push docker images on: - workflow_call: + workflow_call: inputs: version: description: 'docker images version to be built/tagged' @@ -26,7 +26,7 @@ jobs: matrix: arch: [amd64, arm64] runs-on: ${{ (matrix.arch == 'arm64' && 'ubuntu-22.04-arm') || 'ubuntu-22.04' }} - steps: + steps: - name: Checkout repo uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -42,12 +42,12 @@ jobs: if: inputs.push run: | echo "PUSH=true" >> $GITHUB_ENV - + - name: Set LATEST env var if: inputs.is_latest run: | echo "LATEST=true" >> $GITHUB_ENV - + - name: Set TAG env var run: | echo "TAG=${{ inputs.version }}" >> $GITHUB_ENV diff --git a/README.md b/README.md index 617f021..90b53a6 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,43 @@ -[![Falco kernel tests 
Repository](https://github.com/falcosecurity/evolution/blob/main/repos/badges/falco-infra-blue.svg)](https://github.com/falcosecurity/evolution/blob/main/REPOSITORIES.md#infra-scope) +[![Falco kernel tests Repository](https://github.com/falcosecurity/evolution/blob/main/repos/badges/falco-infra-blue.svg)](https://github.com/falcosecurity/evolution/blob/main/REPOSITORIES.md#infra-scope) [![Incubating](https://img.shields.io/badge/status-incubating-orange?style=for-the-badge)](https://github.com/falcosecurity/evolution/blob/main/REPOSITORIES.md#incubating) ![Architectures](https://img.shields.io/badge/ARCHS-x86__64%7Caarch64-blueviolet?style=for-the-badge) [![Latest release](https://img.shields.io/github/v/release/falcosecurity/kernel-testing?style=for-the-badge)](https://github.com/falcosecurity/kernel-testing/releases/latest) # Falco drivers tests -This repository automatically runs Falco [scap-open](https://github.com/falcosecurity/libs/tree/master/userspace/libscap/examples/01-open) binary on all supported drivers through Ansible, spawning Firecracker microVMs to test Falco drivers against multiple kernels. -You can find list of machines being used [here](./ansible-playbooks/group_vars/all/vars.yml#L18). +This repository automatically runs +Falco [scap-open](https://github.com/falcosecurity/libs/tree/master/userspace/libscap/examples/01-open) binary on all +supported drivers through Ansible, spawning Firecracker microVMs to test Falco drivers against multiple kernels. +You can find list of machines being used [here](./ansible-playbooks/group_vars/all/vars.yml#L19). +You can find a document explaining the architecture of the solution [here](./architecture.md). Please read it carefully +before deploying it. 
## Prerequisites -* Install [Ansible](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html) -* Install [Ignite](https://ignite.readthedocs.io/en/stable/installation/) from `therealbobo` fork (use `main` branch): https://github.com/therealbobo/ignite; just issue `make` and then `sudo make install` to install everything needed under `/usr/local/`. -* Install ignite CNI plugins by following this guide: https://ignite.readthedocs.io/en/stable/installation/#cni-plugins: -```bash -export CNI_VERSION=v0.9.1 -export ARCH=$([ $(uname -m) = "x86_64" ] && echo amd64 || echo arm64) -sudo mkdir -p /opt/cni/bin -curl -sSL https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-linux-${ARCH}-${CNI_VERSION}.tgz | sudo tar -xz -C /opt/cni/bin -``` +The following is the list of main prerequisites, each one annotated with a suggested (tested) version: + +* Ansible -> `2.16.3-0ubuntu2` +* Firecracker -> `1.13.1` +* Docker -> whatever is available +* Golang -> `1.25.4` + +Exemplary instructions, installing and configuring all needed dependencies, and configuring host networking, can be +found in [config_example_amd64.sh](./config_example_amd64.sh). The script is not intended to be run as is: it is just +demonstrative, and is required for the user to go through it and adapt the different parts to the specific environment. ## Configure It is advised to avoid directly modifying [`vars.yml`](ansible-playbooks/group_vars/all/vars.yml) file; -instead one can create a local vars.yml file to override keys from the default vars. +instead one can create a local vars.yml file to override keys from the default vars. 
The only mandatory thing to be configured is an ssh key pair: + ```yml #Path to the generated SSH private key file ssh_key_path: "" # <-- Replace here with the key path ssh_key_name: "" # <-- Replace here with the key name ``` + ## Run From the `ansible-playbooks` directory you can run tests on all machines by typing: @@ -45,7 +52,7 @@ To rerun tests: ansible-playbook scap-open.yml --ask-become --extra-vars "@/path/to/local/vars.yaml" ``` -To cleanup all machines +To clean up all machines ```bash ansible-playbook clean-up.yml --ask-become --extra-vars "@/path/to/local/vars.yaml" @@ -53,8 +60,11 @@ ansible-playbook clean-up.yml --ask-become --extra-vars "@/path/to/local/vars.ya ## CI Usage -To better suit the CI usage, a [Github composite action](https://docs.github.com/en/actions/creating-actions/creating-a-composite-action) has been developed. -Therefore, running kernel-testing in your Github workflow is as easy as adding this step: +To better suit the CI usage, +a [GitHub composite action](https://docs.github.com/en/actions/creating-actions/creating-a-composite-action) has been +developed. +Therefore, running kernel-testing in your GitHub workflow is as easy as adding this step: + ``` - uses: falcosecurity/kernel-testing@main # Give it an id to be able to later use its outputs @@ -71,8 +81,14 @@ Therefore, running kernel-testing in your Github workflow is as easy as adding t # Whether to generate matrixes as matrix artifact. # Default: false build_matrix: 'true' + + # Images tag to be used, in the form vX.Y.Z + # Required. + images_tag: 'v0.3.2' ``` + Then you can use action outputs to retrieve artifacts: + ``` - uses: actions/upload-artifact@latest with: @@ -85,8 +101,10 @@ Then you can use action outputs to retrieve artifacts: path: ${{ steps.kernel_tests.outputs.matrix_output }} ``` -As an example, see [libs reusable workflow](https://github.com/falcosecurity/libs/blob/master/.github/workflows/reusable_kernel_tests.yaml). 
+As an example, +see [libs reusable workflow](https://github.com/falcosecurity/libs/blob/master/.github/workflows/reusable_kernel_tests.yaml). -> __NOTE:__ Since we don't use annotated tags, one cannot use eg: falcosecurity/kernel-testing@v0, but only either exact tag name or master. +> __NOTE:__ Since we don't use annotated tags, one cannot use eg: falcosecurity/kernel-testing@v0, but only either exact +> tag name or master. > __NOTE:__ Of course, you'll need to run your tests on virtualization-enabled nodes. diff --git a/action.yml b/action.yml index 783d068..5d20810 100644 --- a/action.yml +++ b/action.yml @@ -10,10 +10,13 @@ inputs: description: 'libs repo to be tested, eg: falcosecurity/libs' required: false default: 'falcosecurity/libs' - build_matrix: + build_matrix: description: 'Whether to generate matrixes as matrix artifact' required: false default: 'false' + images_tag: + description: 'Images tag to be used, in the form vX.Y.Z' + required: true outputs: ansible_output: @@ -21,7 +24,7 @@ outputs: value: ${{ steps.store-outputs.outputs.ansible }} matrix_output: description: "Uploaded matrix artifact name" - value: ${{ steps.store-outputs.outputs.matrix }} + value: ${{ steps.store-outputs.outputs.matrix }} runs: using: "composite" @@ -33,7 +36,7 @@ runs: cat > vars.yml <> $GITHUB_OUTPUT - echo "matrix=${{ github.action_path }}/matrix_gen/matrix.md" >> $GITHUB_OUTPUT - + echo "matrix=${{ github.action_path }}/matrix_gen/matrix.md" >> $GITHUB_OUTPUT + - name: Cleanup if: always() working-directory: ${{ github.action_path }}/ansible-playbooks shell: bash run: | - ansible-playbook clean-up.yml --extra-vars "@vars.yml" || : + ansible-playbook clean-up.yml --extra-vars "@vars.yml" || : diff --git a/ansible-playbooks/.gitignore b/ansible-playbooks/.gitignore new file mode 100644 index 0000000..712f264 --- /dev/null +++ b/ansible-playbooks/.gitignore @@ -0,0 +1 @@ +runtime/ diff --git a/ansible-playbooks/bootstrap.yml b/ansible-playbooks/bootstrap.yml index 
cb93a02..906eb27 100644 --- a/ansible-playbooks/bootstrap.yml +++ b/ansible-playbooks/bootstrap.yml @@ -1,5 +1,5 @@ --- -# Playbook used to generate the vm configuration files and start them using ignite tool. +# Playbook used to generate the vm configuration files and start them using firecracker. # Check the role for more information - name: Play for creating virtual machines hosts: localhost diff --git a/ansible-playbooks/common.yml b/ansible-playbooks/common.yml index 0368a0b..d3c4980 100644 --- a/ansible-playbooks/common.yml +++ b/ansible-playbooks/common.yml @@ -6,5 +6,5 @@ tasks: - name: Fix the dns issues ansible.builtin.shell: | - unlink /etc/resolv.conf && echo 'nameserver 1.1.1.1' > /etc/resolv.conf + echo 'nameserver 1.1.1.1' > /etc/resolv.conf changed_when: false diff --git a/ansible-playbooks/group_vars/all/vars.yml b/ansible-playbooks/group_vars/all/vars.yml index 8b979f2..fa29024 100644 --- a/ansible-playbooks/group_vars/all/vars.yml +++ b/ansible-playbooks/group_vars/all/vars.yml @@ -24,7 +24,6 @@ repo: "ghcr.io/falcosecurity/kernel-testing" machines: - {name: "amazonlinux2022-5.15", kernel: "{{ repo }}/amazonlinux2022-kernel:5.15-x86_64-{{ tag }}", rootfs: "{{ repo }}/amazonlinux2022-image:5.15-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - {name: "amazonlinux2023-6.1", kernel: "{{ repo }}/amazonlinux2023-kernel:6.1-x86_64-{{ tag }}", rootfs: "{{ repo }}/amazonlinux2023-image:6.1-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - - {name: "amazonlinux2-4.19", kernel: "{{ repo }}/amazonlinux2-kernel:4.19-x86_64-{{ tag }}", rootfs: "{{ repo }}/amazonlinux2-image:4.19-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - {name: "amazonlinux2-5.10", kernel: "{{ repo }}/amazonlinux2-kernel:5.10-x86_64-{{ tag }}", rootfs: "{{ repo }}/amazonlinux2-image:5.10-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - {name: "amazonlinux2-5.15", kernel: "{{ repo }}/amazonlinux2-kernel:5.15-x86_64-{{ tag }}", rootfs: 
"{{ repo }}/amazonlinux2-image:5.15-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - {name: "amazonlinux2-5.4", kernel: "{{ repo }}/amazonlinux2-kernel:5.4-x86_64-{{ tag }}", rootfs: "{{ repo }}/amazonlinux2-image:5.4-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] @@ -36,11 +35,9 @@ machines: - {name: "fedora-5.17", kernel: "{{ repo }}/fedora-kernel:5.17-x86_64-{{ tag }}", rootfs: "{{ repo }}/fedora-image:5.17-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - {name: "fedora-5.8", kernel: "{{ repo }}/fedora-kernel:5.8-x86_64-{{ tag }}", rootfs: "{{ repo }}/fedora-image:5.8-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - {name: "fedora-6.2", kernel: "{{ repo }}/fedora-kernel:6.2-x86_64-{{ tag }}", rootfs: "{{ repo }}/fedora-image:6.2-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - - {name: "oraclelinux-3.10", kernel: "{{ repo }}/oraclelinux-kernel:3.10-x86_64-{{ tag }}", rootfs: "{{ repo }}/oraclelinux-image:3.10-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - {name: "oraclelinux-4.14", kernel: "{{ repo }}/oraclelinux-kernel:4.14-x86_64-{{ tag }}", rootfs: "{{ repo }}/oraclelinux-image:4.14-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - {name: "oraclelinux-5.15", kernel: "{{ repo }}/oraclelinux-kernel:5.15-x86_64-{{ tag }}", rootfs: "{{ repo }}/oraclelinux-image:5.15-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - {name: "oraclelinux-5.4", kernel: "{{ repo }}/oraclelinux-kernel:5.4-x86_64-{{ tag }}", rootfs: "{{ repo }}/oraclelinux-image:5.4-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - - {name: "ubuntu-4.15", kernel: "{{ repo }}/ubuntu-kernel:4.15-x86_64-{{ tag }}", rootfs: "{{ repo }}/ubuntu-image:4.15-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - {name: "ubuntu-5.8", kernel: "{{ repo }}/ubuntu-kernel:5.8-x86_64-{{ tag }}", rootfs: "{{ repo }}/ubuntu-image:5.8-x86_64-{{ tag }}", arch: "x86_64"} # noqa: 
yaml[line-length] - {name: "ubuntu-6.5", kernel: "{{ repo }}/ubuntu-kernel:6.5-x86_64-{{ tag }}", rootfs: "{{ repo }}/ubuntu-image:6.5-x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - {name: "amazonlinux2022-5.15", kernel: "{{ repo }}/amazonlinux2022-kernel:5.15-aarch64-{{ tag }}", rootfs: "{{ repo }}/amazonlinux2022-image:5.15-aarch64-{{ tag }}", arch: "aarch64"} # noqa: yaml[line-length] @@ -51,16 +48,16 @@ machines: - {name: "ubuntu-6.5", kernel: "{{ repo }}/ubuntu-kernel:6.5-aarch64-{{ tag }}", rootfs: "{{ repo }}/ubuntu-image:6.5-aarch64-{{ tag }}", arch: "aarch64"} # noqa: yaml[line-length] builders: - - {name: "centos-builder", kernel: "weaveworks/ignite-kernel:5.14.16", rootfs: "{{ repo }}/builder:x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - - {name: "fedora-builder", kernel: "weaveworks/ignite-kernel:5.14.16", rootfs: "{{ repo }}/modernprobe-builder:x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] - - {name: "centos-builder", kernel: "weaveworks/ignite-kernel:5.14.16", rootfs: "{{ repo }}/builder:aarch64-{{ tag }}", arch: "aarch64"} # noqa: yaml[line-length] - - {name: "fedora-builder", kernel: "weaveworks/ignite-kernel:5.14.16", rootfs: "{{ repo }}/modernprobe-builder:aarch64-{{ tag }}", arch: "aarch64"} # noqa: yaml[line-length] + - {name: "centos-builder", kernel: "{{ repo }}/ubuntu-kernel:6.5-x86_64-{{ tag }}", rootfs: "{{ repo }}/builder:x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] + - {name: "fedora-builder", kernel: "{{ repo }}/fedora-kernel:6.2-x86_64-{{ tag }}", rootfs: "{{ repo }}/modernprobe-builder:x86_64-{{ tag }}", arch: "x86_64"} # noqa: yaml[line-length] + - {name: "centos-builder", kernel: "{{ repo }}/ubuntu-kernel:6.5-aarch64-{{ tag }}", rootfs: "{{ repo }}/builder:aarch64-{{ tag }}", arch: "aarch64"} # noqa: yaml[line-length] + - {name: "fedora-builder", kernel: "{{ repo }}/fedora-kernel:6.2-aarch64-{{ tag }}", rootfs: "{{ repo }}/modernprobe-builder:aarch64-{{ tag }}", arch: 
"aarch64"} # noqa: yaml[line-length] output_dir: "~/ansible_output" # Number of cpus. cpus: 2 -# Memory size in GB. -memory: 2 +# Memory size in mebibytes. +memory: 2048 # run_id is used to identify all the machines generated by a given run of the playbook. run_id: "here-goes-the-id" @@ -70,8 +67,8 @@ run_id: "here-goes-the-id" ##################### # Path to the generated SSH private key file -ssh_key_path: "/root/.ssh/" -ssh_key_name: ignite_machines +ssh_key_path: "/root/.ssh" +ssh_key_name: firecracker_machines # Path to the private key prv_key_path: "{{ ssh_key_path }}/{{ ssh_key_name }}" @@ -96,3 +93,9 @@ repos: local_repos_folder: "./roles/git_repos/files/repos" remote_repos_folder: "/root" + +# Directory where files that must persistent among different runs are stored. +cached_files_path: "/root/kernel_testing_ci_cache" + +# Directory where ephemeral, run-specific files are stored. +runtime_root: "{{ playbook_dir }}/runtime" diff --git a/ansible-playbooks/roles/bootstrap/.gitignore b/ansible-playbooks/roles/bootstrap/.gitignore deleted file mode 100644 index a37273b..0000000 --- a/ansible-playbooks/roles/bootstrap/.gitignore +++ /dev/null @@ -1 +0,0 @@ -files/ diff --git a/ansible-playbooks/roles/bootstrap/files/check_net_conflicts.sh b/ansible-playbooks/roles/bootstrap/files/check_net_conflicts.sh new file mode 100755 index 0000000..e7aefb0 --- /dev/null +++ b/ansible-playbooks/roles/bootstrap/files/check_net_conflicts.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +set -euo pipefail + +check_duplicates() { + FIELD="$1" + DUPLICATES="" + + DUPLICATES=$(jq -r " + group_by(.${FIELD}) + | map(select(length > 1)) + | map(.[0].${FIELD}) + | .[] + " <<< "$TAP_DEV_MAP" || true) + + if [[ -n "$DUPLICATES" ]]; then + echo "Error: Duplicate ${FIELD} values detected: $DUPLICATES" >&2 + exit 1 + fi +} + +TAP_DEV_MAP="$1" + +# Iterate over JSON objects one by one and check for any external conflict. 
+jq -r '.[] | "\(.name) \(.host_ip)"' <<<"$TAP_DEV_MAP" | while read -r TAP_NAME HOST_IP; do + + # Check tap name conflict. + if ip link show "$TAP_NAME" >/dev/null 2>&1; then + echo "Error: TAP device '$TAP_NAME' already exists" >&2 + exit 1 + fi + + # Check host IP address conflict (`ip addr show to` exits 0 even on no match, so test for non-empty output). + if [ -n "$(ip -o addr show to "$HOST_IP" 2>/dev/null)" ]; then + echo "Error: Host already has an IP $HOST_IP" >&2 + exit 1 + fi +done + +# Check for any internal conflict in names and host IPs, separately. +check_duplicates "name" +check_duplicates "host_ip" \ No newline at end of file diff --git a/ansible-playbooks/roles/bootstrap/files/dnsmasq-tap@.service b/ansible-playbooks/roles/bootstrap/files/dnsmasq-tap@.service new file mode 100644 index 0000000..9c403eb --- /dev/null +++ b/ansible-playbooks/roles/bootstrap/files/dnsmasq-tap@.service @@ -0,0 +1,13 @@ +[Unit] +Description=Per-tap dnsmasq instance for %i (expected instance format: DEV:HOST_IP:GUEST_IP) +After=network.target +Requires=network.target + +[Service] +Type=exec +Environment="ARGS=%i" +ExecStart=/bin/bash -c 'IFS=: read DEV HOST_IP GUEST_IP <<< $ARGS; exec /usr/sbin/dnsmasq --keep-in-foreground --interface=$DEV --bind-dynamic --port=0 --dhcp-range=$GUEST_IP,$GUEST_IP,255.255.255.252,1h --dhcp-option=3,$HOST_IP --dhcp-option=6,1.1.1.1 --dhcp-leasefile=/run/dnsmasq-tap-$DEV.leases' +Restart=always + +[Install] +WantedBy=multi-user.target diff --git a/ansible-playbooks/roles/bootstrap/handlers/main.yml b/ansible-playbooks/roles/bootstrap/handlers/main.yml new file mode 100644 index 0000000..57fcca9 --- /dev/null +++ b/ansible-playbooks/roles/bootstrap/handlers/main.yml @@ -0,0 +1,3 @@ +- name: Reload systemd + ansible.builtin.systemd: + daemon_reload: true diff --git a/ansible-playbooks/roles/bootstrap/tasks/main.yml b/ansible-playbooks/roles/bootstrap/tasks/main.yml index 9e56fc4..1df0887 100644 --- a/ansible-playbooks/roles/bootstrap/tasks/main.yml +++ b/ansible-playbooks/roles/bootstrap/tasks/main.yml @@ -6,6 +6,8 @@ 
that: - machines is defined - ssh_key_path != "" + - runtime_root != "" + - run_id != "" - name: Check if the ~/.ssh directory exists, if not create it ansible.builtin.file: @@ -15,33 +17,23 @@ - name: Checking if ssh key exists and if not generate a new one community.crypto.openssh_keypair: - path: "{{ ssh_key_path }}/{{ ssh_key_name }}" + path: "{{ prv_key_path }}" -- name: Create the files directory in bootstrap role +- name: Create the cached files directory ansible.builtin.file: - path: "./roles/bootstrap/files" + path: "{{ cached_files_path }}" state: directory mode: '0755' -- name: Template the ignite-vm.yaml configuratin file for machines - ansible.builtin.template: - src: ignite-vm.yaml.j2 - dest: "./roles/bootstrap/files/{{ item.name }}.yaml" - mode: '0755' - loop: - "{{ machines }}" - when: item.arch == ansible_facts["architecture"] - delegate_to: localhost +- name: Set run files directory for the current run run_id={{ run_id }} + ansible.builtin.set_fact: + bootstrap_run_files_path: "{{ runtime_root }}/{{ run_id }}" -- name: Template the ignite-vm.yaml configuratin file for builders - ansible.builtin.template: - src: ignite-vm.yaml.j2 - dest: "./roles/bootstrap/files/{{ item.name }}.yaml" +- name: Create the run files directory run_id={{ run_id }} + ansible.builtin.file: + path: "{{ bootstrap_run_files_path }}" + state: directory mode: '0755' - loop: - "{{ builders }}" - when: item.arch == ansible_facts["architecture"] - delegate_to: localhost - name: Pull kernel and rootfs OCI images block: @@ -61,54 +53,251 @@ loop: "{{ machines | union(builders) }}" when: item.arch == ansible_facts["architecture"] -- name: Create virtual machines run_id={{ run_id }} - ansible.builtin.command: - cmd: ignite run --config "./roles/bootstrap/files/{{ item.name }}.yaml" --runtime docker - register: ignite_run - changed_when: ignite_run.rc == 0 +- name: Extract vmlinux and initrd from kernel OCI images # noqa: risky-shell-pipe + vars: + kernel_basename: "{{ item.kernel | 
basename | regex_replace(':', '.') }}" + args: + creates: "{{ cached_files_path }}/{{ kernel_basename }}.initrd" loop: "{{ machines | union(builders) }}" - when: item.arch == ansible_facts["architecture"] + when: item.arch == ansible_facts['architecture'] become: true + ansible.builtin.shell: | + # Enable Bash safety only if running under Bash + [ -n "$BASH_VERSION" ] && eval "set -eo pipefail" + + CID="" + TMP_DIR="" + + cleanup() { + [ -n "$CID" ] && docker rm "$CID" >/dev/null 2>&1 || : + [ -n "$TMP_DIR" ] && rm -rf "$TMP_DIR" || : + } + + # Trigger cleanup at exit. + trap cleanup EXIT + + # Create container. + CID=$(docker create "{{ item.kernel }}" /bin/sh) || exit 1 + + # Create a temporary directory. + TMP_DIR=$(mktemp -d) || exit 1 + + # Export and extract vmlinux and initrd while preserving permissions. + # note: --same-owner and --preserve-permissions requires this task to be run as root. + docker export "$CID" | tar -x \ + --same-owner \ + --preserve-permissions \ + -C "$TMP_DIR" \ + --strip-components=1 \ + boot/vmlinux \ + boot/initrd -- name: Wait for the VMs to be running run_id={{ run_id }} # noqa: risky-shell-pipe + # Move extracted vmlinux and initrd to destination. 
+ mv "$TMP_DIR/vmlinux" "{{ cached_files_path }}/{{ kernel_basename }}.vmlinux" + mv "$TMP_DIR/initrd" "{{ cached_files_path }}/{{ kernel_basename }}.initrd" + +- name: Create raw disk images containing ext4 filesystem from rootfs OCI images # noqa: risky-shell-pipe + vars: + rootfs_basename: "{{ item.rootfs | basename | regex_replace(':', '.') }}" + disk_image: "{{ cached_files_path }}/{{ rootfs_basename }}.ext4" + args: + creates: "{{ disk_image }}" + loop: "{{ machines | union(builders) }}" + when: item.arch == ansible_facts['architecture'] + become: true ansible.builtin.shell: | - if test -v BASH; then set -o pipefail; fi - ignite ps -f \{\{.ObjectMeta.Name\}\}={{ item.name }}-{{ run_id }},\{\{.Status.Running\}\}=true | wc -l - register: result - until: result.stdout | int == 2 - retries: 5 - delay: 10 + # Enable Bash safety only if running under Bash. + [ -n "$BASH_VERSION" ] && eval "set -eo pipefail" + + CID="" + TMP_DIR="" + LOOPDEV="" + + cleanup() { + EXIT_STATUS=$? + [ "$EXIT_STATUS" -ne 0 ] && rm -rf "{{ disk_image }}" || : + [ -n "$CID" ] && docker rm "$CID" >/dev/null 2>&1 || : + [ -n "$LOOPDEV" ] && losetup -d "$LOOPDEV" >/dev/null 2>&1 || : + if [ -n "$TMP_DIR" ]; then + mountpoint -q "$TMP_DIR" && umount "$TMP_DIR" || : + rm -rf "$TMP_DIR" || : + fi + } + + # Trigger cleanup at exit. + trap cleanup EXIT + + # Create an initial empty disk image. + truncate -s 5G "{{ disk_image }}" + mkfs.ext4 -F "{{ disk_image }}" + + # Create container. + CID=$(docker create "{{ item.rootfs }}" /bin/sh) || exit 1 + + # Create a temporary directory. + TMP_DIR=$(mktemp -d) || exit 1 + + # Attach loop device explicitly + LOOPDEV=$(losetup -f --show "{{ disk_image }}") + + # Mount disk on a loop device and copy the image content into the disk. + mount -o loop "{{ disk_image }}" "$TMP_DIR" + docker export "$CID" | tar -C "$TMP_DIR" -xf - + + # Finalize and check disk integrity. 
+ sync + umount "$TMP_DIR" + losetup -d "$LOOPDEV" + LOOPDEV="" + e2fsck -fy "{{ disk_image }}" + +- name: Clone raw disk images for the current run run_id={{ run_id }} + vars: + rootfs_basename: "{{ item.rootfs | basename | regex_replace(':', '.') }}" + original_image: "{{ cached_files_path }}/{{ rootfs_basename }}.ext4" + working_image: "{{ bootstrap_run_files_path }}/{{ rootfs_basename }}.ext4" + # Create a lightweight CoW clone (if fs supports reflinks) and preserve raw disk sparseness. + ansible.builtin.command: > + cp --sparse=always --reflink=auto "{{ original_image }}" "{{ working_image }}" + args: + creates: "{{ working_image }}" loop: "{{ machines | union(builders) }}" - when: item.arch == ansible_facts["architecture"] - changed_when: result.stdout | int == 2 + when: item.arch == ansible_facts['architecture'] become: true -- name: Get IP of the VMs(machines) and register them in a variable run_id={{ run_id }} - ansible.builtin.command: - cmd: ignite ps -f \{\{.ObjectMeta.Name\}\}={{ item.name }}-{{ run_id }},\{\{.Status.Running\}\}=true -t \{\{.Status.Network.IPAddresses\}\} - register: machine_ips - failed_when: machine_ips.stdout_lines | length != 1 - changed_when: machine_ips.stdout_lines | length == 1 - loop: "{{ machines }}" - when: item.arch == ansible_facts["architecture"] +- name: Setup ssh inside rootfs ext4 image clones + vars: + rootfs_basename: "{{ item.rootfs | basename | regex_replace(':', '.') }}" + disk_image: "{{ bootstrap_run_files_path }}/{{ rootfs_basename }}.ext4" become: true + block: + - name: Ensure filesystem is clean before modifying rootfs ext4 image clones + ansible.builtin.command: e2fsck -fy "{{ disk_image }}" + changed_when: false + loop: "{{ machines | union(builders) }}" + when: item.arch == ansible_facts['architecture'] + + - name: Ensure /root/.ssh exists inside rootfs ext4 image clones + ansible.builtin.shell: | + e2mkdir -P 700 "{{ disk_image }}:/root/.ssh" || : + changed_when: false + loop: "{{ machines | 
union(builders) }}" + when: item.arch == ansible_facts['architecture'] + + - name: Copy public key inside rootfs ext4 image clones + ansible.builtin.shell: | + e2cp -P 600 "{{ pub_key_path }}" "{{ disk_image }}":/root/.ssh/authorized_keys + changed_when: false + loop: "{{ machines | union(builders) }}" + when: item.arch == ansible_facts['architecture'] + +- name: Run common/tasks/compute_tap_dev_map.yml + ansible.builtin.import_role: + name: common + tasks_from: compute_tap_dev_map + +- name: Verify any conflict in tap device map + ansible.builtin.command: > + "{{ role_path }}/files/check_net_conflicts.sh" {{ common_tap_dev_map | to_json | quote }} + changed_when: false + +- name: Create and configure tap devices for VMs + become: true + block: + - name: Create tap devices + vars: + tap_dev_name: "{{ common_tap_dev_map[item.name].name }}" + ansible.builtin.command: ip tuntap add dev "{{ tap_dev_name }}" mode tap + args: + creates: "/sys/class/net/{{ tap_dev_name }}/ifindex" + loop: "{{ machines | union(builders) }}" + when: item.arch == ansible_facts["architecture"] + + - name: Set tap devices up + vars: + tap_dev_name: "{{ common_tap_dev_map[item.name].name }}" + ansible.builtin.command: ip link set "{{ tap_dev_name }}" up + loop: "{{ machines | union(builders) }}" + when: item.arch == ansible_facts["architecture"] + changed_when: false -- name: Get IP of the VMs(builders) and register them in a variable run_id={{ run_id }} - ansible.builtin.command: - cmd: ignite ps -f \{\{.ObjectMeta.Name\}\}={{ item.name }}-{{ run_id }},\{\{.Status.Running\}\}=true -t \{\{.Status.Network.IPAddresses\}\} - register: builders_ips - failed_when: builders_ips.stdout_lines | length != 1 - changed_when: builders_ips.stdout_lines | length == 1 - loop: "{{ builders }}" + - name: Set IP addresses on tap devices + vars: + tap_dev_name: "{{ common_tap_dev_map[item.name].name }}" + ip_addr: "{{ common_tap_dev_map[item.name].host_ip }}/30" + ansible.builtin.command: ip addr add "{{ ip_addr 
}}" dev "{{ tap_dev_name }}" + loop: "{{ machines | union(builders) }}" + when: + - item.arch == ansible_facts["architecture"] + - ip_addr not in lookup('pipe', 'ip -o addr show dev ' ~ tap_dev_name) + changed_when: true + +- name: Start dnsmasq services + become: true + block: + - name: Install dnsmasq-tap@.service + ansible.builtin.copy: + src: dnsmasq-tap@.service + dest: /etc/systemd/system/dnsmasq-tap@.service + owner: root + group: root + mode: "0644" + notify: Reload systemd + + - name: Execute notified systemd reloading handler + ansible.builtin.meta: flush_handlers + + - name: Start dnsmasq service on each tap device + vars: + tap_dev_name: "{{ common_tap_dev_map[item.name].name }}" + host_ip: "{{ common_tap_dev_map[item.name].host_ip }}" + guest_ip: "{{ common_tap_dev_map[item.name].guest_ip }}" + ansible.builtin.systemd: + name: "dnsmasq-tap@{{ tap_dev_name }}:{{ host_ip }}:{{ guest_ip }}" + state: started + loop: "{{ machines | union(builders) }}" + when: item.arch == ansible_facts['architecture'] + +- name: Template the vmconfig.json.j2 configuration file for machine and builders + vars: + kernel_basename: "{{ item.kernel | basename | regex_replace(':', '.') }}" + rootfs_basename: "{{ item.rootfs | basename | regex_replace(':', '.') }}" + vmlinux_path: "{{ cached_files_path }}/{{ kernel_basename }}.vmlinux" + initrd_path: "{{ cached_files_path }}/{{ kernel_basename }}.initrd" + rootfs_disk_path: "{{ bootstrap_run_files_path }}/{{ rootfs_basename }}.ext4" + tap_dev_name: "{{ common_tap_dev_map[item.name].name }}" + ansible.builtin.template: + src: vmconfig.json.j2 + dest: "{{ bootstrap_run_files_path }}/{{ item.name }}.json" + mode: '0755' + loop: "{{ machines | union(builders) }}" + when: item.arch == ansible_facts["architecture"] + +- name: Create virtual machines run_id={{ run_id }} + vars: + vm_name: "{{ item.name | regex_replace('[.]', '-') }}-{{ run_id }}" + vm_config_path: "{{ bootstrap_run_files_path }}/{{ item.name }}.json" + vm_socket_path: 
"/tmp/{{ run_id }}-{{ item.name }}.sock" + vm_logs_path: "{{ bootstrap_run_files_path }}/{{ item.name }}.log" + ansible.builtin.shell: > + RUST_LOG=debug nohup firecracker \ + --no-seccomp \ + --id "{{ vm_name }}" \ + --config-file "{{ vm_config_path }}" \ + --api-sock "{{ vm_socket_path }}" \ + > "{{ vm_logs_path }}" 2>&1 < /dev/null & + args: + executable: /bin/bash + changed_when: true + loop: "{{ machines | union(builders) }}" when: item.arch == ansible_facts["architecture"] become: true -- name: Template the inventory.ini.j2 configuration file to invetory.ini +- name: Template the inventory.ini.j2 configuration file to inventory.ini ansible.builtin.template: src: inventory.ini.j2 dest: inventory.ini mode: '0755' - delegate_to: localhost - name: Refresh inventory to ensure that the new generated one is used ansible.builtin.meta: refresh_inventory diff --git a/ansible-playbooks/roles/bootstrap/templates/ignite-vm.yaml.j2 b/ansible-playbooks/roles/bootstrap/templates/ignite-vm.yaml.j2 deleted file mode 100644 index f5a48ae..0000000 --- a/ansible-playbooks/roles/bootstrap/templates/ignite-vm.yaml.j2 +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: ignite.weave.works/v1alpha4 -kind: VM -metadata: - # Required, the name of the VM - name: {{ item.name }}-{{ run_id}} - labels: - run: {{ run_id }} -spec: - # Optional, how many vCPUs should be allocated for the VM - # Default: 1 - cpus: {{ cpus }} - # Optional, how much RAM should be allocated for the VM - # Default: 512MB - memory: {{ memory }}GB - - image: - # Required, what OCI image to use as the VM's rootfs - # For example: weaveworks/ignite-ubuntu:latest - oci: {{ item.rootfs }} - kernel: - # Required, what OCI rootfs to get the kernel binary (and optionally modules) from - # Default: weaveworks/ignite-kernel:5.10.51 - oci: {{ item.kernel}} - - - # Optional, provides automation to easily access your VM with the "ignite ssh" command - # If "ssh: true" is set, Ignite will generate an SSH key and copy the - # public key 
into the VM. This allows for automatic "ignite ssh" logins. - # Alternatively: specify a path to a public key to put in /root/.ssh/authorized_keys in the VM. - # Default: unset, no actions regarding SSH automation - ssh: {{ pub_key_path }} \ No newline at end of file diff --git a/ansible-playbooks/roles/bootstrap/templates/inventory.ini.j2 b/ansible-playbooks/roles/bootstrap/templates/inventory.ini.j2 index d1bbc88..cd41a5f 100644 --- a/ansible-playbooks/roles/bootstrap/templates/inventory.ini.j2 +++ b/ansible-playbooks/roles/bootstrap/templates/inventory.ini.j2 @@ -1,14 +1,14 @@ #this file is autogenerated by the bootstrap role [machines] -{% for result in machine_ips.results %} -{% if "skipped" not in result %} -{{ result.item.name }} ansible_host={{ result.stdout }} ansible_ssh_private_key_file={{ prv_key_path }} -{% endif%} +{% for item in machines %} +{% if item.arch == ansible_facts["architecture"] %} +{{ item.name }} ansible_host={{ common_tap_dev_map[item.name].guest_ip }} ansible_ssh_common_args='-o BindInterface={{ common_tap_dev_map[item.name].name }}' ansible_ssh_private_key_file={{ prv_key_path }} +{% endif %} {% endfor %} [builders] -{% for result in builders_ips.results %} -{% if "skipped" not in result %} -{{ result.item.name }} ansible_host={{ result.stdout }} ansible_ssh_private_key_file={{ prv_key_path }} -{% endif%} +{% for item in builders %} +{% if item.arch == ansible_facts["architecture"] %} +{{ item.name }} ansible_host={{ common_tap_dev_map[item.name].guest_ip }} ansible_ssh_common_args='-o BindInterface={{ common_tap_dev_map[item.name].name }}' ansible_ssh_private_key_file={{ prv_key_path }} +{% endif %} {% endfor %} diff --git a/ansible-playbooks/roles/bootstrap/templates/vmconfig.json.j2 b/ansible-playbooks/roles/bootstrap/templates/vmconfig.json.j2 new file mode 100644 index 0000000..83eb050 --- /dev/null +++ b/ansible-playbooks/roles/bootstrap/templates/vmconfig.json.j2 @@ -0,0 +1,26 @@ +{ + "boot-source": { + "kernel_image_path": "{{ 
vmlinux_path }}", + "initrd_path": "{{ initrd_path }}", + "boot_args": "rw console=ttyS0" + }, + "drives": [ + { + "drive_id": "rootfs", + "path_on_host": "{{ rootfs_disk_path }}", + "is_root_device": true, + "is_read_only": false + } + ], + "machine-config": { + "vcpu_count": {{ cpus }}, + "mem_size_mib": {{ memory }}, + "smt": false + }, + "network-interfaces": [ + { + "iface_id": "eth0", + "host_dev_name": "{{ tap_dev_name }}" + } + ] +} diff --git a/ansible-playbooks/roles/clean_up/tasks/main.yml b/ansible-playbooks/roles/clean_up/tasks/main.yml index a0418c8..781de80 100644 --- a/ansible-playbooks/roles/clean_up/tasks/main.yml +++ b/ansible-playbooks/roles/clean_up/tasks/main.yml @@ -2,83 +2,53 @@ # tasks file for clean-up # this role removes the cluster and the files created by the bootstrap role -- name: Find and save in a local variable all machines config files - ansible.builtin.find: - paths: ./roles/bootstrap/files - patterns: "*.yaml" - register: files_to_delete - -- name: Delete all the machine config files files - ansible.builtin.file: - path: "{{ item.path }}" - state: absent - with_items: "{{ files_to_delete.files }}" - - # We search all the machines created for this run_id. # We make sure that all machines are stopped and then delete them. 
- name: Stop running machines and delete them become: true block: - - name: Get name of running machines run_id={{ run_id }} - ansible.builtin.command: - cmd: ignite ps --all -f \{\{.Labels.run\}\}={{ run_id }},\{\{.Status.Running\}\}=true -t \{\{.Name\}\} - register: vms - changed_when: false - - name: Stop running machines run_id={{ run_id }} - ansible.builtin.command: - cmd: ignite kill {{ item }} - loop: "{{ vms.stdout_lines }}" - changed_when: false + vars: + vm_socket: "/tmp/{{ run_id }}-{{ item.name }}.sock" + ansible.builtin.uri: + url: "http://localhost/actions" + method: PUT + body: '{"action_type":"SendCtrlAltDel"}' + body_format: json + headers: + Content-Type: "application/json" + unix_socket: "{{ vm_socket }}" + loop: "{{ machines | union(builders) }}" + when: item.arch == ansible_facts["architecture"] + changed_when: true + failed_when: false - - name: Get name of stopped machines run_id={{ run_id }} - ansible.builtin.command: - cmd: ignite ps --all -f \{\{.Labels.run\}\}={{ run_id }} -t \{\{.Name\}\} - register: vms + - name: Wait for all machines to stop run_id={{ run_id }} + ansible.builtin.command: pgrep -f "firecracker.*--id .*{{ run_id }}" + register: clean_up_pgrep_result + failed_when: false changed_when: false + until: clean_up_pgrep_result.rc != 0 + retries: 10 + delay: 1 - - name: Delete machines run_id={{ run_id }} - ansible.builtin.command: - cmd: ignite rm {{ item }} - loop: "{{ vms.stdout_lines }}" - changed_when: false + - name: Force-kill remaining machines run_id={{ run_id }} + ansible.builtin.command: pkill -KILL -f "firecracker.*--id .*{{ run_id }}" + register: clean_up_pkill_result + failed_when: false + changed_when: clean_up_pkill_result.rc == 0 + when: clean_up_pgrep_result.rc == 0 -# Ignite imports the images from the OCI ones and caches them. -# If the images change in the remote repository, ignite continues to use -# the cached ones. During the clean phase, we make sure to remove them from -# the cache. 
-- name: Remove rootfs and kernel images from ignite cache - become: true - ignore_errors: true # noqa: ignore-errors - block: - - name: List cached images - ansible.builtin.command: - cmd: ignite images ls -q - register: images - changed_when: false + - name: Delete unix socket files created by machines run_id={{ run_id }} + ansible.builtin.file: + path: "/tmp/{{ run_id }}-{{ item.name }}.sock" + state: absent + loop: "{{ machines | union(builders) }}" + when: item.arch == ansible_facts["architecture"] - - name: Remove cached images from ignite - ansible.builtin.command: - cmd: ignite image rm {{ item }} - loop: "{{ images.stdout_lines }}" - changed_when: false - - - name: List cached kernels - ansible.builtin.command: - cmd: ignite kernel ls -q - register: kernels - changed_when: false - - - name: Remove cached kernels from ignite - ansible.builtin.command: - cmd: ignite kernel rm {{ item }} - loop: "{{ kernels.stdout_lines }}" - changed_when: false - -- name: Remove the files directory in the bootstrap role +- name: Remove the run files directory ansible.builtin.file: - path: "./roles/bootstrap/files" + path: "{{ runtime_root }}/{{ run_id }}" state: absent - name: Remove the inventory.ini file @@ -90,3 +60,30 @@ ansible.builtin.file: path: "{{ output_dir }}" state: absent + +- name: Run common/tasks/compute_tap_dev_map.yml + ansible.builtin.import_role: + name: common + tasks_from: compute_tap_dev_map + +- name: Stop all running dnsmasq-tap systemd services + become: true + block: + - name: Stop dnsmasq-tap systemd services + ansible.builtin.systemd: + name: "dnsmasq-tap@{{ item.name }}:{{ item.host_ip }}:{{ item.guest_ip }}" + state: stopped + loop: "{{ common_tap_dev_map.values() }}" + + - name: Remove all dnsmasq-tap DHCP lease files + ansible.builtin.file: + path: "/run/dnsmasq-tap-{{ item.name }}.leases" + state: absent + loop: "{{ common_tap_dev_map.values() }}" + +- name: Remove tap interfaces + ansible.builtin.command: ip link del "{{ item.name }}" + 
loop: "{{ common_tap_dev_map.values() }}" + failed_when: false + changed_when: false + become: true diff --git a/ansible-playbooks/roles/common/files/compute_tap_dev_map.sh b/ansible-playbooks/roles/common/files/compute_tap_dev_map.sh new file mode 100755 index 0000000..b2687e9 --- /dev/null +++ b/ansible-playbooks/roles/common/files/compute_tap_dev_map.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +set -euo pipefail + +calc_md5_hash() { + RUN_ID="$1" + VM_ID="$2" + + printf "%s" "${RUN_ID}-${VM_ID}" | md5sum | cut -d ' ' -f1 +} + +compute_tap() { + RUN_ID="$1" + VM_ID="$2" + + # Compute hash and get the first 12 characters. + ID=$(calc_md5_hash "$RUN_ID" "$VM_ID" | cut -c1-12) + + # Add "tap" prefix. + echo "tap$ID" +} + +compute_addresses() { + RUN_ID="$1" + VM_ID="$2" + + hash=$(calc_md5_hash "$RUN_ID" "$VM_ID") + + # Convert last 4 hex chars to integer and mask to 14 bits. + subnet_idx=$(( 0x${hash:28:4} & 0x3FFF )) + + # Compute the third and fourth octet (each /30 advances by 4 in the last octet). + THIRD_OCTET=$(( subnet_idx / 64 )) + FORTH_OCTET=$(( (subnet_idx % 64) * 4 )) + HOST_IP="172.16.$THIRD_OCTET.$(( FORTH_OCTET + 1 ))" + GUEST_IP="172.16.$THIRD_OCTET.$(( FORTH_OCTET + 2 ))" + echo "$HOST_IP $GUEST_IP" +} + +RUN_ID="$1" +VM_IDS="$2" + +# Create arrays to pass to jq later. +JQ_ARGS=() +JQ_CODE="{}" + +for VM_ID in $VM_IDS; do + read -r HOST_IP GUEST_IP <<< "$(compute_addresses "$RUN_ID" "$VM_ID")" + TAP=$(compute_tap "$RUN_ID" "$VM_ID") + + SAFE_VM_ID="${VM_ID//[^a-zA-Z0-9_]/_}" + + # Prepare named jq args. + JQ_ARGS+=( --arg "name_$SAFE_VM_ID" "$TAP" ) + JQ_ARGS+=( --arg "host_ip_$SAFE_VM_ID" "$HOST_IP" ) + JQ_ARGS+=( --arg "guest_ip_$SAFE_VM_ID" "$GUEST_IP" ) + + # Extend jq program. E.g.: .["1"] = {name: $name_1, host_ip: $host_ip_1, guest_ip: $guest_ip_1}. + JQ_CODE+=" | .[\"$VM_ID\"] = {name: \$name_$SAFE_VM_ID, host_ip: \$host_ip_$SAFE_VM_ID, guest_ip: \$guest_ip_$SAFE_VM_ID}" +done + +# Build the final JSON result. 
+jq -n "${JQ_ARGS[@]}" "$JQ_CODE" \ No newline at end of file diff --git a/ansible-playbooks/roles/common/tasks/compute_tap_dev_map.yml b/ansible-playbooks/roles/common/tasks/compute_tap_dev_map.yml new file mode 100644 index 0000000..320c747 --- /dev/null +++ b/ansible-playbooks/roles/common/tasks/compute_tap_dev_map.yml @@ -0,0 +1,14 @@ +- name: Compute tap device map + ansible.builtin.command: > + "{{ role_path }}/files/compute_tap_dev_map.sh" + "{{ run_id }}" + "{{ ((machines + builders) + | selectattr('arch', 'equalto', ansible_facts['architecture']) + | map(attribute='name') + | join(' ')) }}" + register: common_tap_dev_map_raw + changed_when: false + +- name: Set tap device map + ansible.builtin.set_fact: + common_tap_dev_map: "{{ common_tap_dev_map_raw.stdout | from_json }}" diff --git a/ansible-playbooks/roles/scap_open/tasks/main.yml b/ansible-playbooks/roles/scap_open/tasks/main.yml index df9e9e5..4d21617 100644 --- a/ansible-playbooks/roles/scap_open/tasks/main.yml +++ b/ansible-playbooks/roles/scap_open/tasks/main.yml @@ -2,7 +2,7 @@ # tasks file for scap_open - name: Setting output directory for results ansible.builtin.set_fact: - output_dest_dir: "{{ output_dir }}/scap-open-test/{{ inventory_hostname }}" + scap_open_output_dest_dir: "{{ output_dir }}/scap-open-test/{{ inventory_hostname }}" - name: Create output directory on localhost become: false @@ -10,7 +10,7 @@ block: - name: Create output directory if it does not exist ({{ output_dir }}) ansible.builtin.file: - path: "{{ output_dest_dir }}" + path: "{{ scap_open_output_dest_dir }}" state: directory mode: '0755' @@ -19,13 +19,13 @@ - name: Check modern-bpf support ansible.builtin.command: cmd: /tmp/scap-open --num_events 0 --modern_bpf - register: result + register: scap_open_result changed_when: false rescue: - name: Disable Modern Bpf support ansible.builtin.set_fact: scap_open_modern_bpf_supported: false - when: result.rc == 95 + when: scap_open_result.rc == 95 - name: Check Old Bpf Support 
block: @@ -41,7 +41,7 @@ path: "{{ remote_repos_folder }}/repos/{{ repos['libs'].name }}/build" state: directory mode: "0766" - register: cmake_result + register: scap_open_cmake_result - name: Prepare cmake for repository ansible.builtin.command: @@ -55,16 +55,16 @@ .. chdir: "{{ remote_repos_folder }}/repos/{{ repos['libs'].name }}/build" changed_when: false - register: cmake_result + register: scap_open_cmake_result rescue: - name: Print error message to stdout --- build directory ansible.builtin.debug: - var: cmake_result + var: scap_open_cmake_result always: - name: Dump error message to file ansible.builtin.copy: - content: "{{ cmake_result | to_nice_json }}" - dest: "{{ output_dest_dir }}/cmake-configure.json" + content: "{{ scap_open_cmake_result | to_nice_json }}" + dest: "{{ scap_open_output_dest_dir }}/cmake-configure.json" mode: '0755' delegate_to: localhost become: false @@ -80,26 +80,26 @@ - name: Build kmod ansible.builtin.command: - cmd: make driver -j {{ cpus }} + cmd: cmake --build . 
--target driver --parallel {{ cpus }} chdir: "{{ remote_repos_folder }}/repos/{{ repos['libs'].name }}/build" - register: km_result + register: scap_open_km_result changed_when: false - name: Load the kernel module ansible.builtin.command: cmd: insmod driver/scap.ko chdir: "{{ remote_repos_folder }}/repos/{{ repos['libs'].name }}/build" - register: km_result + register: scap_open_km_result changed_when: false rescue: - name: Print error message to stdout --- kernel module ansible.builtin.debug: - var: km_result + var: scap_open_km_result always: - name: Dump error message to file ansible.builtin.copy: - content: "{{ km_result | to_nice_json }}" - dest: "{{ output_dest_dir }}/kmod_build.json" + content: "{{ scap_open_km_result | to_nice_json }}" + dest: "{{ scap_open_output_dest_dir }}/kmod_build.json" mode: '0755' delegate_to: localhost become: false @@ -110,24 +110,24 @@ ansible.builtin.command: cmd: /tmp/scap-open --num_events 50 --kmod chdir: "{{ remote_repos_folder }}/repos/{{ repos['libs'].name }}/build" - register: result + register: scap_open_result changed_when: false - name: Unload the kernel module ansible.builtin.command: cmd: rmmod driver/scap.ko chdir: "{{ remote_repos_folder }}/repos/{{ repos['libs'].name }}/build" - register: result + register: scap_open_result changed_when: false rescue: - name: Print error message to stdout -- scap-open + kernel module ansible.builtin.debug: - var: result + var: scap_open_result always: - name: Dump error message to file ansible.builtin.copy: - content: "{{ result | to_nice_json }}" - dest: "{{ output_dest_dir }}/kmod_scap-open.json" + content: "{{ scap_open_result | to_nice_json }}" + dest: "{{ scap_open_output_dest_dir }}/kmod_scap-open.json" mode: '0755' delegate_to: localhost become: false @@ -136,20 +136,20 @@ block: - name: Build bpf probe ansible.builtin.command: - cmd: make bpf -j {{ cpus }} + cmd: cmake --build . 
--target bpf --parallel {{ cpus }} chdir: "{{ remote_repos_folder }}/repos/{{ repos['libs'].name }}/build" - register: bpf_probe_result + register: scap_open_bpf_probe_result when: scap_open_bpf_supported changed_when: false rescue: - name: Print error message to stdout --- build bpf probe ansible.builtin.debug: - var: bpf_probe_result + var: scap_open_bpf_probe_result always: - name: Dump error message to file ansible.builtin.copy: - content: "{{ bpf_probe_result | to_nice_json }}" - dest: "{{ output_dest_dir }}/bpf-probe_build.json" + content: "{{ scap_open_bpf_probe_result | to_nice_json }}" + dest: "{{ scap_open_output_dest_dir }}/bpf-probe_build.json" mode: '0755' delegate_to: localhost become: false @@ -160,18 +160,18 @@ ansible.builtin.command: cmd: /tmp/scap-open --num_events 50 --bpf driver/bpf/probe.o chdir: "{{ remote_repos_folder }}/repos/{{ repos['libs'].name }}/build" - register: result + register: scap_open_result when: scap_open_bpf_supported changed_when: false rescue: - name: Print error message to stdout --- scap-open + bpf probe ansible.builtin.debug: - var: result + var: scap_open_result always: - name: Dump error message to file ansible.builtin.copy: - content: "{{ result | to_nice_json }}" - dest: "{{ output_dest_dir }}/bpf-probe_scap-open.json" + content: "{{ scap_open_result | to_nice_json }}" + dest: "{{ scap_open_output_dest_dir }}/bpf-probe_scap-open.json" mode: '0755' delegate_to: localhost become: false @@ -182,18 +182,18 @@ ansible.builtin.command: cmd: /tmp/scap-open --num_events 50 --modern_bpf chdir: "{{ remote_repos_folder }}/repos/{{ repos['libs'].name }}/build" - register: result + register: scap_open_result when: scap_open_modern_bpf_supported changed_when: false rescue: - name: Print error message to stdout --- scap-open + modern probe ansible.builtin.debug: - var: result + var: scap_open_result always: - name: Dump error message to file ansible.builtin.copy: - content: "{{ result | to_nice_json }}" - dest: "{{ output_dest_dir 
}}/modern-bpf_scap-open.json" + content: "{{ scap_open_result | to_nice_json }}" + dest: "{{ scap_open_output_dest_dir }}/modern-bpf_scap-open.json" mode: '0755' delegate_to: localhost become: false diff --git a/ansible-playbooks/scap-open.yml b/ansible-playbooks/scap-open.yml index 5207c8a..8d12783 100644 --- a/ansible-playbooks/scap-open.yml +++ b/ansible-playbooks/scap-open.yml @@ -28,7 +28,7 @@ - name: Build skeleton ansible.builtin.command: - cmd: make ProbeSkeleton -j {{ cpus }} + cmd: cmake --build . --target ProbeSkeleton --parallel {{ cpus }} chdir: "{{ remote_repos_folder }}/repos/{{ repos['libs'].name }}/skeleton-build" changed_when: false register: cmake_result @@ -76,7 +76,7 @@ - name: Build scap-open with modern probe ansible.builtin.shell: - cmd: source /opt/rh/devtoolset-9/enable && make scap-open -j {{ cpus }} + cmd: source /opt/rh/devtoolset-9/enable && cmake --build . --target scap-open --parallel {{ cpus }} chdir: "{{ remote_repos_folder }}/repos/{{ repos['libs'].name }}/build" changed_when: false register: cmake_result diff --git a/architecture.md b/architecture.md new file mode 100644 index 0000000..cca1b48 --- /dev/null +++ b/architecture.md @@ -0,0 +1,58 @@ +# Architecture + +This document describes requirements and implementation details of the solution. + +## VM spawning + +### Requirements + +Each VM requires 3 elements to be spawned: + +1) a kernel binary (`vmlinux` file) +2) an initramfs (`initrd` file) +3) a `rootfs` ext4 raw disk image + +### Implementation + +`vmlinux` and `initrd` are extracted from the same corresponding `*-kernel*` docker image, while the `rootfs` is shipped +in a separate `*-image*` docker image. Extraction happens at runtime, and extracted artifacts are cached and reused for +later runs. On each run, a single `rootfs` ext4 disk image is CoW-cloned (shallow copy), and the ephemeral clone is +patched to enable SSHing from the host. 
+
+## Networking
+
+### Requirements
+
+- the host must be able to SSH into VMs
+- each VM must be able to connect to the Internet, to download needed dependencies
+
+### Implementation
+
+Each VM is connected to the host through a TAP interface. For each VM, a `/30` subnet, taken from the `172.16.0.0/16`
+range, is allocated.
+The subnet is uniquely identified by the `run_id` and the machine name (as specified in
+[vars.yml](./ansible-playbooks/group_vars/all/vars.yml)).
+For each VM, the first address of the corresponding subnet is assigned to the TAP interface, while the second one is
+assigned to the guest OS interface.
+Each VM receives its networking configuration through DHCP. The networking configuration includes:
+
+- the guest interface IP address
+- the default route (i.e.: the TAP interface IP address)
+- DNS configuration (i.e.: `1.1.1.1`)
+
+A dedicated DHCP server (`dnsmasq` instance) is spawned for each TAP interface, specifically configured to offer the
+above configuration.
+The `dnsmasq` instances are spawned through `systemd`. 
Given an interface `tapX`, and the corresponding assigned subnet
+`172.16.Y.Z/30`, the corresponding `systemd` service instance exposing the DHCP service will be named as follows:
+
+```
+dnsmasq-tap@tapX:172.16.Y.{ Z + 1 }:172.16.Y.{ Z + 2 }.service
+```
+
+Given the aforementioned requirements, on the host:
+
+- IP forwarding must be enabled for IPv4
+- reverse path filtering must be disabled on all interfaces
+- traffic sourced from `172.16.0.0/16` and exiting the host external interface must be NATted
+- the FORWARD chain must allow both incoming and outgoing traffic for `172.16.0.0/16`
+- the INPUT chain must allow incoming traffic on `tap+` interfaces
diff --git a/config_example_amd64.sh b/config_example_amd64.sh
new file mode 100644
index 0000000..ea461ab
--- /dev/null
+++ b/config_example_amd64.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# NOTICE: This script is not intended to be run as is: it is just demonstrative, and is required for the user to go
+# through it and adapt the different parts to the specific environment. 
+ +### Install miscellaneous dependencies + +sudo apt update -y +sudo apt install -y git iproute2 dnsmasq e2tools e2fsprogs ca-certificates curl nano iputils-ping rsync + +### Install ansible ### +sudo apt update -y +sudo apt install -y software-properties-common +sudo add-apt-repository --yes --update ppa:ansible/ansible +sudo apt install -y ansible-core=2.16.3-0ubuntu2 + +### Install ansible requirements globally ### +git clone https://github.com/kernel-testing/kernel-testing.git +sudo mkdir -p /usr/share/ansible/collections +sudo chmod 755 /usr/share/ansible/collections +sudo ansible-galaxy collection install -r kernel-testing/requirements.yml -p /usr/share/ansible/collections +echo 'ANSIBLE_COLLECTIONS_PATHS=/usr/share/ansible/collections:$ANSIBLE_COLLECTIONS_PATHS' | sudo tee -a /etc/environment + +ansible-galaxy install -r kernel-testing/requirements.yml +sudo ansible-galaxy install -r kernel-testing/requirements.yml + +### Install firecracker ### +curl -LO https://github.com/firecracker-microvm/firecracker/releases/download/v1.13.1/firecracker-v1.13.1-x86_64.tgz +tar -xzf firecracker-v1.13.1-x86_64.tgz +sudo mv release-v1.13.1-x86_64/firecracker-v1.13.1-x86_64 /usr/local/bin/firecracker +sudo chmod +x /usr/local/bin/firecracker + +### Install docker ### +sudo apt install -y ca-certificates curl +sudo install -m 0755 -d /etc/apt/keyrings +sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc +sudo chmod a+r /etc/apt/keyrings/docker.asc +echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. 
/etc/os-release && echo "${UBUNTU_CODENAME:-$VERSION_CODENAME}") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null +sudo apt update -y +sudo apt install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin + +### Install go globally ### +curl -LO https://go.dev/dl/go1.25.4.linux-amd64.tar.gz +sudo rm -rf /usr/local/go && sudo tar -C /usr/local -xzf go1.25.4.linux-amd64.tar.gz +echo 'PATH="/usr/local/go/bin:$PATH"' | sudo tee /etc/environment + +### Configure networking ### + +## Enable IP forwarding +sudo sysctl -w net.ipv4.ip_forward=1 +echo "net.ipv4.ip_forward = 1" | sudo tee /etc/sysctl.d/99-firecracker.conf + +## Disable reverse path filtering +CONFIG_FILE="/etc/sysctl.d/99-rp_filter.conf" +sudo bash -c "cat > $CONFIG_FILE" <<'EOF' +net.ipv4.conf.all.rp_filter = 0 +net.ipv4.conf.default.rp_filter = 0 +EOF +sudo sysctl --system +# just to be super sure that reverse path filtering is disabled for the current interfaces +for f in /proc/sys/net/ipv4/conf/*/rp_filter; do # + echo 0 | sudo tee "$f" +done + +## Configure iptables +# note: pay attention to use the right interface (in place of eth0) +sudo iptables -t nat -A POSTROUTING -s 172.16.0.0/16 -o eth0 -j MASQUERADE +sudo iptables -I FORWARD 1 -s 172.16.0.0/16 -j ACCEPT +sudo iptables -I FORWARD 2 -d 172.16.0.0/16 -j ACCEPT +sudo iptables -I INPUT 1 -i tap+ -j ACCEPT diff --git a/requirements.yml b/requirements.yml index e69af9e..3c68c43 100644 --- a/requirements.yml +++ b/requirements.yml @@ -1,5 +1,8 @@ --- collections: - name: community.docker + version: 4.8.2 - name: community.crypto + version: 2.26.1 - name: ansible.posix + version: 2.1.0