diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 00350fa..29ea860 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -119,7 +119,7 @@ jobs: if: always() - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: compliance-test path: artifacts @@ -148,8 +148,7 @@ jobs: run: ansible-galaxy collection install -r deployment/requirements.yml - name: Run Ansible playbook - # GitHub runner already has Docker installed. - run: ansible-playbook -i deployment/inventory deployment/site.yml --skip-tags docker + run: ansible-playbook -i deployment/inventory deployment/site.yml - name: Install Python client dependencies run: pip install -r scripts/requirements.txt diff --git a/deployment/group_vars/reductionist b/deployment/group_vars/all similarity index 92% rename from deployment/group_vars/reductionist rename to deployment/group_vars/all index 7e4ae30..82a37a7 100644 --- a/deployment/group_vars/reductionist +++ b/deployment/group_vars/all @@ -13,7 +13,7 @@ reductionist_image: "ghcr.io/stackhpc/reductionist-rs" reductionist_tag: "latest" # List of container networks. reductionist_networks: - - name: host + - host # Container environment. reductionist_env: OTEL_EXPORTER_JAEGER_AGENT_HOST: "{{ hostvars[(groups['jaeger'] | default([]) + [inventory_hostname])[0]].ansible_facts.default_ipv4.address }}" @@ -21,7 +21,7 @@ reductionist_env: REDUCTIONIST_HTTPS: "true" REDUCTIONIST_PORT: "8081" # Path to certificates directory on remote host. -reductionist_remote_certs_path: "{{ ansible_facts.env.HOME }}/.config/reductionist/certs" +reductionist_remote_certs_path: "{{ ansible_facts.env.HOME }}/certs" # Path to certificates directory in container. reductionist_container_certs_path: "/root/.config/reductionist/certs" # List of container volume mounts. 
diff --git a/deployment/group_vars/haproxy b/deployment/group_vars/haproxy index df01cf4..b25a0f2 100644 --- a/deployment/group_vars/haproxy +++ b/deployment/group_vars/haproxy @@ -1,6 +1,6 @@ --- # Global max connections. -haproxy_max_connections: 40000 +haproxy_max_connections: 20000 # Number of threads. haproxy_threads: 1 # Default max connections. diff --git a/deployment/inventory b/deployment/inventory index 6e98067..4be4def 100644 --- a/deployment/inventory +++ b/deployment/inventory @@ -35,7 +35,7 @@ localhost ansible_connection=local reductionist # Do not edit. -[docker:children] +[podman:children] haproxy jaeger minio diff --git a/deployment/requirements.yml b/deployment/requirements.yml index d1a4685..68b4a8e 100644 --- a/deployment/requirements.yml +++ b/deployment/requirements.yml @@ -1,3 +1,3 @@ --- collections: - - community.docker + - containers.podman diff --git a/deployment/site.yml b/deployment/site.yml index 79316fc..2343e97 100644 --- a/deployment/site.yml +++ b/deployment/site.yml @@ -1,44 +1,45 @@ --- # See deployment/README.md for usage -- name: Install Docker - hosts: docker +- name: Install Podman Docker + hosts: podman tags: - - docker - become: true + - podman tasks: - - name: Ensure docker is installed on Ubuntu + # An unprivileged user account requires linger to be enabled so that podman continues to run + # after the user's session has been terminated upon logout. + # The command: loginctl show-user <user> + # Should show: Linger=yes + # When linger is enabled the file "/var/lib/systemd/linger/<user>" exists; the tasks below check for it. + - name: Determine linger state for Reductionist user + ansible.builtin.stat: + path: "/var/lib/systemd/linger/{{ ansible_facts['user_id'] }}" + register: systemd_linger_path_for_reductionist + - name: Enable linger for unprivileged Reductionist user + ansible.builtin.command: loginctl enable-linger {{ ansible_facts['user_id'] }} + changed_when: true + when: not systemd_linger_path_for_reductionist.stat.exists + become: true + + - name: Ensure podman docker 
is installed on Ubuntu when: ansible_facts["os_family"] | lower == "debian" ansible.builtin.package: - name: docker.io + name: + - passt + - podman-docker state: present update_cache: true + become: true - - name: Ensure docker repo signing key exists on RedHat - when: ansible_facts["os_family"] | lower == "redhat" - ansible.builtin.rpm_key: - key: "https://download.docker.com/linux/centos/gpg" - state: present - - - name: Ensure docker repo exists on RedHat - when: ansible_facts["os_family"] | lower == "redhat" - ansible.builtin.yum_repository: - name: docker - description: docker repository - baseurl: "https://download.docker.com/linux/centos/$releasever/$basearch/stable" - enabled: true - gpgcheck: true - gpgkey: "https://download.docker.com/linux/centos/gpg" - - - name: Ensure docker is installed on RedHat + - name: Ensure podman docker is installed on RedHat when: ansible_facts["os_family"] | lower == "redhat" ansible.builtin.package: name: - - docker-ce - - docker-ce-cli - - containerd.io + - passt + - podman-docker state: present update_cache: true + become: true - name: Ensure other system packages are present ansible.builtin.package: @@ -46,23 +47,12 @@ - git - python3-pip state: present - - - name: Check docker is running - ansible.builtin.service: - name: "docker" - enabled: true - state: started - - - name: Ensure docker python package is present - ansible.builtin.pip: - name: - - docker + become: true - name: Deploy step CA hosts: step-ca tags: - step-ca - become: true tasks: - name: Assert that there is only one CA server ansible.builtin.assert: @@ -70,16 +60,15 @@ groups['step-ca'] | length == 1 - name: Ensure step-ca container is running - community.docker.docker_container: + containers.podman.podman_container: name: step-ca env: DOCKER_STEPCA_INIT_NAME: "Smallstep" DOCKER_STEPCA_INIT_DNS_NAMES: "localhost,{{ ansible_facts.nodename }},{{ ansible_facts.default_ipv4.address }}" DOCKER_STEPCA_INIT_REMOTE_MANAGEMENT: "true" DOCKER_STEPCA_INIT_ADDRESS: 
":9999" - image: smallstep/step-ca - networks: - - name: host + image: docker.io/smallstep/step-ca + network: host volumes: - "step:/home/step" @@ -92,9 +81,13 @@ - health_result.status == 200 - health_result.json.status == "ok" + - name: Set step config path + ansible.builtin.set_fact: + step_config_path: "{{ ansible_env.HOME }}/step" + - name: Stat provisioner password file ansible.builtin.stat: - path: /root/.step/provisioner-password + path: "{{ step_config_path }}/provisioner-password" register: provisioner_password_stat - name: Get provisioner password @@ -107,9 +100,9 @@ changed_when: false when: not provisioner_password_stat.stat.exists - - name: Create .step directory + - name: Create path for storing step password ansible.builtin.file: - path: /root/.step + path: "{{ step_config_path }}" state: directory mode: "0700" when: not provisioner_password_stat.stat.exists @@ -122,7 +115,7 @@ - name: Write provisioner password ansible.builtin.copy: content: "{{ provisioner_password.stdout }}" - dest: /root/.step/provisioner-password + dest: "{{ step_config_path }}/provisioner-password" mode: "0600" when: not provisioner_password_stat.stat.exists @@ -137,13 +130,11 @@ dest: "{{ step_ca_root_cert_local_path }}" mode: "0600" delegate_to: localhost - become: false - name: Install step CLI hosts: step tags: - step - become: true tasks: - name: Ensure step Deb is installed when: ansible_facts["os_family"] | lower == "debian" @@ -151,6 +142,7 @@ deb: "https://dl.smallstep.com/gh-release/cli/docs-cli-install/v0.24.4/step-cli_0.24.4_amd64.deb" state: present update_cache: true + become: true - name: Ensure step RPM is installed when: ansible_facts["os_family"] | lower == "redhat" @@ -161,14 +153,19 @@ # Package step-cli_0.24.4_amd643z16ickc.rpm is not signed disable_gpg_check: true state: present + become: true - name: Test step ansible.builtin.command: step certificate inspect https://smallstep.com changed_when: false + - name: Set step config path + 
ansible.builtin.set_fact: + step_config_path: "{{ ansible_env.HOME }}/step" + - name: Regenerate step config if requested ansible.builtin.file: - path: "/root/.step/{{ item }}" + path: "{{ step_config_path }}/{{ item }}" state: absent loop: - certs @@ -177,7 +174,7 @@ - name: Check whether step has been bootstrapped ansible.builtin.stat: - path: /root/.step/config/defaults.json + path: "{{ step_config_path }}/config/defaults.json" register: step_stat - name: Get CA fingerprint # noqa: run-once[task] @@ -187,24 +184,84 @@ delegate_to: "{{ groups['step-ca'][0] }}" run_once: true + # Running an unprivileged step will prompt to overwrite the CA unless we --force + # This writes config under the unprivileged deployment user's HOME directory + # and previously (when we ran privileged) would be coupled with --install + # to write the CA to the system truststore - name: Bootstrap CA ansible.builtin.command: > step ca bootstrap --ca-url https://{{ hostvars[groups['step-ca'][0]].ansible_facts.default_ipv4.address }}:9999 - --fingerprint {{ ca_fingerprint.stdout }} --install + --fingerprint {{ ca_fingerprint.stdout }} --force changed_when: true when: not step_stat.stat.exists + - name: Determine step path when executed unprivileged + ansible.builtin.command: step path + changed_when: false + register: reductionist_step_path + + # Install CA from unprivileged deployment user's config to system truststore - name: Install root certificate to system - ansible.builtin.shell: step certificate install $(step path)/certs/root_ca.crt + ansible.builtin.command: step certificate install {{ reductionist_step_path.stdout }}/certs/root_ca.crt changed_when: false + become: true when: not step_stat.stat.exists + - name: Check whether certificate exists + ansible.builtin.stat: + path: "{{ reductionist_remote_certs_path }}/cert.pem" + register: reductionist_cert_stat + + - name: Ensure remote certificate path exists + ansible.builtin.file: + path: "{{ reductionist_remote_certs_path 
}}" + state: directory + mode: "0711" + + - name: Generate a step token + ansible.builtin.command: >- + step ca token + --provisioner-password-file {{ step_config_path }}/provisioner-password + {{ reductionist_host }} + delegate_to: "{{ groups['step-ca'][0] }}" + changed_when: false + register: reductionist_step_token + + - name: Generate an initial certificate + ansible.builtin.command: >- + step ca certificate + --token {{ reductionist_step_token.stdout }} + --not-after {{ reductionist_cert_not_after }} + --force + {{ reductionist_host }} + {{ reductionist_remote_certs_path }}/cert.pem + {{ reductionist_remote_certs_path }}/key.pem + changed_when: true + when: not reductionist_cert_stat.stat.exists + + - name: Ensure certificate renewal systemd units exist + tags: privileged + ansible.builtin.template: + src: "{{ item }}.j2" + dest: "/etc/systemd/system/{{ item }}" + mode: "0600" + loop: + - reductionist-cert-renewer.service + - reductionist-cert-renewer.timer + become: true + + - name: Ensure certificate renewal systemd timer is enabled + tags: privileged + ansible.builtin.service: + name: reductionist-cert-renewer.timer + enabled: true + become: true + - name: Deploy Minio hosts: minio tags: - minio - become: true tasks: - name: Assert that there is only one Minio server ansible.builtin.assert: @@ -212,13 +269,12 @@ groups['minio'] | length == 1 - name: Ensure minio container is running - community.docker.docker_container: + containers.podman.podman_container: name: minio-server - command: server data --console-address ":9001" - image: minio/minio - keep_volumes: false - networks: - - name: host + command: server data --console-address :9001 + image: docker.io/minio/minio + delete_volumes: true + network: host volumes: /data - name: Wait for minio object storage to start @@ -240,49 +296,48 @@ hosts: prometheus tags: - prometheus - become: true tasks: - name: Assert that there is only one Prometheus server ansible.builtin.assert: that: groups['prometheus'] | 
length == 1 - - name: Ensure /etc/prometheus directory exists + - name: Ensure non-privileged user's prometheus directory exists ansible.builtin.file: - path: /etc/prometheus + path: "{{ ansible_env.HOME }}/prometheus" state: directory mode: "0755" - name: Ensure CA certificate is copied ansible.builtin.copy: src: "{{ prometheus_cacert }}" - dest: /etc/prometheus/cacert.pem + dest: "{{ ansible_env.HOME }}/prometheus/cacert.pem" mode: "0644" register: prometheus_cacert - name: Ensure prometheus.yml is templated ansible.builtin.template: src: prometheus.yml.j2 - dest: /etc/prometheus/prometheus.yml + dest: "{{ ansible_env.HOME }}/prometheus/prometheus.yml" mode: "0644" register: prometheus_yml - name: Ensure prometheus container is running - community.docker.docker_container: + containers.podman.podman_container: name: prometheus - image: prom/prometheus - networks: - - name: host + privileged: true # Rocky 9 SELinux prevents visibility of the host volumes + image: docker.io/prom/prometheus + network: host restart: "{{ prometheus_yml is changed or prometheus_cacert is changed }}" volumes: - - "/etc/prometheus:/etc/prometheus:ro" + - "{{ ansible_env.HOME }}/prometheus:/etc/prometheus:ro" - "prometheus:/prometheus" + become: false - name: Deploy Jaeger hosts: jaeger tags: - jaeger - become: true tasks: - name: Assert that there is only one Jaeger server ansible.builtin.assert: @@ -290,13 +345,12 @@ - groups['jaeger'] | length == 1 - name: Ensure jaeger container is running - community.docker.docker_container: + containers.podman.podman_container: name: jaeger env: COLLECTOR_ZIPKIN_HTTP_PORT: "9411" - image: jaegertracing/all-in-one:1.6 - networks: - - name: host + image: docker.io/jaegertracing/all-in-one:1.6 + network: host - name: Gather facts for Reductionist hosts: @@ -311,54 +365,7 @@ hosts: reductionist tags: - reductionist - become: true tasks: - - name: Check whether certificate exists - ansible.builtin.stat: - path: "{{ reductionist_remote_certs_path 
}}/cert.pem" - register: reductionist_cert_stat - - - name: Ensure remote certificate path exists - ansible.builtin.file: - path: "{{ reductionist_remote_certs_path }}" - state: directory - mode: "0700" - - - name: Generate a step token - ansible.builtin.command: >- - step ca token - --provisioner-password-file /root/.step/provisioner-password - {{ reductionist_host }} - delegate_to: "{{ groups['step-ca'][0] }}" - changed_when: false - register: reductionist_step_token - - - name: Generate an initial certificate - ansible.builtin.command: >- - step ca certificate - --token {{ reductionist_step_token.stdout }} - --not-after {{ reductionist_cert_not_after }} - --force - {{ reductionist_host }} - {{ reductionist_remote_certs_path }}/cert.pem - {{ reductionist_remote_certs_path }}/key.pem - changed_when: true - when: not reductionist_cert_stat.stat.exists - - - name: Ensure certificate renewal systemd units exist - ansible.builtin.template: - src: "{{ item }}.j2" - dest: "/etc/systemd/system/{{ item }}" - mode: "0600" - loop: - - reductionist-cert-renewer.service - - reductionist-cert-renewer.timer - - - name: Ensure certificate renewal systemd timer is enabled - ansible.builtin.service: - name: reductionist-cert-renewer.timer - enabled: true - - name: Clone reductionist repo ansible.builtin.git: repo: "{{ reductionist_src_url }}" @@ -367,21 +374,19 @@ when: reductionist_build_image | bool - name: Ensure reductionist image is built - community.docker.docker_image: + containers.podman.podman_image: name: "{{ reductionist_image }}" tag: "{{ reductionist_tag }}" - build: - network: host # Network to use for RUN cmds in dockerfile - needed to allow 'pip install...' 
in RedHat images - path: "{{ ansible_env.HOME }}/reductionist-rs" - source: build + path: "{{ ansible_env.HOME }}/reductionist-rs" when: reductionist_build_image | bool - name: Ensure reductionist container is running - community.docker.docker_container: + containers.podman.podman_container: name: "{{ reductionist_name }}" + privileged: true # Rocky 9 SELinux prevents visibility of the volume's certs otherwise env: "{{ reductionist_env }}" image: "{{ reductionist_image }}:{{ reductionist_tag }}" - networks: "{{ reductionist_networks }}" + network: "{{ reductionist_networks }}" volumes: "{{ reductionist_volumes }}" restart: true # Load new certificates. TODO: Hot reload @@ -397,7 +402,6 @@ hosts: haproxy tags: - haproxy - become: true tasks: # Currently we are not deploying any failover mechanism such as keepalived, # so limit to one HAProxy server. @@ -415,28 +419,28 @@ register: result loop: "{{ query('inventory_hostnames', 'reductionist') }}" - - name: Ensure /etc/haproxy directory exists + - name: Ensure non-privileged user's haproxy directory exists ansible.builtin.file: - path: /etc/haproxy + path: "{{ ansible_env.HOME }}/haproxy" state: directory mode: "0755" - name: Ensure haproxy.cfg is templated ansible.builtin.template: src: haproxy.cfg.j2 - dest: /etc/haproxy/haproxy.cfg + dest: "{{ ansible_env.HOME }}/haproxy/haproxy.cfg" mode: "0644" register: haproxy_cfg - name: Ensure haproxy container is running - community.docker.docker_container: + containers.podman.podman_container: name: haproxy - image: haproxy:2.8 - networks: - - name: host + privileged: true # Rocky 9 SELinux prevents visibility of the host volumes + image: docker.io/haproxy:2.8 + network: host restart: "{{ haproxy_cfg is changed }}" volumes: - - "/etc/haproxy:/usr/local/etc/haproxy:ro" + - "{{ ansible_env.HOME }}/haproxy:/usr/local/etc/haproxy:ro" - name: Wait for reductionist server to be accessible via HAProxy ansible.builtin.uri: diff --git a/docs/deployment.md b/docs/deployment.md index 
bfda1da..5e2912c 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -5,7 +5,7 @@ The Ansible playbook allows for a secure, scale-out deployment of Reductionist, The following services are supported: -* Docker engine +* Podman engine * Step CA Certificate Authority (generates certificates for Reductionist) * Step CLI (requests and renews certificates) * Minio object store (optional, for testing) @@ -18,11 +18,11 @@ The following services are supported: The existence of correctly configured hosts is assumed by this playbook. -The following host OS distributions are supported: +The following host OS distributions have been tested and are supported: -* Ubuntu 20.04-22.04 -* CentOS Stream 8-9 -* Rocky Linux 8-9 +* CentOS Stream 9 +* Rocky Linux 9 +* Ubuntu 24.04 Currently only a single network is supported. Several TCP ports should be accessible on this network. @@ -82,7 +82,7 @@ reductionist1 reductionist # Do not edit. -[docker:children] +[podman:children] haproxy jaeger minio @@ -133,22 +133,73 @@ ansible-galaxy collection install -r deployment/requirements.yml ## Deployment -Run the playbook: +Podman will be used to run containers under the same user account used for ansible deployment. +To install requisite system packages some tasks will require sudo `privileged` access. 
+ +To run the entire playbook as a non-privileged user, prompting for the sudo password: ```sh -ansible-playbook -i deployment/inventory deployment/site.yml +ansible-playbook -i deployment/inventory deployment/site.yml -K ``` -If you want to run only specific plays in the playbook, the following tags are supported and may be specified via `--tags `: +To run specific plays, the following tags are supported and may be specified via `--tags <tag1,tag2>`: -* `docker` +* `podman` - runs privileged tasks to install the required system packages * `step-ca` -* `step` +* `step` - runs privileged tasks to install the required system packages and the Step CA root certificate * `minio` * `prometheus` * `jaeger` * `reductionist` * `haproxy` +### Minimal deployment of Podman and the Reductionist + +Podman is a prerequisite for running the Reductionist. +Podman can run containers as a **non-privileged** user; however, this user must have **linger** enabled on their account to allow Podman to continue to run after logging out of the user session. + +To enable **linger** support for the non-privileged user: +```sh +sudo loginctl enable-linger <user> +``` + +Alternatively, run the optional `podman` play to install Podman as a **non-privileged** user. The following will prompt for the sudo password to escalate privileges only for package installation and for enabling **linger** for the non-privileged user: +```sh +ansible-playbook -i deployment/inventory deployment/site.yml --tags podman -K +``` + +Then run the `reductionist` play, again as the **non-privileged** user: +```sh +ansible-playbook -i deployment/inventory deployment/site.yml --tags reductionist +``` + +Podman containers require a manual restart after a system reboot. +This requires logging into the host(s) running the Reductionist as the **non-privileged** user to run: +```sh +podman restart reductionist +``` + +Automatic restart on boot can be enabled via **systemd**, but this is not covered by this documentation. 
+ +### Using SSL/TLS certificates with the Reductionist + +To enable **https** connections, edit `deployment/group_vars/all` before deployment and set: + +``` +REDUCTIONIST_HTTPS: "true" +``` + +Note that this is the default. + +A `certs` directory is required under the home directory of the non-privileged deployment user; if Step is deployed, this directory is created automatically and the following files are added to it. +If using third-party certificates, the following files must be added manually using the file names shown: + +| Filename | Description | +| -------- | ------- | +| certs/key.pem | Private key file | +| certs/cert.pem | Certificate file including any intermediates | + +Certificates can be added after the Reductionist has been deployed, but the Reductionist container will need to be restarted afterwards. + ## Usage Once deployed, the Reductionist API is accessible on port 8080 by HAProxy. The Prometheus UI is accessible on port 9090 on the host running Prometheus. The Jaeger UI is accessible on port 16686 on the host running Jaeger.