Summary of this patch (free text before the first "diff --git" header):

  * CI workflows (fatimage, nightlybuild) export ARK_PASSWORD from repository secrets.
  * New role ansible/roles/dnf_repos:
      - tasks/set_repos.yml points the Rocky baseos/appstream/crb/extras repos and EPEL
        at ark.stackhpc.com Pulp snapshots, using dnf_repos_username/dnf_repos_password.
      - tasks/disable_repos.yml rewrites the same repo entries without credentials and
        with "enabled: false".
  * environments/.stackhpc hooks run set_repos.yml (pre) and disable_repos.yml (post) on
    the "builder" group, only when ansible_distribution_major_version == "9".
  * New variable appliances_mode: "configure" by default, "build" for the builder group.
    It gates the osc.ood package install and the Xfce group install, and selects
    runtime.yml vs main.yml for the stackhpc.openhpc role.

diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml
index a8d3dbe29..217b09c22 100644
--- a/.github/workflows/fatimage.yml
+++ b/.github/workflows/fatimage.yml
@@ -39,6 +39,7 @@ jobs:
             "openstack.openhpc": "rocky-latest-RL9"
           }
         }
+      ARK_PASSWORD: ${{ secrets.ARK_PASSWORD }}
 
     steps:
       - uses: actions/checkout@v2
diff --git a/.github/workflows/nightlybuild.yml b/.github/workflows/nightlybuild.yml
index da3de4ea5..9f45b0890 100644
--- a/.github/workflows/nightlybuild.yml
+++ b/.github/workflows/nightlybuild.yml
@@ -37,6 +37,7 @@ jobs:
           "RL8": "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2",
           "RL9": "Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2"
         }
+      ARK_PASSWORD: ${{ secrets.ARK_PASSWORD }}
 
     steps:
       - uses: actions/checkout@v2
diff --git a/ansible/.gitignore b/ansible/.gitignore
index 8edcc4360..48c917c4f 100644
--- a/ansible/.gitignore
+++ b/ansible/.gitignore
@@ -64,3 +64,5 @@ roles/*
 !roles/k9s/**
 !roles/lustre/
 !roles/lustre/**
+!roles/dnf_repos/
+!roles/dnf_repos/**
diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml
index 7cad2dc59..b28e4f308 100644
--- a/ansible/fatimage.yml
+++ b/ansible/fatimage.yml
@@ -98,6 +98,10 @@
         tasks_from: jupyter_compute.yml
       when: "'openondemand_jupyter' in group_names"
 
+    - name: Install Apache PAM module # Extracted from start of roles/openondemand/tasks/pam_auth.yml to ensure only installed during build
+      yum:
+        name: mod_authnz_pam
+
     # - import_playbook: monitoring.yml:
     - import_role:
         name: opensearch
diff --git a/ansible/roles/dnf_repos/defaults/main.yml b/ansible/roles/dnf_repos/defaults/main.yml
new file mode 100644
index 000000000..a3e05d0e1
--- /dev/null
+++ b/ansible/roles/dnf_repos/defaults/main.yml
@@ -0,0 +1,26 @@
+---
+dnf_repos_rocky_ark_prefix: "https://ark.stackhpc.com/pulp/content/{{ ansible_distribution | lower }}/{{ ansible_distribution_version }}"
+dnf_repos_rocky_ark_suffix: "{{ ansible_architecture }}/os/{{ dnf_repos_rocky_ark_timestamp }}/"
+# most stable from https://github.com/stackhpc/stackhpc-kayobe-config/blob/stackhpc/2024.1/etc/kayobe/pulp-repo-versions.yml
+# note that some timestamps can't be used because not all repos have snapshots for them
+dnf_repos_rocky_ark_timestamp: "20240816T002610"
+dnf_repos_username: slurm-app-ci
+dnf_repos_password: "{{ lookup('ansible.builtin.env', 'ARK_PASSWORD') }}"
+
+# epel installed separately
+dnf_repos_repolist:
+- file: rocky
+  name: baseos
+  base_url: "{{ dnf_repos_rocky_ark_prefix }}/BaseOS/{{ dnf_repos_rocky_ark_suffix }}"
+- file: rocky
+  name: appstream
+  base_url: "{{ dnf_repos_rocky_ark_prefix }}/AppStream/{{ dnf_repos_rocky_ark_suffix }}"
+- file: rocky
+  name: crb
+  base_url: "{{ dnf_repos_rocky_ark_prefix }}/CRB/{{ dnf_repos_rocky_ark_suffix }}"
+- file: rocky-extras
+  name: extras
+  base_url: "{{ dnf_repos_rocky_ark_prefix }}/extras/{{ dnf_repos_rocky_ark_suffix }}"
+
+dnf_repos_epel_timestamp: "20240902T080424"
+dnf_repos_epel_baseurl: "https://ark.stackhpc.com/pulp/content/epel/{{ ansible_distribution_major_version }}/Everything/{{ ansible_architecture }}/{{ dnf_repos_epel_timestamp }}"
diff --git a/ansible/roles/dnf_repos/tasks/disable_repos.yml b/ansible/roles/dnf_repos/tasks/disable_repos.yml
new file mode 100644
index 000000000..f8997b741
--- /dev/null
+++ b/ansible/roles/dnf_repos/tasks/disable_repos.yml
@@ -0,0 +1,18 @@
+---
+- name: Disable Pulp repos and remove creds
+  ansible.builtin.yum_repository:
+    file: "{{ item.file }}"
+    name: "{{ item.name }}"
+    baseurl: "{{ item.base_url }}"
+    description: "{{ item.name }}"
+    enabled: false
+  loop: "{{ dnf_repos_repolist }}"
+
+- name: Disable EPEL repo and remove creds
+  ansible.builtin.yum_repository:
+    name: epel
+    file: epel
+    description: epel
+    baseurl: "{{ dnf_repos_epel_baseurl }}"
+    gpgcheck: false
+    enabled: false
diff --git a/ansible/roles/dnf_repos/tasks/set_repos.yml b/ansible/roles/dnf_repos/tasks/set_repos.yml
new file mode 100644
index 000000000..f8cca5600
--- /dev/null
+++ b/ansible/roles/dnf_repos/tasks/set_repos.yml
@@ -0,0 +1,26 @@
+---
+
+- name: Replace system repos with Pulp repos
+  ansible.builtin.yum_repository:
+    file: "{{ item.file }}"
+    name: "{{ item.name }}"
+    baseurl: "{{ item.base_url }}"
+    description: "{{ item.name }}"
+    username: "{{ dnf_repos_username }}"
+    password: "{{ dnf_repos_password }}"
+  loop: "{{ dnf_repos_repolist }}"
+
+- name: Install epel-release
+  # done so that roles installing epel via epel-release don't over-write our changes to the epel repo
+  ansible.builtin.dnf:
+    name: epel-release
+
+- name: Use Pulp EPEL repo
+  ansible.builtin.yum_repository:
+    name: epel
+    file: epel
+    description: epel
+    gpgcheck: false
+    username: "{{ dnf_repos_username }}"
+    password: "{{ dnf_repos_password }}"
+    baseurl: "{{ dnf_repos_epel_baseurl }}"
diff --git a/ansible/roles/eessi/tasks/main.yaml b/ansible/roles/eessi/tasks/main.yaml
index d121b6fdd..c61625b0e 100644
--- a/ansible/roles/eessi/tasks/main.yaml
+++ b/ansible/roles/eessi/tasks/main.yaml
@@ -10,6 +10,7 @@
 - name: Add CVMFS repo
   dnf:
     name: https://ecsft.cern.ch/dist/cvmfs/cvmfs-release/cvmfs-release-latest.noarch.rpm
+    disable_gpg_check: true
 
 - name: Install CVMFS
   dnf:
diff --git a/ansible/roles/openondemand/tasks/main.yml b/ansible/roles/openondemand/tasks/main.yml
index 86184f13c..bd5706ecb 100644
--- a/ansible/roles/openondemand/tasks/main.yml
+++ b/ansible/roles/openondemand/tasks/main.yml
@@ -6,12 +6,19 @@
   loop: "{{ openondemand_osc_ood_defaults | dict2items }}"
   when: (item.key in hostvars[inventory_hostname]) or (item.value)
 
+# osc.ood variables are exposed to play here instead of setting 'public' in include role so that they will still be exposed during runtime
+- ansible.builtin.include_vars:
+    dir: "{{ playbook_dir }}/roles/osc.ood/defaults/main"
+
+- ansible.builtin.include_vars:
+    file: "{{ playbook_dir }}/roles/osc.ood/vars/Rocky/{{ ansible_distribution_major_version }}.yml"
+
 # if using PAM auth we need apache installed but NOT started so split the osc.ood role up:
 - include_role:
     name: osc.ood
     tasks_from: install-package.yml
     vars_from: "Rocky/{{ ansible_distribution_major_version }}.yml"
-    public: yes # Expose the vars from this role to the rest of the play
+  when: appliances_mode != 'configure'
   # can't set vars: from a dict hence the workaround above
 
 - include_tasks:
diff --git a/ansible/roles/openondemand/tasks/pam_auth.yml b/ansible/roles/openondemand/tasks/pam_auth.yml
index 0edce622f..6bc4bda36 100644
--- a/ansible/roles/openondemand/tasks/pam_auth.yml
+++ b/ansible/roles/openondemand/tasks/pam_auth.yml
@@ -1,6 +1,6 @@
 # https://osc.github.io/ood-documentation/latest/authentication/pam.html
 ---
-- name: Install Apache PAM module
+- name: Install Apache PAM module # Also installed during image build by ansible/fatimage.yml
   yum:
     name: mod_authnz_pam
 
diff --git a/ansible/roles/openondemand/tasks/vnc_compute.yml b/ansible/roles/openondemand/tasks/vnc_compute.yml
index 388e3b3c5..6ec340249 100644
--- a/ansible/roles/openondemand/tasks/vnc_compute.yml
+++ b/ansible/roles/openondemand/tasks/vnc_compute.yml
@@ -48,6 +48,7 @@
   tags: install
   yum:
     name: '@Xfce'
+  when: appliances_mode != 'configure' # dnf group/module installs aren't idempotent so only run during build
 
 # - name: Ensure python3.9 installed
 #   dnf:
diff --git a/ansible/slurm.yml b/ansible/slurm.yml
index 0b7397242..cf282f786 100644
--- a/ansible/slurm.yml
+++ b/ansible/slurm.yml
@@ -25,8 +25,9 @@
   tags:
     - openhpc
   tasks:
-    - import_role:
+    - include_role:
         name: stackhpc.openhpc
+        tasks_from: "{{ 'runtime.yml' if appliances_mode == 'configure' else 'main.yml' }}"
 
 - name: Set locked memory limits on user-facing nodes
   hosts:
diff --git a/environments/.stackhpc/hooks/post.yml b/environments/.stackhpc/hooks/post.yml
index bd60015d9..98e366304 100644
--- a/environments/.stackhpc/hooks/post.yml
+++ b/environments/.stackhpc/hooks/post.yml
@@ -11,4 +11,13 @@
       with_items:
         - "/opt/ood/ondemand/root/usr/share/gems/3.1/ondemand/{{ ondemand_package_version }}-1/gems/bootstrap_form-2.7.0/test/dummy/Gemfile.lock"
         - "/opt/ood/ondemand/root/usr/share/gems/3.1/ondemand/{{ ondemand_package_version }}-1/gems/bootstrap_form-4.5.0/demo/yarn.lock"
-        - /var/www/ood/apps/sys/dashboard/node_modules/data-confirm-modal/Gemfile.lock
\ No newline at end of file
+        - /var/www/ood/apps/sys/dashboard/node_modules/data-confirm-modal/Gemfile.lock
+
+- hosts: builder
+  become: yes
+  tasks:
+    - name: Disable ark repos
+      ansible.builtin.include_role:
+        name: dnf_repos
+        tasks_from: disable_repos.yml
+      when: ansible_distribution_major_version == "9"  # TODO: update role once RL8 config decided
diff --git a/environments/.stackhpc/hooks/pre.yml b/environments/.stackhpc/hooks/pre.yml
index 0fdbf9f60..9ea84740d 100644
--- a/environments/.stackhpc/hooks/pre.yml
+++ b/environments/.stackhpc/hooks/pre.yml
@@ -17,3 +17,12 @@
         - "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/inventory/hosts.yml"
         - "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/inventory/group_vars/all/secrets.yml"
         - "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/inventory/group_vars/all/test_user.yml"
+
+- hosts: builder
+  become: yes
+  tasks:
+    - name: Replace system repos with ark
+      ansible.builtin.include_role:
+        name: dnf_repos
+        tasks_from: set_repos.yml
+      when: ansible_distribution_major_version == "9"  # TODO: update role once RL8 config decided
diff --git a/environments/.stackhpc/inventory/group_vars/openhpc/overrides.yml b/environments/.stackhpc/inventory/group_vars/openhpc/overrides.yml
index 5aac5f8ad..858dfd9d3 100644
--- a/environments/.stackhpc/inventory/group_vars/openhpc/overrides.yml
+++ b/environments/.stackhpc/inventory/group_vars/openhpc/overrides.yml
@@ -1,3 +1,13 @@
 openhpc_config_extra:
   SlurmctldDebug: debug
   SlurmdDebug: debug
+
+ohpc_default_extra_repos:
+  "9": []  # overriding to ensure doesn't overwrite ark epel repo
+  "8":
+    - name: epel
+      file: epel
+      description: "Extra Packages for Enterprise Linux 8 - $basearch"
+      metalink: "https://mirrors.fedoraproject.org/metalink?repo=epel-8&arch=$basearch&infra=$infra&content=$contentdir"
+      gpgcheck: true
+      gpgkey: "https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-8"
diff --git a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
index f9a2087c8..14c997596 100644
--- a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
+++ b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
@@ -1,6 +1,6 @@
 {
     "cluster_image": {
-        "RL8": "openhpc-RL8-241118-0918-4538c6df",
-        "RL9": "openhpc-RL9-241118-0918-4538c6df"
+        "RL8": "openhpc-RL8-241203-1659-b0558b95",
+        "RL9": "openhpc-RL9-241203-1659-b0558b95"
     }
 }
diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml
index 15340820f..2a88f035d 100644
--- a/environments/common/inventory/group_vars/all/defaults.yml
+++ b/environments/common/inventory/group_vars/all/defaults.yml
@@ -6,6 +6,7 @@ appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}
 appliances_environment_name: "{{ appliances_environment_root | basename | regex_replace('\\W+', '') }}" # [a-zA-Z0-9_] only
 appliances_cockpit_state: absent # RHEL cockpit installed but not enabled in genericcloud images; appliance defaults to removing it
 #appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform
+appliances_mode: configure
 
 # Address(ip/dns) for internal communication between services. This is
 # normally traffic you do no want to expose to users.
diff --git a/environments/common/inventory/group_vars/builder/defaults.yml b/environments/common/inventory/group_vars/builder/defaults.yml
index 22042c1bf..b43d9f03c 100644
--- a/environments/common/inventory/group_vars/builder/defaults.yml
+++ b/environments/common/inventory/group_vars/builder/defaults.yml
@@ -22,3 +22,4 @@ squid_cache_disk: 0 # just needs to be defined
 squid_cache_mem: 0
 tuned_started: false
 tuned_enabled: false
+appliances_mode: build