diff --git a/ansible/roles/basic_users/tasks/main.yml b/ansible/roles/basic_users/tasks/main.yml index 23d160850..6abba9cc0 100644 --- a/ansible/roles/basic_users/tasks/main.yml +++ b/ansible/roles/basic_users/tasks/main.yml @@ -66,7 +66,7 @@ when: - item.state | default('present') == 'present' - item.create_home | default(true) | bool - - inventory_hostname == basic_users_homedir_server + - ansible_hostname == basic_users_homedir_server # The following tasks run on a single *client* node, so that home directory # paths are easily constructed, becoming each user so that root-squash @@ -85,7 +85,7 @@ when: - item.state | default('present') == 'present' - item.generate_ssh_key | default(true) | bool or item.public_key is defined - - inventory_hostname == basic_users_homedir_client + - ansible_hostname == basic_users_homedir_client - name: Generate cluster ssh key community.crypto.openssh_keypair: @@ -101,7 +101,7 @@ when: - item.state | default('present') == 'present' - item.generate_ssh_key | default(true) - - inventory_hostname == basic_users_homedir_client + - ansible_hostname == basic_users_homedir_client register: _cluster_ssh_keypair - name: Write generated cluster ssh key to authorized_keys @@ -118,7 +118,7 @@ when: - item.item.state | default('present') == 'present' - item.item.generate_ssh_key | default(true) - - inventory_hostname == basic_users_homedir_client + - ansible_hostname == basic_users_homedir_client - item.public_key is defined # NB this is the *returned* public key - name: Write supplied public key to authorized_keys @@ -134,5 +134,5 @@ label: "{{ item.name }}" when: - item.state | default('present') == 'present' - - inventory_hostname == basic_users_homedir_client + - ansible_hostname == basic_users_homedir_client - item.public_key is defined # NB this is the *provided* public key diff --git a/ansible/roles/cacerts/tasks/export.yml b/ansible/roles/cacerts/tasks/export.yml index 7345b8573..c9c64713b 100644 --- a/ansible/roles/cacerts/tasks/export.yml +++ b/ansible/roles/cacerts/tasks/export.yml @@ -2,7 +2,7 @@ copy: src: "{{ item }}" dest: /exports/cluster/cacerts/ - owner: root + owner: slurm group: root mode: 0644 with_fileglob: diff --git a/ansible/roles/compute_init/files/compute-init.yml b/ansible/roles/compute_init/files/compute-init.yml index bf486f5b2..e97b5918d 100644 --- a/ansible/roles/compute_init/files/compute-init.yml +++ b/ansible/roles/compute_init/files/compute-init.yml @@ -29,15 +29,9 @@ tuned_enabled: true tuned_started: true - nfs_client_mnt_point: "/mnt" - nfs_client_mnt_options: - nfs_client_mnt_state: mounted - nfs_configurations: nfs_enable: clients: false - # openhpc: no defaults required - os_manila_mount_shares: [] os_manila_mount_ceph_conf_path: /etc/ceph os_manila_mount_state: mounted @@ -47,15 +41,8 @@ - noatime - _netdev # prevents mount blocking early boot before networking available - rw - - basic_users_groups: [] - basic_users_manage_homedir: false # homedir must already exist on shared filesystem - basic_users_userdefaults: - state: present - create_home: "{{ basic_users_manage_homedir }}" - generate_ssh_key: "{{ basic_users_manage_homedir }}" - ssh_key_comment: "{{ item.name }}" - basic_users_users: [] + - nodev + - nosuid tasks: - block: @@ -96,6 +83,7 @@ when: _mount_mnt_cluster.failed - name: Check if hostvars exist + become_user: slurm stat: path: "/mnt/cluster/hostvars/{{ ansible_hostname }}/hostvars.yml" register: hostvars_stat @@ -109,17 +97,33 @@ - meta: end_play when: not hostvars_stat.stat.exists - - name: Load hostvars from NFS + - name: Sync /mnt/cluster to /var/tmp + become_user: slurm + synchronize: + src: "/mnt/cluster/" + dest: "/var/tmp/cluster/" + archive: yes + recursive: yes + + - name: Unmount /mnt/cluster after sync + mount: + path: /mnt/cluster + state: unmounted + + - name: Load hostvars # this is higher priority than vars block = normal ansible's hostvars include_vars: - file: "/mnt/cluster/hostvars/{{ ansible_hostname }}/hostvars.yml" # can't use inventory_hostname - - # TODO: should /mnt/cluster now be UNMOUNTED to avoid future hang-ups? + file: "/var/tmp/cluster/hostvars/{{ ansible_hostname }}/hostvars.yml" - name: Run chrony role ansible.builtin.include_role: name: mrlesmithjr.chrony - when: enable_chrony | bool + tasks_from: config_chrony.yml + vars: + # workaround for set_facts.yml: + chrony_config: /etc/chrony.conf + chrony_service: chronyd + when: enable_chrony - name: Configure resolve.conf block: @@ -149,7 +153,7 @@ - name: Copy cluster /etc/hosts copy: - src: /mnt/cluster/hosts + src: /var/tmp/cluster/hosts dest: /etc/hosts owner: root group: root @@ -160,14 +164,14 @@ ansible.builtin.include_role: name: cacerts vars: - cacerts_cert_dir: "/mnt/cluster/cacerts" + cacerts_cert_dir: "/var/tmp/cluster/cacerts" when: enable_cacerts - name: Configure sshd ansible.builtin.include_role: name: sshd vars: - sshd_conf_src: "/mnt/cluster/hostconfig/{{ ansible_hostname }}/sshd.conf" + sshd_conf_src: "/var/tmp/cluster/hostconfig/{{ ansible_hostname }}/sshd.conf" when: enable_sshd - name: Configure tuned @@ -179,22 +183,24 @@ name: sssd tasks_from: configure.yml vars: - sssd_conf_src: "/mnt/cluster/hostconfig/{{ ansible_hostname }}/sssd.conf" + sssd_conf_src: "/var/tmp/cluster/hostconfig/{{ ansible_hostname }}/sssd.conf" when: enable_sssd # NFS client mount - name: If nfs-clients is present - include_tasks: tasks/nfs-clients.yml + ansible.builtin.include_role: + name: stackhpc.nfs + tasks_from: nfs-clients.yml when: - enable_nfs - - nfs_enable.clients | bool or ('nfs_enable' in item and item.nfs_enable.clients | bool) + - nfs_enable.clients | default(item.nfs_enable.clients) | bool loop: "{{ nfs_configurations }}" - name: Manila mounts block: - name: Read manila share info from nfs file include_vars: - file: /mnt/cluster/manila_share_info.yml + file: /var/tmp/cluster/manila_share_info.yml no_log: true # contains secrets - name: Ensure Ceph configuration directory exists @@ -269,34 +275,15 @@ when: enable_lustre - name: Basic users - block: - - name: Create groups - ansible.builtin.group: "{{ item }}" - loop: "{{ basic_users_groups }}" - - - name: Create users - user: "{{ basic_users_userdefaults | combine(item) | filter_user_params() }}" - loop: "{{ basic_users_users }}" - loop_control: - label: "{{ item.name }} [{{ item.state | default('present') }}]" - register: basic_users_info - - - name: Write sudo rules - blockinfile: - path: /etc/sudoers.d/80-{{ item.name}}-user - block: "{{ item.sudo }}" - create: true - loop: "{{ basic_users_users }}" - loop_control: - label: "{{ item.name }}" - when: "'sudo' in item" + ansible.builtin.include_role: + name: basic_users when: enable_basic_users - name: EESSI block: - name: Copy cvmfs config copy: - src: /mnt/cluster/cvmfs/default.local + src: /var/tmp/cluster/cvmfs/default.local dest: /etc/cvmfs/default.local owner: root group: root diff --git a/ansible/roles/compute_init/tasks/export.yml b/ansible/roles/compute_init/tasks/export.yml index 68fcf4be3..3226e13b8 100644 --- a/ansible/roles/compute_init/tasks/export.yml +++ b/ansible/roles/compute_init/tasks/export.yml @@ -12,9 +12,9 @@ copy: src: /etc/hosts dest: /exports/cluster/hosts - owner: root + owner: slurm group: root - mode: u=rw,go= + mode: u=r,g=rw,o= remote_src: true run_once: true delegate_to: "{{ groups['control'] | first }}" @@ -41,9 +41,9 @@ copy: content: "{{ os_manila_mount_share_info_var | to_nice_yaml }}" dest: /exports/cluster/manila_share_info.yml - owner: root + owner: slurm group: root - mode: u=rw,g=r + mode: u=r,g=rw,o= run_once: true delegate_to: "{{ groups['control'] | first }}" when: os_manila_mount_share_info is defined @@ -55,7 +55,7 @@ file: path: /exports/cluster/cvmfs state: directory - owner: root + owner: slurm group: root mode: 0755 run_once: true @@ -65,7 +65,7 @@ copy: src: /etc/cvmfs/default.local dest: /exports/cluster/cvmfs/default.local - owner: root + owner: slurm group: root mode: 0644 remote_src: true @@ -82,9 +82,9 @@ file: path: "/exports/cluster/hostconfig/{{ inventory_hostname }}/" state: directory - owner: root + owner: slurm group: root - mode: u=rw,go= + mode: u=rX,g=rwX,o= delegate_to: "{{ groups['control'] | first }}" - name: Template sssd config diff --git a/ansible/roles/compute_init/tasks/install.yml b/ansible/roles/compute_init/tasks/install.yml index 8288b65fe..0638f7011 100644 --- a/ansible/roles/compute_init/tasks/install.yml +++ b/ansible/roles/compute_init/tasks/install.yml @@ -33,8 +33,8 @@ dest: templates/ceph.keyring.j2 - src: ../../resolv_conf/files/NetworkManager-dns-none.conf dest: files/NetworkManager-dns-none.conf - - src: ../../basic_users/filter_plugins/filter_keys.py - dest: filter_plugins/filter_keys.py + - src: ../../basic_users + dest: roles/ - src: ../../cacerts dest: roles/ - src: ../../sssd @@ -43,8 +43,8 @@ dest: roles/ - src: ../../tuned/tasks/configure.yml dest: tasks/tuned.yml - - src: ../../stackhpc.nfs/tasks/nfs-clients.yml - dest: tasks/nfs-clients.yml + - src: ../../stackhpc.nfs + dest: roles/ - src: ../../mrlesmithjr.chrony dest: roles/ - src: ../../lustre diff --git a/environments/.stackhpc/inventory/group_vars/all/nfs.yml b/environments/.stackhpc/inventory/group_vars/all/nfs.yml deleted file mode 100644 index af3861ee9..000000000 --- a/environments/.stackhpc/inventory/group_vars/all/nfs.yml +++ /dev/null @@ -1,17 +0,0 @@ -nfs_configurations: - - comment: Export /exports/home from Slurm control node as /home - nfs_enable: - server: "{{ inventory_hostname in groups['control'] }}" - # Don't mount share on server where it is exported from... - # Could do something like `nfs_clients: "{{ 'nfs_servers' not in group_names }}"` instead. - clients: "{{ inventory_hostname in groups['cluster'] and inventory_hostname not in groups['control'] }}" - nfs_server: "{{ nfs_server_default }}" - nfs_export: "/exports/home" # assumes skeleton TF is being used - nfs_client_mnt_point: "/home" - - # EXPERIMENTAL - not generally secure - - comment: Export /exports/cluster from Slurm control node - nfs_enable: - server: "{{ inventory_hostname in groups['control'] }}" - clients: false - nfs_export: "/exports/cluster" diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 6e87c5d58..a56dda976 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-250312-1522-7e5c051d", - "RL9": "openhpc-RL9-250312-1435-7e5c051d" + "RL8": "openhpc-RL8-250319-1045-69713f23", + "RL9": "openhpc-RL9-250319-1045-69713f23" } } diff --git a/environments/common/inventory/group_vars/all/ansible_init.yml b/environments/common/inventory/group_vars/all/ansible_init.yml index af30f37d6..df4060f94 100644 --- a/environments/common/inventory/group_vars/all/ansible_init.yml +++ b/environments/common/inventory/group_vars/all/ansible_init.yml @@ -1,4 +1,4 @@ -ansible_init_wait: 1200 # seconds +ansible_init_wait: 300 # seconds ansible_init_pip_packages: # role defaults: diff --git a/environments/common/inventory/group_vars/all/basic_users.yml b/environments/common/inventory/group_vars/all/basic_users.yml index a7b9359b7..d94d12982 100644 --- a/environments/common/inventory/group_vars/all/basic_users.yml +++ b/environments/common/inventory/group_vars/all/basic_users.yml @@ -3,3 +3,7 @@ # See ansible/roles/basic_users/README.md for variable definitions. basic_users_users: [] + +# The following are defined for the purpose of compute-init +basic_users_homedir_server: "{{ groups['control'] | first }}" +basic_users_homedir_client: "{{ groups['login'] | first }}" \ No newline at end of file diff --git a/environments/common/inventory/group_vars/all/nfs.yml b/environments/common/inventory/group_vars/all/nfs.yml index 09a3203a0..39c264576 100644 --- a/environments/common/inventory/group_vars/all/nfs.yml +++ b/environments/common/inventory/group_vars/all/nfs.yml @@ -28,3 +28,13 @@ nfs_configurations: # NB: this is set as default for all shares above but is repeated here # in case nfs_export_clients is overriden nfs_export_clients: "{{ _nfs_node_ips }}" + + - comment: Export /exports/cluster from Slurm control node + nfs_enable: + server: "{{ inventory_hostname in groups['control'] }}" + clients: false + nfs_export: "/exports/cluster" + # prevent non-cluster IPs mounting the share: + # NB: this is set as default for all shares above but is repeated here + # in case nfs_export_clients is overriden + nfs_export_clients: "{{ _nfs_node_ips }}"