File tree Expand file tree Collapse file tree 13 files changed +171
-8
lines changed
Expand file tree Collapse file tree 13 files changed +171
-8
lines changed Original file line number Diff line number Diff line change 9999 . environments/.stackhpc/activate
100100 ansible-playbook -vv ansible/adhoc/hpctests.yml
101101
102+ - name : Run EESSI tests
103+ run : |
104+ . venv/bin/activate
105+ . environments/.stackhpc/activate
106+ ansible-playbook -vv ansible/ci/check_eessi.yml
107+
102108 - name : Confirm Open Ondemand is up (via SOCKS proxy)
103109 run : |
104110 . venv/bin/activate
Original file line number Diff line number Diff line change 112112 tasks_from : config.yml
113113 tags : config
114114
115+ - name : Setup EESSI
116+ hosts : eessi
117+ tags : eessi
118+ become : true
119+ gather_facts : false
120+ tasks :
121+ - name : Install and configure EESSI
122+ import_role :
123+ name : eessi
124+
115125- hosts : update
116126 gather_facts : false
117127 become : yes
Original file line number Diff line number Diff line change 1+ ---
2+ - name : Run EESSI test job
3+ hosts : login[0]
4+ vars :
5+ eessi_test_rootdir : /home/eessi_test
6+ tasks :
7+ - name : Create test root directory
8+ file :
9+ path : " {{ eessi_test_rootdir }}"
10+ state : directory
11+ owner : " {{ ansible_user }}"
12+ group : " {{ ansible_user }}"
13+ become : true
14+
15+ - name : Clone eessi-demo repo
16+ ansible.builtin.git :
17+ repo : " https://github.com/eessi/eessi-demo.git"
18+ dest : " {{ eessi_test_rootdir }}/eessi-demo"
19+
20+ - name : Run test job
21+ ansible.builtin.shell :
22+ cmd : |
23+ source /cvmfs/pilot.eessi-hpc.org/latest/init/bash
24+ srun ./run.sh
25+ chdir : " {{ eessi_test_rootdir }}/eessi-demo/TensorFlow"
26+ executable : /bin/bash
27+ register : job_output
28+
29+ - name : Fail if job output contains error
30+ fail :
31+ # Note: Job prints live progress bar to terminal, so use regex filter to remove this from stdout
32+ msg : " Test job using EESSI modules failed. Job output was: {{ job_output.stdout | regex_replace('\b ', '') }}"
33+ when : ' "Epoch 5/5" not in job_output.stdout'
34+
Original file line number Diff line number Diff line change 22 gather_facts : false
33 become : true
44 vars :
5- sacct_stdout_expected : |- # based on CI running hpctests as the first job - NB note no trailing newline
5+ sacct_stdout_expected : |- # based on CI running hpctests as the first job
66 JobID,JobName,State
77 1,pingpong.sh,COMPLETED
88 2,pingmatrix.sh,COMPLETED
1818 register : sacct
1919 - name : Check info for ended jobs
2020 assert :
21- that : sacct.stdout == sacct_stdout_expected
21+ that : sacct_stdout_expected in sacct.stdout
2222 fail_msg : |
2323 Expected:
2424 --{{ sacct_stdout_expected }}--
2525 Got:
2626 --{{ sacct.stdout }}--
27- success_msg : sacct shows hpctests jobs as first and only jobs
27+ success_msg : sacct shows hpctests jobs as first jobs in list
Original file line number Diff line number Diff line change 1+ EESSI
2+ =====
3+
4+ Configure the EESSI pilot repository for use on given hosts.
5+
6+ Requirements
7+ ------------
8+
9+ None.
10+
11+ Role Variables
12+ --------------
13+
14+ - ` cvmfs_quota_limit_mb ` : Optional int. Maximum size of local package cache on each node in MB.
15+ - `cvmfs_config_overrides`: Optional dict. Set of key-value pairs for additional CernVM-FS settings; see the [official docs](https://cvmfs.readthedocs.io/en/stable/cpt-configure.html) for the list of options. Each dict key should correspond to a valid config variable (e.g. `CVMFS_HTTP_PROXY`) and the corresponding dict value will be set as the variable value (e.g. `https://my-proxy.com`). These configuration parameters will be written to the `/etc/cvmfs/default.local` config file on each host in the form `KEY=VALUE`.
16+
17+ Dependencies
18+ ------------
19+
20+ None.
21+
22+ Example Playbook
23+ ----------------
24+
25+ ``` yaml
26+ - name : Setup EESSI
27+ hosts : eessi
28+ tags : eessi
29+ become : true
30+ tasks :
31+ - name : Install and configure EESSI
32+ import_role :
33+ name : eessi
34+ ```
Original file line number Diff line number Diff line change 1+ ---
2+ # Default to 10GB
3+ cvmfs_quota_limit_mb : 10000
4+
5+ cvmfs_config_default :
6+ CVMFS_CLIENT_PROFILE : single
7+ CVMFS_QUOTA_LIMIT : " {{ cvmfs_quota_limit_mb }}"
8+
9+ cvmfs_config_overrides : {}
10+
11+ cvmfs_config : " {{ cvmfs_config_default | combine(cvmfs_config_overrides) }}"
Original file line number Diff line number Diff line change 1+ ---
2+ - name : Download Cern GPG key
3+ ansible.builtin.get_url :
4+ url : http://cvmrepo.web.cern.ch/cvmrepo/yum/RPM-GPG-KEY-CernVM
5+ dest : ./cvmfs-key.gpg
6+
7+ - name : Import downloaded GPG key
8+ command : rpm --import cvmfs-key.gpg
9+
10+ - name : Add CVMFS repo
11+ dnf :
12+ name : https://ecsft.cern.ch/dist/cvmfs/cvmfs-release/cvmfs-release-latest.noarch.rpm
13+
14+ - name : Install CVMFS
15+ dnf :
16+ name : cvmfs
17+
18+ - name : Install EESSI CVMFS config
19+ dnf :
20+ name : https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi-latest.noarch.rpm
21+ # NOTE: Can't find any docs on obtaining gpg key - maybe downloading directly from github is ok?
22+ disable_gpg_check : true
23+
24+ # Alternative version using official repo - still no GPG key :(
25+ # - name: Add EESSI repo
26+ # dnf:
27+ # name: http://repo.eessi-infra.org/eessi/rhel/8/noarch/eessi-release-0-1.noarch.rpm
28+
29+ # - name: Install EESSI CVMFS config
30+ # dnf:
31+ # name: cvmfs-config-eessi
32+
33+ - name : Add base CVMFS config
34+ community.general.ini_file :
35+ dest : /etc/cvmfs/default.local
36+ section : null
37+ option : " {{ item.key }}"
38+ value : " {{ item.value }}"
39+ no_extra_spaces : true
40+ loop : " {{ cvmfs_config | dict2items }}"
41+
42+
43+ # NOTE: Not clear how to make this idempotent
44+ - name : Ensure CVMFS config is setup
45+ command :
46+ cmd : " cvmfs_config setup"
Original file line number Diff line number Diff line change 11flavor = " vm.ska.cpu.general.small"
22networks = [" a262aabd-e6bf-4440-a155-13dbc1b5db0e" ] # WCDC-iLab-60
3- source_image_name = " openhpc-230412-1447-e3769af6 .qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/258
4- # source_image_name = "Rocky-8-GenericCloud-Base-8.7-20221130 .0.x86_64.qcow2"
3+ source_image_name = " openhpc-230503-0944-bf8c3f63 .qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/252
4+ fatimage_source_image_name = " Rocky-8-GenericCloud-8.6.20220702 .0.x86_64.qcow2"
55ssh_keypair_name = " slurm-app-ci"
66security_groups = [" default" , " SSH" ]
77ssh_bastion_host = " 128.232.222.183"
Original file line number Diff line number Diff line change @@ -17,7 +17,7 @@ variable "create_nodes" {
1717variable "cluster_image" {
1818 description = " single image for all cluster nodes - a convenience for CI"
1919 type = string
20- default = " openhpc-230412-1447-e3769af6 .qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/258
20+ default = " openhpc-230503-0944-bf8c3f63 .qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/252
2121 # default = "Rocky-8-GenericCloud-Base-8.7-20221130.0.x86_64.qcow2"
2222 # default = "Rocky-8-GenericCloud-8.6.20220702.0.x86_64.qcow2"
2323}
Original file line number Diff line number Diff line change @@ -22,13 +22,14 @@ grafana_address: "{{ hostvars[groups['grafana'].0].api_address }}"
2222
2323# Note RockyLinux 8.5 defines system user/groups in range 201-999
2424appliances_local_users_ansible_user_name : " {{ ansible_ssh_user | default(ansible_user) }}"
25+ appliances_local_users_podman_uid : 1001 # UID for podman user - normally next UID after default user
2526appliances_local_users_podman : # also used in environments/common/inventory/group_vars/all/podman.yml:podman_users
2627 name : podman
2728 comment : Used for running all containers
2829 # Would like to set subuid so that we know what will appear in /etc/subuid
2930 # See: https://github.com/ansible/ansible/issues/68199
3031 home : /var/lib/podman
31- uid : 1001
32+ uid : " {{ appliances_local_users_podman_uid }} "
3233
3334appliances_local_users_default :
3435 - user :
You can’t perform that action at this time.
0 commit comments