Skip to content
Binary file added .DS_Store
Binary file not shown.
83 changes: 79 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Install and configure Slurm
Role Variables
--------------

All variables are optional. If nothing is set, the role will install the Slurm client programs, munge, and create a `slurm.conf` with a single `localhost` node and `debug` partition. See the [defaults](defaults/main.yml) and [example playbook](#example-playbook) for examples.
All variables are optional. If nothing is set, the role will install the Slurm client programs, munge, and create a `slurm.conf` with a single `localhost` node and `debug` partition. See the [defaults](defaults/main.yml) and [example playbooks](#example-playbooks) for examples.

For the various roles a slurm node can play, you can either set group names, or add values to a list, `slurm_roles`.

Expand All @@ -22,15 +22,17 @@ of that partition or node.

Set `slurm_upgrade` true to upgrade.

You can use `slurm_user` (a hash) and `slurm_create_user` (a bool) to pre-create a Slurm user (so that uids match). See
You can use `slurm_user` (a hash) and `slurm_create_user` (a bool) to pre-create a Slurm user (so that uids match). See

Dependencies
------------

None.

Example Playbook
----------------
Example Playbooks
-----------------

Minimal setup, all services on one node:

```yaml
- name: Slurm all in One
Expand All @@ -41,6 +43,79 @@ Example Playbook
- galaxyproject.slurm
```

More extensive example:

```yaml
- name: Slurm execution hosts
hosts: all
roles:
- galaxyproject.slurm
vars:
slurm_cgroup_config:
CgroupMountpoint: "/sys/fs/cgroup"
CgroupAutomount: yes
CgroupReleaseAgentDir: "/etc/slurm/cgroup"
ConstrainCores: yes
TaskAffinity: no
ConstrainRAMSpace: yes
ConstrainSwapSpace: no
ConstrainDevices: no
AllowedRamSpace: 100
AllowedSwapSpace: 0
MaxRAMPercent: 100
MaxSwapPercent: 100
MinRAMSpace: 30
slurm_config:
AccountingStorageType: "accounting_storage/none"
ClusterName: cluster
FastSchedule: 1
GresTypes: gpu
JobAcctGatherType: "jobacct_gather/none"
MpiDefault: none
ProctrackType: "proctrack/cgroup"
ReturnToService: 1
SchedulerType: "sched/backfill"
SelectType: "select/cons_res"
SelectTypeParameters: "CR_Core"
SlurmctldHost: "slurmctl"
SlurmctldLogFile: "/var/log/slurm/slurmctld.log"
SlurmctldPidFile: "/var/run/slurmctld.pid"
SlurmdLogFile: "/var/log/slurm/slurmd.log"
SlurmdPidFile: "/var/run/slurmd.pid"
SlurmdSpoolDir: "/var/spool/slurmd"
StateSaveLocation: "/var/spool/slurmctld"
SwitchType: "switch/none"
TaskPlugin: "task/affinity,task/cgroup"
TaskPluginParam: Sched
slurm_create_user: yes
slurm_gres_config:
- File: /dev/nvidia[0-3]
Name: gpu
NodeName: gpu[01-10]
Type: tesla
slurm_munge_key: "../../../munge.key"
slurm_nodes:
- name: "gpu[01-10]"
CoresPerSocket: 18
Gres: "gpu:tesla:4"
Sockets: 2
ThreadsPerCore: 2
slurm_partitions:
- name: gpu
Default: YES
MaxTime: UNLIMITED
Nodes: "gpu[01-10]"
slurm_roles: ['exec']
slurm_user:
comment: "Slurm Workload Manager"
gid: 888
group: slurm
home: "/var/lib/slurm"
name: slurm
shell: "/usr/sbin/nologin"
uid: 888
```

License
-------

Expand Down
11 changes: 9 additions & 2 deletions defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ __slurm_config_default:
# default is proctrack/cgroup which is the best but also less than 100% chance of working e.g. in docker
ProctrackType: proctrack/pgid
# slurmctld options
AccountingStoragePort: 6819
SlurmctldPort: 6817
SlurmctldLogFile: "{{ '/var/log/slurm-llnl/slurmctld.log' if __slurm_debian else omit }}"
SlurmctldPidFile: >-
Expand Down Expand Up @@ -75,11 +76,17 @@ __slurm_packages: "{{ __slurm_debian_packages if __slurm_debian else __slurm_red

__slurmdbd_config_default:
AuthType: auth/munge
DbdHost: testing-slurm-slave-3
DbdPort: 6819
SlurmUser: "{{ __slurm_user_name }}"
PidFile: >-
{{
'/var/run/slurm-llnl/slurmdbd.pid' if __slurm_debian else omit
'/var/run/slurm/slurmdbd.pid'
}}
LogFile: "{{ '/var/log/slurm-llnl/slurmdbd.log' if __slurm_debian else omit }}"
LogFile: "{{ '/var/log/slurm/slurmdbd.log' }}"
StorageLoc: mariadb
StorageUser: slurm
StoragePass: testslurmdb
StoragePort: 3306
StorageType: accounting_storage/mysql
__slurmdbd_config_merged: "{{ __slurmdbd_config_default | combine(slurmdbd_config | default({})) }}"
8 changes: 8 additions & 0 deletions handlers/main.yml
Original file line number Diff line number Diff line change
@@ -1,24 +1,32 @@
---
- name: restart munge
service:
name: munge
state: restarted

- name: reload slurmd
become: yes
service:
name: "{{ slurmd_service_name }}"
state: reloaded
when: "'slurmexechosts' in group_names or 'exec' in slurm_roles"

- name: restart slurmd
become: yes
service:
name: "{{ slurmd_service_name }}"
state: restarted
when: "'slurmexechosts' in group_names or 'exec' in slurm_roles"

- name: reload slurmctld
become: yes
service:
name: "{{ slurmctld_service_name }}"
state: reloaded
when: "'slurmservers' in group_names or 'controller' in slurm_roles"

- name: restart slurmctld
become: yes
service:
name: "{{ slurmctld_service_name }}"
state: restarted
Expand Down
Binary file added tasks/.DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions tasks/_inc_create_config_dir.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
file:
path: "{{ slurm_config_dir }}"
state: directory

18 changes: 18 additions & 0 deletions tasks/common.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---

- name: Install Slurm client
become: true
package:
name: "{{ __slurm_packages.client }}"
state: "{{ 'latest' if slurm_upgrade else 'present' }}"
Expand All @@ -16,6 +17,7 @@
when: slurm_rotate_logs

- name: Install slurm.conf
become: true
template:
src: "slurm.conf.j2"
dest: "{{ slurm_config_dir }}/slurm.conf"
Expand All @@ -26,6 +28,22 @@
- restart slurmd
- restart slurmctld

- name: Change ownership, group and permissions for /var/spool
become: true
file:
path: /var/spool
owner: slurm
group: slurm
mode: 0771

- name: Change ownership, group and permissions for /var/run/slurm
become: true
file:
path: /var/run/slurm
owner: slurm
group: slurm
mode: 0771

- name: Include munge tasks
include_tasks: munge.yml
when: slurm_configure_munge
43 changes: 43 additions & 0 deletions tasks/db.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
- name: Install MariaDB, used for SLURM accounting
become: true
package:
name:
- mariadb-server
- python-mysqldb
state: latest

- name: Ensure InnoDB parameters are large enough for SLURM DBD
become: true
blockinfile:
path: '/etc/mysql/my.cnf'
state: present
backup: yes
insertafter: EOF
content: |
# See https://wiki.fysik.dtu.dk/niflheim/Slurm_database#id5
[mysqld]
innodb_buffer_pool_size=1G
innodb_log_file_size=64M
innodb_lock_wait_timeout=900

- name: Ensure MariaDB daemon is up
become: true
service:
name: 'mariadb'
enabled: yes
state: started

- name: Create DB for SLURMDBD
become: true
mysql_db:
login_unix_socket: /var/run/mysqld/mysqld.sock
name: mariadb
state: present


- name: Create DB user for SLURMDBD
become: true
mysql_user:
name: slurm
password: testslurmdb
state: present
3 changes: 3 additions & 0 deletions tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,23 @@
import_tasks: common.yml

- name: Ensure slurmdbd is enabled and running
become: yes
service:
name: "{{ slurmdbd_service_name }}"
enabled: yes
state: started
when: "'slurmdbdservers' in group_names or 'dbd' in slurm_roles"

- name: Ensure slurmctld is enabled and running
become: yes
service:
name: "{{ slurmctld_service_name }}"
enabled: yes
state: started
when: "'slurmservers' in group_names or 'controller' in slurm_roles"

- name: Ensure slurmd is enabled and running
become: yes
service:
name: "{{ slurmd_service_name }}"
enabled: yes
Expand Down
4 changes: 4 additions & 0 deletions tasks/munge.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---

- name: Check munge dir
become: true
file:
path: /etc/munge
owner: munge
Expand All @@ -16,8 +17,11 @@
group: munge
mode: 0400
when: slurm_munge_key is defined
notify:
- restart munge

- name: Ensure Munge is enabled and running
become: yes
service:
name: munge
enabled: yes
Expand Down
40 changes: 38 additions & 2 deletions tasks/slurmctld.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,62 @@
---

- name: Install Slurm controller packages
become: yes
package:
name: "{{ __slurm_packages.slurmctld }}"
state: "{{ 'latest' if slurm_upgrade else 'present' }}"

- name: Create slurm state directory
become: yes
file:
path: "{{ __slurm_config_merged.StateSaveLocation }}"
owner: "{{ __slurm_user_name }}"
group: "{{ __slurm_group_name }}"
mode: 0700
mode: 0755
state: directory
when: slurm_create_dirs
notify:
- reload slurmctld

- name: Create slurm log directory
become: true
file:
path: "{{ __slurm_config_merged.SlurmctldLogFile | dirname }}"
owner: "{{ __slurm_user_name }}"
group: "{{ __slurm_group_name }}"
mode: 0755
state: directory
when: slurm_create_dirs and __slurm_config_merged.SlurmctldLogFile
when: slurm_create_dirs and __slurm_config_merged.SlurmctldLogFile != omit

- name: Create slurm pid directory
become: true
file:
path: "{{ __slurm_config_merged.SlurmctldPidFile | dirname }}"
owner: "{{ __slurm_user_name }}"
group: "{{ __slurm_group_name }}"
mode: 0755
state: directory
when: slurm_create_dirs and __slurm_config_merged.SlurmctldPidFile != omit

- name: Include config dir creation tasks
include_tasks: _inc_create_config_dir.yml
when: slurm_create_dirs

- name: Install extra execution host configs
become: yes
template:
src: "{{ item.template }}"
dest: "{{ slurm_config_dir }}/{{ item.name }}"
backup: yes
with_items:
- name: cgroup.conf
config: slurm_cgroup_config
template: "generic.conf.j2"
- name: gres.conf
config: slurm_gres_config
template: "gres.conf.j2"
loop_control:
label: "{{ item.name }}"
when: item.config in vars
notify:
- reload slurmctld
Loading