     value: "{{ item.value }}"
     no_extra_spaces: true
   loop: "{{ cvmfs_config | dict2items }}"
-
 
 # NOTE: Not clear how to make this idempotent
 - name: Ensure CVMFS config is setup
   command:
-    cmd: "cvmfs_config setup"
+    cmd: "cvmfs_config setup"
+
+
+- name: Configure openhpc
+  block:
+    - name: Check openhpc_slurm_control_host, openhpc_cluster_name or openhpc_slurm_partitions exist
+      assert:
+        that:
+          - openhpc_slurm_control_host is defined
+          - openhpc_cluster_name is defined
+          - openhpc_cluster_name != ''
+          - openhpc_slurm_partitions is defined
+        fail_msg: "Undefined openhpc_slurm_control_host, openhpc_cluster_name or openhpc_slurm_partitions."
+
+    - name: Fail if control host not in play and munge key not specified
+      fail:
+        msg: "Either the slurm control node must be in the play or `openhpc_munge_key` must be set"
+      when:
+        - openhpc_slurm_control_host not in ansible_play_hosts
+        - not openhpc_munge_key
+
+    # - name: Ensure Slurm directories exist
+    #   file:
+    #     path: "{{ openhpc_state_save_location }}"
+    #     owner: slurm
+    #     group: slurm
+    #     mode: 0755
+    #     state: directory
+    #   when: inventory_hostname == openhpc_slurm_control_host
+
+    # - name: Generate a Munge key on control host
+    #   # NB this is usually a no-op as the package install actually generates a (node-unique) one, so won't usually trigger handler
+    #   command: "dd if=/dev/urandom of=/etc/munge/munge.key bs=1 count=1024"
+    #   args:
+    #     creates: "/etc/munge/munge.key"
+    #   when: inventory_hostname == openhpc_slurm_control_host
+
+    # - name: Retrieve Munge key from control host
+    #   slurp:
+    #     src: "/etc/munge/munge.key"
+    #   register: openhpc_control_munge_key
+    #   delegate_to: "{{ openhpc_slurm_control_host }}"
+    #   when: openhpc_slurm_control_host in ansible_play_hosts
+
+    - name: Fix permissions on /etc to pass Munge startup checks
+      # Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2 makes /etc g=rwx rather than g=rx (where group=root)
+      # which fails munged startup checks
+      file:
+        path: /etc
+        state: directory
+        mode: g-w
+
+    - name: Write Munge key
+      copy:
+        content: "{{ openhpc_munge_key or (openhpc_control_munge_key.content | b64decode) }}"
+        dest: "/etc/munge/munge.key"
+        owner: munge
+        group: munge
+        mode: 0400
+      notify:
+        - Restart Munge service
+
+    - name: Ensure JobComp logfile exists
+      file:
+        path: "{{ openhpc_slurm_job_comp_loc }}"
+        state: touch
+        owner: slurm
+        group: slurm
+        mode: 0644
+        access_time: preserve
+        modification_time: preserve
+      when: openhpc_slurm_job_comp_type == 'jobcomp/filetxt'
+
+    - name: Template slurmdbd.conf
+      template:
+        src: slurmdbd.conf.j2
+        dest: /etc/slurm/slurmdbd.conf
+        mode: "0600"
+        owner: slurm
+        group: slurm
+      notify: Restart slurmdbd service
+      when: openhpc_enable.database | default(false) | bool
+
+    - name: Make local tempfile for slurm.conf templating # ensures simultaneous runs don't clobber each other
+      ansible.builtin.tempfile:
+      register: _slurm_conf_tmpfile
+      delegate_to: localhost
+      when: openhpc_enable.control | default(false) or not openhpc_slurm_configless
+      changed_when: false # so molecule doesn't fail
+      become: no
+
+    - name: Template basic slurm.conf
+      template:
+        src: slurm.conf.j2
+        dest: "{{ _slurm_conf_tmpfile.path }}"
+        lstrip_blocks: true
+        mode: 0644
+      delegate_to: localhost
+      when: openhpc_enable.control | default(false) or not openhpc_slurm_configless
+      changed_when: false # so molecule doesn't fail
+      become: no
+
+    - name: Customise slurm.conf
+      community.general.ini_file:
+        path: "{{ _slurm_conf_tmpfile.path }}"
+        option: "{{ item.key }}"
+        section: ''
+        value: "{{ (item.value | join(',')) if (item.value is sequence and item.value is not string) else item.value }}"
+        no_extra_spaces: true
+        create: no
+        mode: 0644
+      loop: "{{ openhpc_config | dict2items }}"
+      delegate_to: localhost
+      when: openhpc_enable.control | default(false) or not openhpc_slurm_configless
+      changed_when: false # so molecule doesn't fail
+      become: no
+
+    - name: Create slurm.conf
+      copy:
+        src: "{{ _slurm_conf_tmpfile.path }}"
+        dest: /etc/slurm/slurm.conf
+        owner: root
+        group: root
+        mode: 0644
+      when: openhpc_enable.control | default(false) or not openhpc_slurm_configless
+      notify:
+        - Restart slurmctld service
+      register: ohpc_slurm_conf
+      # NB uses restart rather than reload as number of nodes might have changed
+
+    - name: Create gres.conf
+      template:
+        src: "{{ openhpc_gres_template }}"
+        dest: /etc/slurm/gres.conf
+        mode: "0600"
+        owner: slurm
+        group: slurm
+      when: openhpc_enable.control | default(false) or not openhpc_slurm_configless
+      notify:
+        - Restart slurmctld service
+      register: ohpc_gres_conf
+      # NB uses restart rather than reload as this is needed in some cases
+
+    - name: Template cgroup.conf
+      # appears to be required even with NO cgroup plugins: https://slurm.schedmd.com/cgroups.html#cgroup_design
+      template:
+        src: cgroup.conf.j2
+        dest: /etc/slurm/cgroup.conf
+        mode: "0644" # perms/ownership based off src from ohpc package
+        owner: root
+        group: root
+      when: openhpc_enable.control | default(false) or not openhpc_slurm_configless
+
+    - name: Remove local tempfile for slurm.conf templating
+      ansible.builtin.file:
+        path: "{{ _slurm_conf_tmpfile.path }}"
+        state: absent
+      when: _slurm_conf_tmpfile.path is defined
+      delegate_to: localhost
+      changed_when: false # so molecule doesn't fail
+      become: no
+
+    - name: Notify handler for slurmd restart
+      debug:
+        msg: "notifying handlers" # meta: noop doesn't support 'when'
+      changed_when: true
+      when:
+        - openhpc_slurm_control_host in ansible_play_hosts
+        - hostvars[openhpc_slurm_control_host].ohpc_slurm_conf.changed or hostvars[openhpc_slurm_control_host].ohpc_gres_conf.changed # noqa no-handler
+      notify:
+        - Restart slurmd service
+
+    - name: Set slurmctld location for configless operation
+      lineinfile:
+        path: /etc/sysconfig/slurmd
+        line: "SLURMD_OPTIONS='--conf-server {{ openhpc_slurm_control_host_address | default(openhpc_slurm_control_host) }}'"
+        regexp: "^SLURMD_OPTIONS="
+        create: yes
+        owner: root
+        group: root
+        mode: 0644
+      when:
+        - openhpc_enable.batch | default(false)
+        - openhpc_slurm_configless
+      notify:
+        - Restart slurmd service
+      # Reloading is sufficient, but using a single handler means no bounce. Realistically this won't regularly change on a running slurmd, so restarting is OK.
+
+    # Munge state could be unchanged but the service is not running.
+    # Handle that here.
+    - name: Configure Munge service
+      service:
+        name: munge
+        enabled: "{{ openhpc_slurm_service_enabled | bool }}"
+        state: "{{ 'started' if openhpc_slurm_service_started | bool else 'stopped' }}"
+
+    - name: Flush handler
+      meta: flush_handlers # as then subsequent "ensure" is a no-op if slurm services bounced
+
+    - name: Ensure slurmdbd state
+      service:
+        name: slurmdbd
+        enabled: "{{ openhpc_slurm_service_enabled | bool }}"
+        state: "{{ 'started' if openhpc_slurm_service_started | bool else 'stopped' }}"
+      when: openhpc_enable.database | default(false) | bool
+
+    # - name: Ensure slurmctld state
+    #   service:
+    #     name: slurmctld
+    #     enabled: "{{ openhpc_slurm_service_enabled | bool }}"
+    #     state: "{{ 'started' if openhpc_slurm_service_started | bool else 'stopped' }}"
+    #   when: openhpc_enable.control | default(false) | bool
+
+    - name: Ensure slurmd state
+      service:
+        name: slurmd
+        enabled: "{{ openhpc_slurm_service_enabled | bool }}"
+        state: "{{ 'started' if openhpc_slurm_service_started | bool else 'stopped' }}"
+      when: openhpc_enable.batch | default(false) | bool
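For reference, a minimal sketch of the kind of mapping the "Customise slurm.conf" task above consumes via `openhpc_config | dict2items`. The variable name and the list-joining behaviour come from the diff; the specific keys and values below are only illustrative assumptions, not part of this change:

# Illustrative example only: any slurm.conf option/value pairs may be supplied.
# Scalar values are written as-is; list values are joined with commas by the
# task's value expression before being set in the generated slurm.conf.
openhpc_config:
  SlurmctldDebug: info                 # scalar, written unchanged
  SlurmctldParameters:                 # list, written as "enable_configless,idle_on_node_suspend"
    - enable_configless
    - idle_on_node_suspend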