Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

152 changes: 70 additions & 82 deletions common/library/modules/slurm_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,28 @@
version_added: "1.0.0"
description:
- This module provides utilities for working with Slurm configuration files.
- It can parse a Slurm conf file into a dictionary (f2d).
- It can convert a dictionary back to Slurm conf INI format (d2f).
- It can parse a Slurm conf file into a dictionary (parse).
- It can convert a dictionary back to Slurm conf INI format (render).
- It can merge multiple configuration sources (files and/or dicts) into one (merge).
options:
op:
description:
- The operation to perform.
- C(f2d) - File to dict. Parse a Slurm conf file and return as dictionary.
- C(d2f) - Dict to file. Convert a dictionary to Slurm conf INI lines.
- C(parse) - File to dict. Parse a Slurm conf file and return as dictionary.
- C(render) - Dict to file. Convert a dictionary to Slurm conf INI lines.
- C(merge) - Merge multiple configuration sources into one.
required: true
type: str
choices: ['f2d', 'd2f', 'merge']
choices: ['parse', 'render', 'merge']
path:
description:
- Path to the Slurm configuration file.
- Required when I(op=f2d).
- Required when I(op=parse).
type: str
conf_map:
description:
- Dictionary of configuration key-value pairs.
- Required when I(op=d2f).
- Required when I(op=render).
type: dict
default: {}
conf_sources:
Expand All @@ -58,33 +58,28 @@
- Used for validation of configuration keys.
type: str
default: slurm
choices: ['slurm', 'cgroup', 'gres', 'mpi', 'slurmdbd']
author:
- Jagadeesh N V ([email protected])
notes:
- Requires Python 3.7+ for ordered dict behavior.
- Array-type parameters (NodeName, PartitionName, SlurmctldHost, etc.) are handled specially.
- Jagadeesh N V (@jagadeeshnv)
'''

EXAMPLES = r'''
# Parse a slurm.conf file into a dictionary
- name: Read slurm.conf
slurm_conf:
op: f2d
op: parse
path: /etc/slurm/slurm.conf
conf_name: slurm
register: slurm_config

# Convert a dictionary to slurm.conf INI lines
- name: Generate slurm.conf lines
slurm_conf:
op: d2f
op: render
conf_map:
ClusterName: mycluster
SlurmctldPort: 6817
SlurmctldHost:
- SlurmctldHost: controller1
- SlurmctldHost: controller2
- controller2
NodeName:
- NodeName: node[1-10]
CPUs: 16
Expand Down Expand Up @@ -118,37 +113,35 @@
'''

RETURN = r'''
slurm_dict:
description: Parsed configuration as a dictionary (when op=f2d).
type: dict
returned: when op=f2d
sample: {"ClusterName": "mycluster", "SlurmctldPort": "6817"}
slurm_conf:
description: Configuration as INI-format lines (when op=d2f).
type: list
returned: when op=d2f
sample: ["ClusterName=mycluster", "SlurmctldPort=6817"]
conf_dict:
description: Merged configuration as a dictionary (when op=merge).
description: Configuration as a dictionary - the parsed file when op=parse, the merged result when op=merge.
type: dict
returned: when op=merge
returned: when op=merge or op=parse
sample: {"ClusterName": "mycluster", "SlurmctldTimeout": 120}
ini_lines:
description: Merged configuration as INI-format lines (when op=merge).
description: Configuration as INI-format lines - rendered from conf_map when op=render, from the merged result when op=merge.
type: list
returned: when op=merge
returned: when op=merge or op=render
sample: ["ClusterName=mycluster", "SlurmctldTimeout=120"]
'''

# TODO:
# - Module is not case sensitive for conf keys
# - Support for validation of S_P_<data> types
# - Validation for choices for each type
# - Choices types for each type
# - Merge of sub options
# - Hostlist expressions, split and merge computations


from collections import OrderedDict
from ansible.module_utils.basic import AnsibleModule
from ansible.module_utils.input_validation.common_utils.slurm_conf_utils import SlurmParserEnum, all_confs
import os

# NOTE: depends on python3.7+ where dict order is maintained


def read_dict2ini(conf_dict):
"""Convert a configuration dictionary to INI-style lines for slurm.conf."""
data = []
for k, v in conf_dict.items():
if isinstance(v, list):
Expand All @@ -166,77 +159,71 @@ def read_dict2ini(conf_dict):
return data


def parse_slurm_conf(file_path, module):
def parse_slurm_conf(file_path, conf_name, validate):
"""Parses the slurm.conf file and returns it as a dictionary."""
# slurm_dict = {"NodeName": [], "PartitionName": []}
conf_name = module.params['conf_name']
current_conf = all_confs.get(conf_name)
current_conf = all_confs.get(conf_name, {})
slurm_dict = OrderedDict()

if not os.path.exists(file_path):
raise FileNotFoundError(f"{file_path} not found.")

with open(file_path, 'r') as f:
with open(file_path, 'r', encoding='utf-8') as f:
for line in f:
# handles any comment after the data
line = line.split('#')[0].strip()

# Skip comments and empty lines
if not line:
continue
# Split the line by one or more spaces
items = line.split()
tmp_dict = OrderedDict()
for item in items:
# module.warn(f"Item: {item}")
# Split only on the first '=' to allow '=' inside the value
key, value = item.split('=', 1)
tmp_dict[key.strip()] = value.strip()
skey = list(tmp_dict.keys())[0]
if skey not in current_conf:
raise Exception(f"Invalid key while parsing {file_path}: {skey}")
# if current_conf[skey] == SlurmParserEnum.S_P_ARRAY or len(tmp_dict) > 1:
if current_conf[skey] == SlurmParserEnum.S_P_ARRAY:
# TODO hostlist expressions and multiple DEFAULT entries handling
# if len(tmp_dict) == 1:
# first_key = list(tmp_dict.keys())[0]
# first_value = list(tmp_dict.values())[0]
# slurm_dict[first_key] = list(
# slurm_dict.get(first_key, [])) + [first_value]
# else:
if validate and skey not in current_conf:
raise ValueError(f"Invalid key while parsing {file_path}: {skey}")
if current_conf.get(skey) == SlurmParserEnum.S_P_ARRAY:
slurm_dict[list(tmp_dict.keys())[0]] = list(
slurm_dict.get(list(tmp_dict.keys())[0], [])) + [tmp_dict]
elif current_conf.get(skey) == SlurmParserEnum.S_P_CSV:
existing_values = [v.strip() for v in slurm_dict.get(skey, "").split(',') if v.strip()]
new_values = [v.strip() for v in tmp_dict[skey].split(',') if v.strip()]
slurm_dict[skey] = ",".join(list(dict.fromkeys(existing_values + new_values)))
elif current_conf.get(skey) == SlurmParserEnum.S_P_LIST:
slurm_dict[skey] = list(slurm_dict.get(skey, [])) + list(tmp_dict.values())
else:
# TODO handle csv values, currently no definite data type for csv values
slurm_dict.update(tmp_dict)

return slurm_dict


def slurm_conf_dict_merge(conf_dict_list, module):
def slurm_conf_dict_merge(conf_dict_list, conf_name):
"""Merge multiple Slurm configuration dictionaries into a single dictionary."""
merged_dict = OrderedDict()
current_conf = all_confs.get(conf_name, {})
for conf_dict in conf_dict_list:
for ky, vl in conf_dict.items():
if isinstance(vl, list):
if current_conf.get(ky) == SlurmParserEnum.S_P_ARRAY:
for item in vl:
if isinstance(item, dict):
# module.warn(f"DICT Key: {ky}, Value: {vl}")
existing_dict = merged_dict.get(ky, {})
inner_dict = existing_dict.get(item.get(ky), {})
inner_dict.update(item)
# TODO Partition node combiner logic
existing_dict[item.get(ky)] = inner_dict
merged_dict[ky] = existing_dict
else:
# module.warn(f"LIST Key: {ky}, Value: {vl}")
existing_list = merged_dict.get(ky, [])
# module.warn(f"Existing list: {existing_list}")
# module.warn(f"Item: {item}")
if item not in existing_list:
# existing_list.append(item)
existing_list.update(item)
# module.warn(f"Updated list: {existing_list}")
merged_dict[ky] = existing_list
elif current_conf.get(ky) == SlurmParserEnum.S_P_LIST:
existing_list = merged_dict.get(ky, [])
if isinstance(vl, list):
new_items = vl
else:
new_items = [vl]
merged_dict[ky] = list(dict.fromkeys(existing_list + new_items))
elif current_conf.get(ky) == SlurmParserEnum.S_P_CSV:
existing_values = [v.strip() for v in merged_dict.get(ky, "").split(',') if v.strip()]
new_values = [v.strip() for v in vl.split(',') if v.strip()]
merged_dict[ky] = ",".join(list(dict.fromkeys(existing_values + new_values)))
else:
merged_dict[ky] = vl
# flatten the dict
Expand All @@ -248,50 +235,51 @@ def slurm_conf_dict_merge(conf_dict_list, module):


def run_module():
"""Entry point for the Ansible module handling slurm.conf operations."""
module_args = {
"path": {'type': 'str'},
"op": {'type': 'str', 'required': True, 'choices': ['f2d', 'd2f', 'merge']},
"op": {'type': 'str', 'required': True, 'choices': ['parse', 'render', 'merge']},
"conf_map": {'type': 'dict', 'default': {}},
"conf_sources": {'type': 'list', 'elements': 'raw', 'default': []},
"conf_name": {'type': 'str', 'default': 'slurm'}
"conf_name": {'type': 'str', 'default': 'slurm'},
"validate": {'type': 'bool', 'default': False}
}

result = {"changed": False, "slurm_dict": {}, "failed": False}
result = {"changed": False, "failed": False}

# Create the AnsibleModule object
module = AnsibleModule(argument_spec=module_args,
required_if=[
('op', 'd2f', ('conf_map',)),
('op', 'render', ('conf_map',)),
('op', 'merge', ('conf_sources',))
],
supports_check_mode=True)
try:
conf_name = module.params['conf_name']
validate = module.params['validate']
# Parse the slurm.conf file
if module.params['op'] == 'f2d':
s_dict = parse_slurm_conf(module.params['path'], module)
result['slurm_dict'] = s_dict
elif module.params['op'] == 'd2f':
if module.params['op'] == 'parse':
s_dict = parse_slurm_conf(module.params['path'], conf_name, validate)
result['conf_dict'] = s_dict
elif module.params['op'] == 'render':
s_list = read_dict2ini(module.params['conf_map'])
result['slurm_conf'] = s_list
result['ini_lines'] = s_list
elif module.params['op'] == 'merge':
conf_dict_list = []
for conf_source in module.params['conf_sources']:
if isinstance(conf_source, dict):
conf_dict_list.append(conf_source)
elif isinstance(conf_source, str):
if not os.path.exists(conf_source):
raise Exception(f"File {conf_source} does not exist")
s_dict = parse_slurm_conf(conf_source, module)
# module.warn(f"Conf dict: {s_dict}")
raise FileNotFoundError(f"File {conf_source} does not exist")
s_dict = parse_slurm_conf(conf_source, conf_name, validate)
conf_dict_list.append(s_dict)
# module.warn("After append")
else:
raise Exception(f"Invalid type for conf_source: {type(conf_source)}")
# module.exit_json(changed=False, conf_dict=conf_dict_list)
merged_dict = slurm_conf_dict_merge(conf_dict_list, module)
raise TypeError(f"Invalid type for conf_source: {type(conf_source)}")
merged_dict = slurm_conf_dict_merge(conf_dict_list, conf_name)
result['conf_dict'] = merged_dict
result['ini_lines'] = read_dict2ini(merged_dict)
except Exception as e:
except (FileNotFoundError, ValueError, TypeError, AttributeError) as e:
result['failed'] = True
result['msg'] = str(e)
module.fail_json(msg=str(e))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,8 @@
- mkdir -p /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts
- echo "{{ cloud_init_nfs_path }}/cert /cert nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd /var/spool/slurmd nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path}}/hpc_tools/container_images /hpc_tools/container_images nfs defaults,_netdev 0 0" >> /etc/fstab
Expand All @@ -153,6 +152,7 @@
- chmod {{ file_mode_755 }} /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm
- chmod {{ file_mode_400 }} /etc/munge/munge.key
- chmod {{ file_mode_755 }} /etc/slurm/epilog.d/
- chmod {{ file_mode_755 }} /etc/slurm/epilog.d/logout_user.sh
- mkdir -p /var/spool/slurmd
- chmod {{ file_mode_755 }} /var/spool/slurmd
- chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool/slurmd
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,10 +307,10 @@
content: |
SELECT VERSION();
SHOW DATABASES;

CREATE DATABASE slurm_acct_db;
CREATE USER 'slurm'@'%' IDENTIFIED BY '{{ hostvars['localhost']['slurm_db_password'] }}';
GRANT ALL PRIVILEGES ON slurm_acct_db.* TO 'slurm'@'%';
CREATE DATABASE IF NOT EXISTS {{ apply_config['slurmdbd']['StorageLoc'] }};
CREATE USER IF NOT EXISTS '{{ apply_config['slurmdbd']['SlurmUser'] }}'@'%' IDENTIFIED BY '{{ hostvars['localhost']['slurm_db_password'] }}';
ALTER USER '{{ apply_config['slurmdbd']['SlurmUser'] }}'@'%' IDENTIFIED BY '{{ hostvars['localhost']['slurm_db_password'] }}';
GRANT ALL PRIVILEGES ON {{ apply_config['slurmdbd']['StorageLoc'] }}.* TO '{{ apply_config['slurmdbd']['SlurmUser'] }}'@'%';
FLUSH PRIVILEGES;

- path: /root/omnia_slurm_scripts/00_munge_setup.sh
Expand Down Expand Up @@ -405,7 +405,7 @@
echo "${value:-$default}"
}
#dir StateSaveLocation
StateSaveLocation=$(get_value_slurm_conf "StateSaveLocation" "/var/spool")
StateSaveLocation=$(get_value_slurm_conf "StateSaveLocation" "/var/spool/slurmctld")
mkdir -pv $StateSaveLocation
chown -v "$SLURM_USER:$SLURM_GROUP" $StateSaveLocation
chmod -v 0744 $StateSaveLocation
Expand Down Expand Up @@ -477,7 +477,7 @@
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab
{% if powervault_config is not defined %}
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/lib/mysql /var/lib/mysql nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmctld /var/spool/slurmctld nfs defaults,_netdev 0 0" >> /etc/fstab
{% endif %}
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@
echo "[INFO] Updating /etc/fstab with NFS entries for Pulp cert, Slurm and Munge paths"
echo "{{ cloud_init_nfs_path }}/cert /cert nfs defaults,_netdev 0 0" >> /etc/fstab
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd /var/spool/slurmd nfs defaults,_netdev 0 0" >> /etc/fstab
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab
echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab
Expand Down
18 changes: 17 additions & 1 deletion discovery/roles/slurm_config/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,9 @@ __default_config:
SlurmctldPort: 6817
SlurmdPort: 6818
SrunPortRange: "60001-63000"
StateSaveLocation: "/var/spool/state"
StateSaveLocation: "/var/spool/slurmctld"
SlurmdSpoolDir: "/var/spool/slurmd"
SlurmctldParameters: "{{ slurm_ctld_parameters | join(',') }}"
ReturnToService: 2
SchedulerType: sched/backfill
MpiDefault: none
Expand All @@ -283,6 +284,16 @@ __default_config:
SlurmctldTimeout: 120
SlurmdTimeout: 300
Epilog: "/etc/slurm/epilog.d/logout_user.sh"
PluginDir: "{{ plugin_slurm_dir }}"
NodeName:
- NodeName: DEFAULT
State: UNKNOWN
PartitionName:
- PartitionName: DEFAULT
Nodes: ALL
Default: true
MaxTime: INFINITE
State: UP
# S_P_ARRAY type parameter to be provided this way
# Epilog:
# - Epilog: "/etc/slurm/epilog.d/logout_user.sh"
Expand All @@ -293,5 +304,10 @@ __default_config:
SlurmUser: "{{ slurm_user }}"
StorageType: accounting_storage/mysql
StorageLoc: slurm_acct_db
StoragePort: "{{ slurm_db_port }}"
StorageUser: "{{ slurm_dbd_db_username }}"
StoragePass: "{{ slurm_db_password }}"
PluginDir: "{{ plugin_slurm_dir }}"
DbdPort: "{{ slurm_dbd_port }}"
gres:
AutoDetect: nvml
Loading