Skip to content

Commit fc48709

Browse files
committed
Added topology-aware scheduling for compute VMs
1 parent deab8e8 commit fc48709

File tree

7 files changed

+135
-0
lines changed

7 files changed

+135
-0
lines changed

ansible/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,5 @@ roles/*
9696
!roles/nhc/**
9797
!roles/eessi/
9898
!roles/eessi/**
99+
!roles/topology/
100+
!roles/topology/**
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# If set to non-empty string, will override topology.conf file auto-detected from OpenStack project
2+
topology_topology_override: ""
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/usr/bin/python
2+
3+
# Copyright: (c) 2025, StackHPC
4+
# Apache 2 License
5+
6+
from ansible.module_utils.basic import AnsibleModule
7+
import openstack
8+
9+
DOCUMENTATION = """
10+
---
11+
module: map_hosts
12+
short_description: Creates map of OpenStack VM network topology
13+
description:
14+
- Creates map representing the network topology tree of an OpenStack project with a hierarchy
15+
of: Availability Zone -> Hypervisors/Baremetal nodes -> VMs/Baremetal instances
16+
options:
17+
compute_vms:
18+
description:
19+
- List of VM names within the target OpenStack project to include in the tree
20+
required: true
21+
type: list
elements: str
22+
author:
23+
- Steve Brasier, William Tripp, StackHPC
24+
"""
25+
26+
RETURN = """
27+
topology:
28+
description:
29+
Map representing tree of project topology. Top level keys are AZ names, their values
30+
are maps of shortened unique identifiers of hosts UUIDs to lists of VM names
31+
returned: success
32+
type: dict[str, dict[str,list[str]]]
33+
sample:
34+
"nova-az":
35+
"afe9":
36+
- "mycluster-compute-0"
37+
- "mycluster-compute-1"
38+
"00f9":
39+
- "mycluster-compute-vm-on-other-hypervisor"
40+
"""
41+
42+
EXAMPLES = """
43+
- name: Get topology map
44+
map_hosts:
45+
compute_vms:
46+
- mycluster-compute-0
47+
- mycluster-compute-1
48+
"""
49+
50+
def min_prefix(uuids, start=4):
    """Return the smallest prefix length >= start which keeps the ids unique.

    Args:
        uuids: List of identifier strings. Duplicate entries are tolerated
            and counted once.
        start: Minimum prefix length to consider.

    Returns:
        The shortest length ``n >= start`` such that truncating every distinct
        id to ``n`` characters still yields distinct values. If no shorter
        prefix is sufficient the full length of the longest id is returned,
        so the result is always an int usable as a slice bound. An empty
        input returns ``start``.
    """
    # De-duplicate first: repeated ids can never be told apart by any prefix
    # and would otherwise make the uniqueness check impossible to satisfy.
    distinct = set(uuids)
    if not distinct:
        return start

    # Use the longest id rather than the first one so ids of differing
    # lengths are handled too.
    longest = max(len(uuid) for uuid in distinct)
    for length in range(start, longest):
        if len({uuid[:length] for uuid in distinct}) == len(distinct):
            return length

    # Only the complete ids are unique (or they are shorter than `start`).
    return max(start, longest)
56+
57+
def run_module():
    """Build the AZ -> host -> server-name topology map and return it.

    Reads the ``compute_vms`` module parameter (list of server names), lists
    the servers visible through the default OpenStack connection and groups
    the requested ones by availability zone and host id. Host ids are then
    shortened to the minimal common prefix length that keeps them unique.

    The result is returned through ``module.exit_json`` under the
    ``topology`` key. The module only reads from OpenStack, so it always
    reports ``changed=False``.
    """
    module_args = dict(
        compute_vms=dict(type='list', elements='str', required=True)
    )
    module = AnsibleModule(argument_spec=module_args, supports_check_mode=True)

    conn = openstack.connection.from_config()

    # Build the lookup set once instead of scanning the list for each server.
    wanted = set(module.params["compute_vms"])
    servers = [s for s in conn.compute.servers() if s["name"] in wanted]

    topo = {}
    for s in servers:
        host_id = s['host_id']
        # Servers without a host id cannot be placed in the tree, so skip
        # them rather than emitting an entry with an empty switch name.
        if not host_id:
            continue
        hosts = topo.setdefault(s['availability_zone'], {})
        hosts.setdefault(host_id, []).append(s['name'])

    all_host_ids = {host_id for hosts in topo.values() for host_id in hosts}
    # With no placed servers there is nothing to shorten; slicing with None
    # leaves keys untouched and avoids calling min_prefix on an empty list.
    uuid_len = min_prefix(list(all_host_ids)) if all_host_ids else None

    for az in topo:
        topo[az] = {k[:uuid_len]: v for k, v in topo[az].items()}

    result = {
        # Read-only query: nothing on the target or in OpenStack is modified.
        "changed": False,
        "topology": topo,
    }

    module.exit_json(**result)
91+
92+
93+
def main():
    """Script entry point: delegate to run_module()."""
    run_module()
95+
96+
97+
if __name__ == "__main__":
98+
main()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Query OpenStack once from the Ansible controller (delegate_to: localhost,
# run_once) and register the resulting topology map for the template below.
- name: Map instances to hosts
  become: false
  map_hosts:
    compute_vms: "{{ groups['compute'] }}"
  register: _topology
  delegate_to: localhost
  run_once: true

# Render topology.conf from the registered map (or the override variable).
- name: Template topology.conf
  become: true
  ansible.builtin.template:
    src: templates/topology.conf.j2
    dest: /etc/slurm/topology.conf
    owner: root
    group: root
    # Quoted so Ansible receives a string and does the octal conversion itself.
    mode: "0644"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# topology.conf
2+
# Switch Configuration
3+
{% if topology_topology_override != '' %}
4+
{{ topology_topology_override }}
5+
{% else %}
6+
{% for az in _topology.topology.keys() %}
7+
{% for instance_host in _topology.topology[az].keys() %}
8+
SwitchName={{ instance_host }} Nodes={{ _topology.topology[az][instance_host] | join(",") }}
9+
{% endfor %}
10+
SwitchName={{ az }} Switches={{ _topology.topology[az].keys() | join(",") }}
11+
{% endfor %}
12+
SwitchName=master Switches={{ _topology.topology.keys() | join(",") }}
13+
{% endif %}

ansible/slurm.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@
6161
tags:
6262
- openhpc
6363
tasks:
64+
- include_role:
65+
name: topology
66+
when: appliances_mode == 'configure'
6467
- include_role:
6568
name: stackhpc.openhpc
6669
tasks_from: "{{ 'runtime.yml' if appliances_mode == 'configure' else 'main.yml' }}"

environments/common/inventory/group_vars/all/openhpc.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ openhpc_config_default:
5757
- enable_configless
5858
TaskPlugin: task/cgroup,task/affinity
5959
ReturnToService: 2 # workaround for templating bug TODO: Remove once on stackhpc.openhpc v1.2.0
60+
TopologyPlugin: topology/tree
6061

6162
# default additional slurm.conf parameters when "rebuild" enabled:
6263
openhpc_config_rebuild:

0 commit comments

Comments
 (0)