Skip to content

Commit 13917ec

Browse files
committed
bug: fix graphic to show entire tree
Signed-off-by: vsoch <[email protected]>
1 parent acef898 commit 13917ec

23 files changed

+50
-26
lines changed

fluxbind/graph/graph.py

Lines changed: 49 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -278,18 +278,18 @@ def match_resources(self, jobspec, allocated_gps=None):
278278
log.debug(f"Successfully found a slot with {len(final_allocation)} objects.")
279279
return final_allocation
280280

281-
def sort_by_affinity(self, candidates, affinity, allocated):
281+
def sort_by_affinity(self, candidates, affinity, allocated, domain_gp):
282282
"""
283283
Sort list of candidates by affinity so we get closest one.
284284
"""
285285
target_type = self.translate_type(affinity.get("type"))
286286
if not target_type:
287287
log.warning("Affinity spec missing 'type'.")
288288
return candidates
289-
machine_gp = self.find_objects(type="Machine")[0][0]
290-
291-
# Affinity targets can be anywhere, so search from Machine
292-
targets = self.get_available_children(machine_gp, target_type, allocated)
289+
290+
# Search within the domain we were provided, not across the machine
291+
log.debug(f" -> Searching for affinity target '{target_type}' within the current domain.")
292+
targets = self.get_available_children(domain_gp, target_type, allocated)
293293
if not targets:
294294
log.warning(f"Affinity target '{target_type}' not found.")
295295
return candidates
@@ -332,46 +332,70 @@ def translate_type(self, requested_type: str):
332332

333333
def find_bindable_leaves(self, total_allocation, bind_level):
334334
"""
335-
Given an allocation, find bindable leaf nodes.
336-
337-
This function handles three cases for each allocated resource:
338-
1. Identity: The resource is already the desired bindable type.
339-
2. Descendants: The resource is a container (e.g., Package) of the desired type.
340-
3. Ancestors: The resource is a child (e.g., PU) of the desired type.
335+
Transforms a list of allocated resources into a final list of bindable
336+
nodes by correctly handling all relationships for EACH allocated object.
341337
"""
342338
leaf_nodes = []
343339
log.debug(f"Transforming {len(total_allocation)} allocated objects to bind_level '{bind_level}'...")
344340

345-
# Concrete hwloc type string for our target bind level.
346-
# Hey spack, we can concretize too!
347341
bind_type_concrete = self.translate_type(bind_level)
348342

343+
# Iterate through every object found by the allocator.
349344
for gp, data in total_allocation:
350345

351-
# Case 1: The allocated object IS the type we want to bind to.
346+
# Case 1 (Identity): The object is already the type we want to bind to.
352347
if data.get('type') == bind_type_concrete:
353348
leaf_nodes.append((gp, data))
354349
continue
355350

356-
# Case 2: The allocated object is a container of the type we want to bind to.
357-
descendants = self.get_descendants(gp, type=bind_type_concrete)
358-
if descendants:
359-
leaf_nodes.extend(descendants)
360-
continue
351+
# Case 2 (Container): The object is a container. Find its children of the target type.
352+
found_descendants = False
353+
container_type = data.get("type")
354+
355+
# If the container is a NUMANode, the relationship is via the parent Package.
356+
if container_type == "NUMANode":
357+
package = self.get_ancestor_of_type(gp, "Package")
358+
if package:
359+
descendants = self.get_descendants(package[0], type=bind_type_concrete)
360+
if descendants:
361+
leaf_nodes.extend(descendants)
362+
found_descendants = True
363+
364+
# If the container is a PCIDev, the relationship is via NUMA locality.
365+
elif container_type == "PCIDev":
366+
numa_idx = data.get("numa_os_index")
367+
if numa_idx is not None:
368+
all_bindable = self.find_objects(type=bind_type_concrete)
369+
local_nodes = [node for node in all_bindable if node[1].get("numa_os_index") == numa_idx]
370+
if local_nodes:
371+
leaf_nodes.extend(local_nodes)
372+
found_descendants = True
361373

362-
# Case 3: The allocated object is a child of the type we want to bind to.
363-
# Example: total_allocation=[PU:0], bind_level='core'
374+
# For all other containers, the relationship is simple hierarchy.
375+
else:
376+
descendants = self.get_descendants(gp, type=bind_type_concrete)
377+
if descendants:
378+
leaf_nodes.extend(descendants)
379+
found_descendants = True
380+
381+
# If we successfully found descendants, onto the next!
382+
if found_descendants:
383+
continue
384+
385+
# Last case (Child): If it's not the right type and not a container, it might be a child.
386+
# Find its ancestor of the target type (e.g., allocated PU, bind to Core).
364387
ancestor = self.get_ancestor_of_type(gp, bind_type_concrete)
365388
if ancestor:
366389
leaf_nodes.append(ancestor)
367390

368-
# De-duplicate the final list (e.g., if two PUs map to the same Core)
369-
# and sort for deterministic assignment.
391+
# De-duplicate the final list and sort for deterministic assignment.
370392
leaf_nodes = list({gp: (gp, data) for gp, data in leaf_nodes}.values())
371-
leaf_nodes.sort(key=self.get_sort_key_for_node)
393+
leaf_nodes.sort(key=self.get_sort_key_for_node)
394+
372395
log.debug(f"Transformation resulted in {len(leaf_nodes)} unique bindable leaf nodes.")
373396
return leaf_nodes
374397

398+
375399
def summarize(self, nodes):
376400
"""
377401
Given a set of nodes in the graph (a set of resources) print a textual visual.
@@ -545,7 +569,7 @@ def find_assignment_recursive(self, request, domain_gp, allocated, depth=0):
545569
self.last_affinity_target = (target_gp, domain_node)
546570
else:
547571
log.debug(f"{indent} -> Sorting candidates by GLOBAL affinity to {affinity_spec}")
548-
candidates = self.sort_by_affinity(candidates, affinity_spec, allocated)
572+
candidates = self.sort_by_affinity(candidates, affinity_spec, allocated, domain_gp)
549573

550574
if len(candidates) < count:
551575
log.debug(
102 KB
Loading
111 KB
Loading
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)