python: fix performance of ResourceStatus.get_drain_info()

grondo · grondo · commit a6abe6366bb3 · 2024-04-06T11:16:14.000-07:00
Problem: The ResourceStatus get_drain_info() method is very slow when there are many drained ranks because it uses a linear search. Since it will be called at least once per rank, the total cost grows quadratically with the number of drained ranks. Add a mapping from rank to DrainInfo object in the ResourceStatus class and use this instead of a search in get_drain_info(). Fixes #5859
diff --git a/src/bindings/python/flux/resource/status.py b/src/bindings/python/flux/resource/status.py
@@ -108,13 +108,16 @@ def _recalculate(self, include_ranks=None):
 
         # drain_info: ranks, timestamp, reason tuples for all drained resources
         self.drain_info = []
+        self._drain_lookup = {}
         for drain_ranks, entry in self.rstatus["drain"].items():
             ranks = IDset(drain_ranks)
             if include_ranks is not None:
                 ranks = ranks.intersect(include_ranks)
             self.drained += ranks
             info = DrainInfo(ranks, entry["timestamp"], entry["reason"])
             self.drain_info.append(info)
+            for rank in ranks:
+                self._drain_lookup[rank] = info
 
         # create the set of draining ranks as the intersection of
         #  drained and allocated
@@ -146,7 +149,7 @@ def get_drain_info(self, rank):
         """
         if rank not in self.all:
             raise ValueError("invalid rank {rank}")
-        return next((i for i in self.drain_info if rank in i.ranks), None)
+        return self._drain_lookup.get(rank)
 
 
 class ResourceStatusRPC: