Merge pull request #3470 from vkarak/refactor/use-clustershell-nodeset

vkarak · web-flow · commit 167ef862ab9c · 2025-05-03T00:13:00.000+02:00
[enhancement] Use `ClusterShell` to manipulate node lists instead of our own code
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,4 +1,5 @@
 archspec==0.2.5
+ClusterShell==1.9.3
 docutils==0.18.1; python_version < '3.9'
 docutils==0.21.2; python_version >= '3.9'
 jinja2==3.0.3; python_version == '3.6'
diff --git a/reframe/utility/__init__.py b/reframe/utility/__init__.py
@@ -19,6 +19,7 @@
 
 import reframe
 
+from ClusterShell.NodeSet import NodeSet, NodeSetParseError
 from collections import UserDict
 from hashlib import sha256
 from . import typecheck as typ
@@ -753,112 +754,6 @@ def _is_valid_for_env(m, e):
                     yield (p.fullname, e.name, m)
 
 
-def _delta_encode(seq):
-    '''Delta-encode sequence.
-
-    The input list must be at least of size 1.
-
-    Example of delta encoding:
-
-    - Input list:
-       1 2 5 6 7 8 9 125
-
-    - Output list:
-       1 1 3 1 1 1 1 106
-       ^
-       |
-     First element
-     of the original list.
-
-    :returns: the encoded list. The first element of the encoded sequence is
-        the first element of the original sequence.
-
-    '''
-
-    assert len(seq) >= 1
-
-    ret = [seq[0]]
-    for i in range(1, len(seq)):
-        ret.append(seq[i] - seq[i-1])
-
-    return ret
-
-
-def _rl_encode(seq):
-    '''Run-length encode a delta-encoded sequence.
-
-    The input list must be at least of size 1.
-
-    Example of run-length encoding:
-
-    - Original list:
-       1 2 5 6 7 8 9 125
-
-    - Delta-encoded list:
-       1 1 3 1 1 1 1 106
-
-    - Run-length-encoded list:
-
-       (1,1,2), (5,1,5), (125,1,1)
-
-    For convenience, in each RLE unit we use the first element of the original
-    unit and not the delta value from the previous unit.
-
-    :returns: the encoded list. Each element of the list is a three-tuple
-        containing the first element of the unit, the delta value of the unit
-        and its length.
-
-    '''
-    assert len(seq) >= 1
-
-    encoded = []
-    curr_unit = [seq[0], 1, 1]     # current RLE unit
-    for delta in seq[1:]:
-        uelem, udelta, ulen = curr_unit
-        if udelta is None:
-            curr_unit[1] = delta
-            curr_unit[2] += 1
-        elif udelta != delta:
-            # New unit; we don't set the delta of the new unit here, because
-            # `delta` is just the jump for the previous unit. The length of
-            # the unit is initialized to one, because the last processed
-            # element *is* part of the new unit.
-            encoded.append(tuple(curr_unit))
-            curr_unit = [uelem + udelta*(ulen-1) + delta, None, 1]
-        else:
-            # Increase unit
-            curr_unit[2] += 1
-
-    # Fix last unit and add it to the encoded list
-    if curr_unit[1] is None:
-        # Conveniently set delta to 1
-        curr_unit[1] = 1
-
-    encoded.append(tuple(curr_unit))
-    return encoded
-
-
-def _parse_node(nodename):
-    m = re.search(r'(.*\D)(\d+)(\D*)', nodename)
-    if m is None:
-        basename = nodename
-        width = 0
-        nodeid = None
-        suffix = None
-    else:
-        basename = m.group(1)
-        _id = m.group(2).lstrip('0')
-        if _id == '':
-            # This is to cover nodes with id=0, e.g., x000
-            _id = '0'
-
-        nodeid = int(_id)
-        width = len(m.group(2))
-        suffix = m.group(3)
-
-    return basename, width, nodeid, suffix
-
-
 def count_digits(n):
     '''Count the digits of a decimal number.
 
@@ -873,176 +768,22 @@ def count_digits(n):
     return num_digits
 
 
-def _common_prefix(s1, s2):
-    pos = 0
-    for i in range(min(len(s1), len(s2))):
-        if s1[i] != s2[i]:
-            break
-
-        pos += 1
-
-    return s1[:pos], s1[pos:], s2[pos:]
-
-
-class _NodeGroup:
-    def __init__(self, name, width, suffix):
-        self.__name = name
-        self.__suffix = suffix
-        self.__width = width
-        self.__nodes = []
-
-    @property
-    def name(self):
-        return self.__name
-
-    @property
-    def suffix(self):
-        return self.__suffix
-
-    @property
-    def width(self):
-        return self.__width
-
-    @property
-    def nodes(self):
-        return self.__nodes
-
-    def add(self, nid):
-        self.__nodes.append(nid)
-
-    def __str__(self):
-        if not self.__nodes:
-            return self.__name
-
-        abbrev = []
-        encoded = _rl_encode(_delta_encode(self.nodes))
-        for unit in encoded:
-            start, delta, size = unit
-            if size == 1:
-                s_start = str(start).zfill(self.width)
-                abbrev.append(f'{self.name}{s_start}{self.suffix}')
-            elif delta != 1:
-                # We simply unpack node lists with delta != 1
-                for i in range(size):
-                    s_start = str(start + i*delta).zfill(self.width)
-                    abbrev.append(f'{self.name}{s_start}{self.suffix}')
-            else:
-                last = start + delta*(size-1)
-                digits_last = count_digits(last)
-                pad = self.width - digits_last
-                nd_range = self.name
-                if pad > 0:
-                    for _ in range(pad):
-                        nd_range += '0'
-
-                s_first = str(start).zfill(digits_last)
-                s_last  = str(last)
-                prefix, s_first, s_last = _common_prefix(s_first, s_last)
-                nd_range += f'{prefix}[{s_first}-{s_last}]{self.suffix}'
-                abbrev.append(nd_range)
-
-        return ','.join(abbrev)
-
-    def __hash__(self):
-        return hash(self.name) ^ hash(self.suffix) ^ hash(self.width)
-
-    def __eq__(self, other):
-        if not isinstance(other, _NodeGroup):
-            return NotImplemented
-
-        return (self.name == other.name and
-                self.suffix == other.suffix and
-                self.width == other.width)
-
-
 def nodelist_abbrev(nodes):
-    '''Create an abbreviated string representation of the node list.
-
-    For example, the node list
-
-    .. code-block:: python
-
-       ['nid001', 'nid002', 'nid010', 'nid011', 'nid012', 'nid510', 'nid511']
-
-    will be abbreviated as follows:
-
-    .. code-block:: none
-
-       nid00[1-2],nid0[10-12],nid51[0-1]
-
-
-    .. versionadded:: 3.5.3
-
-    :arg nodes: The node list to abbreviate.
-    :returns: The abbreviated list representation.
-
-    '''
-
-    # The algorithm used for abbreviating the list is a standard index
-    # compression algorithm, the run-length encoding. We first delta encode
-    # the nodes based on their id, which we retrieve from their name, and then
-    # run-length encode the list of deltas. The resulting run-length-encoded
-    # units are then used to generate the abbreviated representation using
-    # some formatting sugar. The abbreviation is handled in the `__str__()`
-    # function of the `_NodeGroup`. The purpose of the `_NodeGroup` is to
-    # group nodes in the list that belong to the same family, namely have the
-    # same prefix. We then apply the run-length encoding to each group
-    # independently.
-
     if isinstance(nodes, str):
         raise TypeError('nodes argument cannot be a string')
 
     if not isinstance(nodes, collections.abc.Sequence):
         raise TypeError('nodes argument must be a Sequence')
 
-    node_groups = {}
-    for n in sorted(nodes):
-        basename, width, nid, suffix = _parse_node(n)
-        ng = _NodeGroup(basename, width, suffix)
-        node_groups.setdefault(ng, ng)
-        if nid is not None:
-            node_groups[ng].add(nid)
-
-    return ','.join(str(ng) for ng in node_groups)
+    ns = NodeSet.fromlist(nodes)
+    return str(ns)
 
 
 def nodelist_expand(nodespec):
-    '''Expand the nodes in ``nodespec`` to a list of nodes.
-
-    :arg nodespec: A node specification as the one returned by
-        :func:`nodelist_abbrev`
-    :returns: The list of nodes corresponding to the given node specification.
-
-    .. versionadded:: 4.0.0
-    '''
-
-    if not isinstance(nodespec, str):
-        raise TypeError('nodespec argument must be a string')
-
-    if nodespec == '':
-        return []
-
-    nodespec_parts = nodespec.split(',')
-    node_patt = re.compile(
-        r'(?P<prefix>.+)\[(?P<l>\d+)-(?P<u>\d+)\](?P<suffix>.*)'
-    )
-    nodes = []
-    for ns in nodespec_parts:
-        if '[' not in ns and ']' not in ns:
-            nodes.append(ns)
-            continue
-
-        match = node_patt.match(ns)
-        if not match:
-            raise ValueError(f'invalid nodespec: {nodespec}')
-
-        prefix, suffix = match.group('prefix'), match.group('suffix')
-        low, upper = int(match.group('l')), int(match.group('u'))
-        width = count_digits(upper)
-        for nid in range(low, upper+1):
-            nodes.append(f'{prefix}{nid:0{width}}{suffix}')
-
-    return nodes
+    try:
+        return list(NodeSet(nodespec))
+    except NodeSetParseError as err:
+        raise ValueError('invalid nodespec') from err
 
 
 def cache_return_value(fn):
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,7 @@
 archspec==0.2.5
 argcomplete==3.1.2; python_version < '3.8'
 argcomplete==3.6.1; python_version >= '3.8'
+ClusterShell==1.9.3
 filelock==3.4.1; python_version == '3.6'
 filelock==3.12.2; python_version == '3.7'
 filelock==3.16.1; python_version == '3.8'
diff --git a/setup.cfg b/setup.cfg
@@ -30,6 +30,7 @@ install_requires =
     archspec >= 0.2.4
     argcomplete
     argcomplete <= 3.1.2; python_version < '3.8'
+    ClusterShell
     filelock
     filelock<=3.16.1; python_version == '3.8'
     filelock<=3.12.2; python_version == '3.7'
diff --git a/unittests/test_utility.py b/unittests/test_utility.py

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`archspec==0.2.5`
	`2`	`+ClusterShell==1.9.3`
`2`	`3`	`docutils==0.18.1; python_version < '3.9'`
`3`	`4`	`docutils==0.21.2; python_version >= '3.9'`
`4`	`5`	`jinja2==3.0.3; python_version == '3.6'`