Skip to content

Commit 167ef86

Browse files
authored
Merge pull request #3470 from vkarak/refactor/use-clustershell-nodeset
[enhancement] Use `ClusterShell` to manipulate node lists instead of our own code
2 parents cbb1e52 + 61fc6cd commit 167ef86

File tree

5 files changed

+29
-277
lines changed

5 files changed

+29
-277
lines changed

docs/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
archspec==0.2.5
2+
ClusterShell==1.9.3
23
docutils==0.18.1; python_version < '3.9'
34
docutils==0.21.2; python_version >= '3.9'
45
jinja2==3.0.3; python_version == '3.6'

reframe/utility/__init__.py

Lines changed: 7 additions & 266 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import reframe
2121

22+
from ClusterShell.NodeSet import NodeSet, NodeSetParseError
2223
from collections import UserDict
2324
from hashlib import sha256
2425
from . import typecheck as typ
@@ -753,112 +754,6 @@ def _is_valid_for_env(m, e):
753754
yield (p.fullname, e.name, m)
754755

755756

756-
def _delta_encode(seq):
757-
'''Delta-encode sequence.
758-
759-
The input list must be at least of size 1.
760-
761-
Example of delta encoding:
762-
763-
- Input list:
764-
1 2 5 6 7 8 9 125
765-
766-
- Output list:
767-
1 1 3 1 1 1 1 106
768-
^
769-
|
770-
First element
771-
of the original list.
772-
773-
:returns: the encoded list. The first element of the encoded sequence is
774-
the first element of the original sequence.
775-
776-
'''
777-
778-
assert len(seq) >= 1
779-
780-
ret = [seq[0]]
781-
for i in range(1, len(seq)):
782-
ret.append(seq[i] - seq[i-1])
783-
784-
return ret
785-
786-
787-
def _rl_encode(seq):
788-
'''Run-length encode a delta-encoded sequence.
789-
790-
The input list must be at least of size 1.
791-
792-
Example of run-length encoding:
793-
794-
- Original list:
795-
1 2 5 6 7 8 9 125
796-
797-
- Delta-encoded list:
798-
1 1 3 1 1 1 1 106
799-
800-
- Run-length-encoded list:
801-
802-
(1,1,2), (5,1,5), (125,1,1)
803-
804-
For convenience, in each RLE unit we use the first element of the original
805-
unit and not the delta value from the previous unit.
806-
807-
:returns: the encoded list. Each element of the list is a three-tuple
808-
containing the first element of the unit, the delta value of the unit
809-
and its length.
810-
811-
'''
812-
assert len(seq) >= 1
813-
814-
encoded = []
815-
curr_unit = [seq[0], 1, 1] # current RLE unit
816-
for delta in seq[1:]:
817-
uelem, udelta, ulen = curr_unit
818-
if udelta is None:
819-
curr_unit[1] = delta
820-
curr_unit[2] += 1
821-
elif udelta != delta:
822-
# New unit; we don't set the delta of the new unit here, because
823-
# `delta` is just the jump for the previous unit. The length of
824-
# the unit is initialized to one, because the last processed
825-
# element *is* part of the new unit.
826-
encoded.append(tuple(curr_unit))
827-
curr_unit = [uelem + udelta*(ulen-1) + delta, None, 1]
828-
else:
829-
# Increase unit
830-
curr_unit[2] += 1
831-
832-
# Fix last unit and add it to the encoded list
833-
if curr_unit[1] is None:
834-
# Conveniently set delta to 1
835-
curr_unit[1] = 1
836-
837-
encoded.append(tuple(curr_unit))
838-
return encoded
839-
840-
841-
def _parse_node(nodename):
842-
m = re.search(r'(.*\D)(\d+)(\D*)', nodename)
843-
if m is None:
844-
basename = nodename
845-
width = 0
846-
nodeid = None
847-
suffix = None
848-
else:
849-
basename = m.group(1)
850-
_id = m.group(2).lstrip('0')
851-
if _id == '':
852-
# This is to cover nodes with id=0, e.g., x000
853-
_id = '0'
854-
855-
nodeid = int(_id)
856-
width = len(m.group(2))
857-
suffix = m.group(3)
858-
859-
return basename, width, nodeid, suffix
860-
861-
862757
def count_digits(n):
863758
'''Count the digits of a decimal number.
864759
@@ -873,176 +768,22 @@ def count_digits(n):
873768
return num_digits
874769

875770

876-
def _common_prefix(s1, s2):
877-
pos = 0
878-
for i in range(min(len(s1), len(s2))):
879-
if s1[i] != s2[i]:
880-
break
881-
882-
pos += 1
883-
884-
return s1[:pos], s1[pos:], s2[pos:]
885-
886-
887-
class _NodeGroup:
888-
def __init__(self, name, width, suffix):
889-
self.__name = name
890-
self.__suffix = suffix
891-
self.__width = width
892-
self.__nodes = []
893-
894-
@property
895-
def name(self):
896-
return self.__name
897-
898-
@property
899-
def suffix(self):
900-
return self.__suffix
901-
902-
@property
903-
def width(self):
904-
return self.__width
905-
906-
@property
907-
def nodes(self):
908-
return self.__nodes
909-
910-
def add(self, nid):
911-
self.__nodes.append(nid)
912-
913-
def __str__(self):
914-
if not self.__nodes:
915-
return self.__name
916-
917-
abbrev = []
918-
encoded = _rl_encode(_delta_encode(self.nodes))
919-
for unit in encoded:
920-
start, delta, size = unit
921-
if size == 1:
922-
s_start = str(start).zfill(self.width)
923-
abbrev.append(f'{self.name}{s_start}{self.suffix}')
924-
elif delta != 1:
925-
# We simply unpack node lists with delta != 1
926-
for i in range(size):
927-
s_start = str(start + i*delta).zfill(self.width)
928-
abbrev.append(f'{self.name}{s_start}{self.suffix}')
929-
else:
930-
last = start + delta*(size-1)
931-
digits_last = count_digits(last)
932-
pad = self.width - digits_last
933-
nd_range = self.name
934-
if pad > 0:
935-
for _ in range(pad):
936-
nd_range += '0'
937-
938-
s_first = str(start).zfill(digits_last)
939-
s_last = str(last)
940-
prefix, s_first, s_last = _common_prefix(s_first, s_last)
941-
nd_range += f'{prefix}[{s_first}-{s_last}]{self.suffix}'
942-
abbrev.append(nd_range)
943-
944-
return ','.join(abbrev)
945-
946-
def __hash__(self):
947-
return hash(self.name) ^ hash(self.suffix) ^ hash(self.width)
948-
949-
def __eq__(self, other):
950-
if not isinstance(other, _NodeGroup):
951-
return NotImplemented
952-
953-
return (self.name == other.name and
954-
self.suffix == other.suffix and
955-
self.width == other.width)
956-
957-
958771
def nodelist_abbrev(nodes):
959-
'''Create an abbreviated string representation of the node list.
960-
961-
For example, the node list
962-
963-
.. code-block:: python
964-
965-
['nid001', 'nid002', 'nid010', 'nid011', 'nid012', 'nid510', 'nid511']
966-
967-
will be abbreviated as follows:
968-
969-
.. code-block:: none
970-
971-
nid00[1-2],nid0[10-12],nid51[0-1]
972-
973-
974-
.. versionadded:: 3.5.3
975-
976-
:arg nodes: The node list to abbreviate.
977-
:returns: The abbreviated list representation.
978-
979-
'''
980-
981-
# The algorithm used for abbreviating the list is a standard index
982-
# compression algorithm, the run-length encoding. We first delta encode
983-
# the nodes based on their id, which we retrieve from their name, and then
984-
# run-length encode the list of deltas. The resulting run-length-encoded
985-
# units are then used to generate the abbreviated representation using
986-
# some formatting sugar. The abbreviation is handled in the `__str__()`
987-
# function of the `_NodeGroup`. The purpose of the `_NodeGroup` is to
988-
# group nodes in the list that belong to the same family, namely have the
989-
# same prefix. We then apply the run-length encoding to each group
990-
# independently.
991-
992772
if isinstance(nodes, str):
993773
raise TypeError('nodes argument cannot be a string')
994774

995775
if not isinstance(nodes, collections.abc.Sequence):
996776
raise TypeError('nodes argument must be a Sequence')
997777

998-
node_groups = {}
999-
for n in sorted(nodes):
1000-
basename, width, nid, suffix = _parse_node(n)
1001-
ng = _NodeGroup(basename, width, suffix)
1002-
node_groups.setdefault(ng, ng)
1003-
if nid is not None:
1004-
node_groups[ng].add(nid)
1005-
1006-
return ','.join(str(ng) for ng in node_groups)
778+
ns = NodeSet.fromlist(nodes)
779+
return str(ns)
1007780

1008781

1009782
def nodelist_expand(nodespec):
1010-
'''Expand the nodes in ``nodespec`` to a list of nodes.
1011-
1012-
:arg nodespec: A node specification as the one returned by
1013-
:func:`nodelist_abbrev`
1014-
:returns: The list of nodes corresponding to the given node specification.
1015-
1016-
.. versionadded:: 4.0.0
1017-
'''
1018-
1019-
if not isinstance(nodespec, str):
1020-
raise TypeError('nodespec argument must be a string')
1021-
1022-
if nodespec == '':
1023-
return []
1024-
1025-
nodespec_parts = nodespec.split(',')
1026-
node_patt = re.compile(
1027-
r'(?P<prefix>.+)\[(?P<l>\d+)-(?P<u>\d+)\](?P<suffix>.*)'
1028-
)
1029-
nodes = []
1030-
for ns in nodespec_parts:
1031-
if '[' not in ns and ']' not in ns:
1032-
nodes.append(ns)
1033-
continue
1034-
1035-
match = node_patt.match(ns)
1036-
if not match:
1037-
raise ValueError(f'invalid nodespec: {nodespec}')
1038-
1039-
prefix, suffix = match.group('prefix'), match.group('suffix')
1040-
low, upper = int(match.group('l')), int(match.group('u'))
1041-
width = count_digits(upper)
1042-
for nid in range(low, upper+1):
1043-
nodes.append(f'{prefix}{nid:0{width}}{suffix}')
1044-
1045-
return nodes
783+
try:
784+
return list(NodeSet(nodespec))
785+
except NodeSetParseError as err:
786+
raise ValueError('invalid nodespec') from err
1046787

1047788

1048789
def cache_return_value(fn):

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
archspec==0.2.5
22
argcomplete==3.1.2; python_version < '3.8'
33
argcomplete==3.6.1; python_version >= '3.8'
4+
ClusterShell==1.9.3
45
filelock==3.4.1; python_version == '3.6'
56
filelock==3.12.2; python_version == '3.7'
67
filelock==3.16.1; python_version == '3.8'

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ install_requires =
3030
archspec >= 0.2.4
3131
argcomplete
3232
argcomplete <= 3.1.2; python_version < '3.8'
33+
ClusterShell
3334
filelock
3435
filelock<=3.16.1; python_version == '3.8'
3536
filelock<=3.12.2; python_version == '3.7'

0 commit comments

Comments
 (0)