Skip to content

Commit 3934a7d

Browse files
author
Vasileios Karakasis
committed
Add utility function for abbreviating node lists
1 parent 769a0ad commit 3934a7d

File tree

2 files changed

+271
-0
lines changed

2 files changed

+271
-0
lines changed

reframe/utility/__init__.py

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,238 @@ def _is_valid_for_env(m, e):
643643
yield (p.fullname, e.name, m)
644644

645645

646+
def _delta_encode(seq):
647+
'''Delta-encode sequence.
648+
649+
The input list must be at least of size 1.
650+
651+
:returns: the encoded list. The first element of the encoded sequence is
652+
the first element of the original sequence.
653+
654+
'''
655+
656+
assert len(seq) >= 1
657+
658+
ret = [seq[0]]
659+
for i in range(1, len(seq)):
660+
ret.append(seq[i] - seq[i-1])
661+
662+
return ret
663+
664+
665+
def _rl_encode(seq):
666+
'''Run-length encode a delta-encoded sequence.
667+
668+
The input list must be at least of size 1.
669+
670+
:returns: the encoded list. Each element of the list is a three-tuple
671+
containing the first element of the unit, the delta value of the unit
672+
and its length.
673+
674+
'''
675+
assert len(seq) >= 1
676+
677+
encoded = []
678+
curr_unit = [seq[0], 1, 1] # current RLE unit
679+
for delta in seq[1:]:
680+
uelem, udelta, ulen = curr_unit
681+
if udelta is None:
682+
curr_unit[1] = delta
683+
curr_unit[2] += 1
684+
elif udelta != delta:
685+
# New unit; we don't set the delta of the new unit here, because
686+
# `delta` is just the jump for the previous unit. The length of
687+
# the unit is initialized to one, because the last processed
688+
# element *is* part of the new unit.
689+
encoded.append(tuple(curr_unit))
690+
curr_unit = [uelem + udelta*(ulen-1) + delta, None, 1]
691+
else:
692+
# Increase unit
693+
curr_unit[2] += 1
694+
695+
# Fix last unit and add it to the encoded list
696+
if curr_unit[1] is None:
697+
# Conveniently set delta to 1
698+
curr_unit[1] = 1
699+
700+
encoded.append(tuple(curr_unit))
701+
return encoded
702+
703+
704+
class _NodeElem:
705+
def __init__(self, nodename):
706+
m = re.search('(^\D+)(\d+)', nodename)
707+
if m is None:
708+
self._basename = nodename
709+
self._nodeid = -1
710+
else:
711+
self._basename = m.group(1)
712+
self._nodeid = int(m.group(2).lstrip('0'))
713+
714+
@property
715+
def basename(self):
716+
return self._basename
717+
718+
@property
719+
def nodeid(self):
720+
return self._nodeid
721+
722+
def __repr__(self):
723+
return f'{type(self)}({self.basename}, {self.nodeid})'
724+
725+
726+
def _parse_node(nodename):
727+
m = re.search('(^\D+)(\d+)', nodename)
728+
if m is None:
729+
basename = nodename
730+
width = 0
731+
nodeid = None
732+
else:
733+
basename = m.group(1)
734+
_id = m.group(2).lstrip('0')
735+
if _id == '':
736+
# This is to cover nodes with id=0, e.g., x000
737+
_id = '0'
738+
739+
nodeid = int(_id)
740+
width = len(m.group(2))
741+
742+
return basename, width, nodeid
743+
744+
745+
def _count_digits(n):
746+
'''Count digits of a decimal number.'''
747+
748+
num_digits = 1
749+
while n > 10:
750+
n /= 10
751+
num_digits += 1
752+
753+
return num_digits
754+
755+
756+
def _common_prefix(s1, s2):
757+
pos = 0
758+
for i in range(min(len(s1), len(s2))):
759+
if s1[i] != s2[i]:
760+
break
761+
762+
pos += 1
763+
764+
return s1[:pos], s1[pos:], s2[pos:]
765+
766+
767+
class _NodeGroup:
768+
def __init__(self, name, width):
769+
self.__name = name
770+
self.__width = width
771+
self.__nodes = []
772+
773+
@property
774+
def name(self):
775+
return self.__name
776+
777+
@property
778+
def width(self):
779+
return self.__width
780+
781+
@property
782+
def nodes(self):
783+
return self.__nodes
784+
785+
def add(self, nid):
786+
self.__nodes.append(nid)
787+
788+
def __str__(self):
789+
abbrev = []
790+
encoded = _rl_encode(_delta_encode(self.nodes))
791+
for unit in encoded:
792+
start, delta, size = unit
793+
if size == 1:
794+
s_start = str(start).zfill(self.width)
795+
abbrev.append(f'{self.name}{s_start}')
796+
elif delta != 1:
797+
# We simply unpack node lists with delta != 1
798+
for i in range(size):
799+
s_start = str(start + i*delta).zfill(self.width)
800+
abbrev.append(f'{self.name}{s_start}')
801+
else:
802+
last = start + delta*(size-1)
803+
digits_last = _count_digits(last)
804+
pad = self.width - digits_last
805+
nd_range = self.name
806+
if pad > 0:
807+
for _ in range(pad):
808+
nd_range += '0'
809+
810+
s_first = str(start).zfill(digits_last)
811+
s_last = str(last)
812+
prefix, s_first, s_last = _common_prefix(s_first, s_last)
813+
nd_range += f'{prefix}[{s_first}-{s_last}]'
814+
abbrev.append(nd_range)
815+
816+
return ','.join(abbrev)
817+
818+
def __hash__(self):
819+
return hash(self.name) ^ hash(self.width)
820+
821+
def __eq__(self, other):
822+
if not isinstance(other, _NodeGroup):
823+
return NotImplemented
824+
825+
return self.name == other.name and self.width == other.width
826+
827+
828+
def nodelist_abbrev(nodes):
    '''Create an abbreviated string representation of the node list.

    For example, the node list

    .. code-block:: python

       ['nid001', 'nid002', 'nid010', 'nid011', 'nid012', 'nid510', 'nid511']

    will be abbreviated as follows:

    .. code-block:: none

       nid00[1-2],nid01[0-2],nid51[0-1]

    Node names without a numeric id are emitted verbatim, once per distinct
    name.

    .. versionadded:: 3.5.3

    :arg nodes: The node list to abbreviate.
    :returns: The abbreviated list representation.
    :raises TypeError: if ``nodes`` is a string or is not a sequence.

    '''

    # The algorithm used for abbreviating the list is a standard index
    # compression algorithm, the run-length encoding. We first delta encode
    # the nodes based on their id, which we retrieve from their name, and then
    # run-length encode the list of deltas. The resulting run-length-encoded
    # units are then used to generate the abbreviated representation using
    # some formatting sugar. The abbreviation is handled in the `__str__()`
    # function of the `_NodeGroup`. The purpose of the `_NodeGroup` is to
    # group nodes in the list that belong to the same family, namely have the
    # same prefix. We then apply the run-length encoding to each group
    # independently.

    if isinstance(nodes, str):
        raise TypeError('nodes argument cannot be a string')

    if not isinstance(nodes, collections.abc.Sequence):
        raise TypeError('nodes argument must be a Sequence')

    # Groups are keyed by (basename, width) and kept in order of first
    # appearance in the sorted node list
    node_groups = {}
    for n in sorted(nodes):
        basename, width, nid = _parse_node(n)
        ng = node_groups.setdefault((basename, width),
                                    _NodeGroup(basename, width))
        if nid is not None:
            ng.add(nid)

    # A group without ids stands for a node name that has no numeric part;
    # there is nothing to encode for it, so we emit its name as is
    return ','.join(str(ng) if ng.nodes else ng.name
                    for ng in node_groups.values())
876+
877+
646878
class ScopedDict(UserDict):
647879
'''This is a special dictionary that imposes scopes on its keys.
648880

unittests/test_utility.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1723,3 +1723,42 @@ def foo():
17231723
assert util.is_copyable(len)
17241724
assert util.is_copyable(int)
17251725
assert not util.is_copyable(foo())
1726+
1727+
1728+
def test_nodelist_abbrev():
    '''Check the abbreviation of contiguous, strided and degenerate lists.'''

    abbrev = util.nodelist_abbrev

    # Build two node families in random order, each with a couple of
    # outliers around a long contiguous range
    nid_range = [f'nid{n:03}' for n in range(5, 20)]
    cid_nodes = [f'cid{n:03}' for n in range(20)]
    random.shuffle(nid_range)
    random.shuffle(cid_nodes)
    nid_nodes = ['nid001', 'nid002', *nid_range, 'nid125']
    cid_nodes.extend(['cid055', 'cid056'])

    mixed_nodes = nid_nodes + cid_nodes
    random.shuffle(mixed_nodes)

    assert abbrev(nid_nodes) == 'nid00[1-2],nid0[05-19],nid125'
    assert abbrev(cid_nodes) == 'cid0[00-19],cid05[5-6]'
    assert abbrev(mixed_nodes) == (
        'cid0[00-19],cid05[5-6],nid00[1-2],nid0[05-19],nid125'
    )

    # Test non-contiguous nodes
    strided_nodes = [f'nid{n:03}'
                     for i in range(3) for n in range(10*i, 10*i+5)]
    random.shuffle(strided_nodes)
    assert abbrev(strided_nodes) == 'nid00[0-4],nid01[0-4],nid02[0-4]'
    assert abbrev(['nid01', 'nid10', 'nid20']) == 'nid01,nid10,nid20'

    # Degenerate inputs
    assert abbrev([]) == ''
    assert abbrev(['nid001']) == 'nid001'

    # Invalid argument types
    with pytest.raises(TypeError, match='nodes argument must be a Sequence'):
        abbrev(1)

    with pytest.raises(TypeError, match='nodes argument cannot be a string'):
        abbrev('foo')

0 commit comments

Comments
 (0)