Skip to content

Commit 91f481f

Browse files
author
Vasileios Karakasis
authored
Merge pull request #1912 from jgphpc/nodelist
[feat] Abbreviate node lists in `FAILURE INFO` reports
2 parents e8a81b3 + f0e8d50 commit 91f481f

File tree

3 files changed

+283
-2
lines changed

3 files changed

+283
-2
lines changed

reframe/frontend/statistics.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import traceback
88
import reframe.core.runtime as rt
99
import reframe.core.exceptions as errors
10+
import reframe.utility as util
1011

1112

1213
class TestStats:
@@ -216,8 +217,9 @@ def print_failure_report(self, printer):
216217
printer.info(f" * System partition: {r['system']}")
217218
printer.info(f" * Environment: {r['environment']}")
218219
printer.info(f" * Stage directory: {r['stagedir']}")
219-
nodelist = ','.join(r['nodelist']) if r['nodelist'] else None
220-
printer.info(f" * Node list: {nodelist}")
220+
printer.info(
221+
f" * Node list: {util.nodelist_abbrev(r['nodelist'])}"
222+
)
221223
job_type = 'local' if r['scheduler'] == 'local' else 'batch job'
222224
jobid = r['jobid']
223225
printer.info(f" * Job type: {job_type} (id={r['jobid']})")

reframe/utility/__init__.py

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,243 @@ def _is_valid_for_env(m, e):
643643
yield (p.fullname, e.name, m)
644644

645645

646+
def _delta_encode(seq):
647+
'''Delta-encode sequence.
648+
649+
The input list must be at least of size 1.
650+
651+
Example of delta encoding:
652+
653+
- Input list:
654+
1 2 5 6 7 8 9 125
655+
656+
- Output list:
657+
1 1 3 1 1 1 1 106
658+
^
659+
|
660+
First element
661+
of the original list.
662+
663+
:returns: the encoded list. The first element of the encoded sequence is
664+
the first element of the original sequence.
665+
666+
'''
667+
668+
assert len(seq) >= 1
669+
670+
ret = [seq[0]]
671+
for i in range(1, len(seq)):
672+
ret.append(seq[i] - seq[i-1])
673+
674+
return ret
675+
676+
677+
def _rl_encode(seq):
678+
'''Run-length encode a delta-encoded sequence.
679+
680+
The input list must be at least of size 1.
681+
682+
Example of run-length encoding:
683+
684+
- Original list:
685+
1 2 5 6 7 8 9 125
686+
687+
- Delta-encoded list:
688+
1 1 3 1 1 1 1 106
689+
690+
- Run-length-encoded list:
691+
692+
(1,1,2), (5,1,5), (125,1,1)
693+
694+
For convenience, in each RLE unit we use the first element of the original
695+
unit and not the delta value from the previous unit.
696+
697+
:returns: the encoded list. Each element of the list is a three-tuple
698+
containing the first element of the unit, the delta value of the unit
699+
and its length.
700+
701+
'''
702+
assert len(seq) >= 1
703+
704+
encoded = []
705+
curr_unit = [seq[0], 1, 1] # current RLE unit
706+
for delta in seq[1:]:
707+
uelem, udelta, ulen = curr_unit
708+
if udelta is None:
709+
curr_unit[1] = delta
710+
curr_unit[2] += 1
711+
elif udelta != delta:
712+
# New unit; we don't set the delta of the new unit here, because
713+
# `delta` is just the jump for the previous unit. The length of
714+
# the unit is initialized to one, because the last processed
715+
# element *is* part of the new unit.
716+
encoded.append(tuple(curr_unit))
717+
curr_unit = [uelem + udelta*(ulen-1) + delta, None, 1]
718+
else:
719+
# Increase unit
720+
curr_unit[2] += 1
721+
722+
# Fix last unit and add it to the encoded list
723+
if curr_unit[1] is None:
724+
# Conveniently set delta to 1
725+
curr_unit[1] = 1
726+
727+
encoded.append(tuple(curr_unit))
728+
return encoded
729+
730+
731+
def _parse_node(nodename):
732+
m = re.search(r'(^\D+)(\d+)', nodename)
733+
if m is None:
734+
basename = nodename
735+
width = 0
736+
nodeid = None
737+
else:
738+
basename = m.group(1)
739+
_id = m.group(2).lstrip('0')
740+
if _id == '':
741+
# This is to cover nodes with id=0, e.g., x000
742+
_id = '0'
743+
744+
nodeid = int(_id)
745+
width = len(m.group(2))
746+
747+
return basename, width, nodeid
748+
749+
750+
def _count_digits(n):
751+
'''Count digits of a decimal number.'''
752+
753+
num_digits = 1
754+
while n > 10:
755+
n /= 10
756+
num_digits += 1
757+
758+
return num_digits
759+
760+
761+
def _common_prefix(s1, s2):
762+
pos = 0
763+
for i in range(min(len(s1), len(s2))):
764+
if s1[i] != s2[i]:
765+
break
766+
767+
pos += 1
768+
769+
return s1[:pos], s1[pos:], s2[pos:]
770+
771+
772+
class _NodeGroup:
773+
def __init__(self, name, width):
774+
self.__name = name
775+
self.__width = width
776+
self.__nodes = []
777+
778+
@property
779+
def name(self):
780+
return self.__name
781+
782+
@property
783+
def width(self):
784+
return self.__width
785+
786+
@property
787+
def nodes(self):
788+
return self.__nodes
789+
790+
def add(self, nid):
791+
self.__nodes.append(nid)
792+
793+
def __str__(self):
794+
abbrev = []
795+
encoded = _rl_encode(_delta_encode(self.nodes))
796+
for unit in encoded:
797+
start, delta, size = unit
798+
if size == 1:
799+
s_start = str(start).zfill(self.width)
800+
abbrev.append(f'{self.name}{s_start}')
801+
elif delta != 1:
802+
# We simply unpack node lists with delta != 1
803+
for i in range(size):
804+
s_start = str(start + i*delta).zfill(self.width)
805+
abbrev.append(f'{self.name}{s_start}')
806+
else:
807+
last = start + delta*(size-1)
808+
digits_last = _count_digits(last)
809+
pad = self.width - digits_last
810+
nd_range = self.name
811+
if pad > 0:
812+
for _ in range(pad):
813+
nd_range += '0'
814+
815+
s_first = str(start).zfill(digits_last)
816+
s_last = str(last)
817+
prefix, s_first, s_last = _common_prefix(s_first, s_last)
818+
nd_range += f'{prefix}[{s_first}-{s_last}]'
819+
abbrev.append(nd_range)
820+
821+
return ','.join(abbrev)
822+
823+
def __hash__(self):
824+
return hash(self.name) ^ hash(self.width)
825+
826+
def __eq__(self, other):
827+
if not isinstance(other, _NodeGroup):
828+
return NotImplemented
829+
830+
return self.name == other.name and self.width == other.width
831+
832+
833+
def nodelist_abbrev(nodes):
    '''Create an abbreviated string representation of the node list.

    For example, the node list

    .. code-block:: python

        ['nid001', 'nid002', 'nid010', 'nid011', 'nid012', 'nid510', 'nid511']

    will be abbreviated as follows:

    .. code-block:: none

        nid00[1-2],nid01[0-2],nid51[0-1]

    Node names that do not consist of a non-numeric prefix followed by a
    numeric id (e.g., ``localhost``) cannot be abbreviated. They are emitted
    verbatim and only once, even if they appear multiple times in the list.

    .. versionadded:: 3.5.3

    :arg nodes: The node list to abbreviate.
    :returns: The abbreviated list representation.
    :raises TypeError: if ``nodes`` is a string or is not a sequence.

    '''

    # The algorithm used for abbreviating the list is a standard index
    # compression algorithm, the run-length encoding. We first delta encode
    # the nodes based on their id, which we retrieve from their name, and then
    # run-length encode the list of deltas. The resulting run-length-encoded
    # units are then used to generate the abbreviated representation using
    # some formatting sugar. The abbreviation is handled in the `__str__()`
    # function of the `_NodeGroup`. The purpose of the `_NodeGroup` is to
    # group nodes in the list that belong to the same family, namely have the
    # same prefix. We then apply the run-length encoding to each group
    # independently.

    if isinstance(nodes, str):
        raise TypeError('nodes argument cannot be a string')

    if not isinstance(nodes, collections.abc.Sequence):
        raise TypeError('nodes argument must be a Sequence')

    # Maps each group (or each plain node name) to itself; the dictionary
    # preserves the sorted order in which the entries are first seen.
    node_groups = {}
    for n in sorted(nodes):
        basename, width, nid = _parse_node(n)
        if nid is None:
            # No numeric id to encode. Adding `None` to a group would print
            # the node as '<name>None' and would make the delta encoding
            # fail for repeated names, so keep the plain name instead.
            node_groups.setdefault(n, n)
            continue

        ng = _NodeGroup(basename, width)
        node_groups.setdefault(ng, ng).add(nid)

    return ','.join(str(entry) for entry in node_groups.values())
881+
882+
646883
class ScopedDict(UserDict):
647884
'''This is a special dictionary that imposes scopes on its keys.
648885

unittests/test_utility.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1712,3 +1712,45 @@ def foo():
17121712
assert util.is_copyable(len)
17131713
assert util.is_copyable(int)
17141714
assert not util.is_copyable(foo())
1715+
1716+
1717+
def test_nodelist_abbrev():
    nodelist = util.nodelist_abbrev

    # Contiguous ranges are shuffled, since the input order should not matter
    nid_range = [f'nid{n:03}' for n in range(5, 20)]
    cid_range = [f'cid{n:03}' for n in range(20)]
    random.shuffle(nid_range)
    random.shuffle(cid_range)

    # Surround the ranges with a few nodes that fall outside of them
    nid_nodes = ['nid001', 'nid002', *nid_range, 'nid125']
    cid_nodes = [*cid_range, 'cid055', 'cid056']

    all_nodes = nid_nodes + cid_nodes
    random.shuffle(all_nodes)

    assert nodelist(nid_nodes) == 'nid00[1-2],nid0[05-19],nid125'
    assert nodelist(cid_nodes) == 'cid0[00-19],cid05[5-6]'
    assert nodelist(all_nodes) == (
        'cid0[00-19],cid05[5-6],nid00[1-2],nid0[05-19],nid125'
    )

    # Test non-contiguous nodes
    nid_nodes = [f'nid{n:03}'
                 for i in range(3) for n in range(10*i, 10*i+5)]
    random.shuffle(nid_nodes)
    assert nodelist(nid_nodes) == 'nid00[0-4],nid01[0-4],nid02[0-4]'
    assert nodelist(['nid01', 'nid10', 'nid20']) == 'nid01,nid10,nid20'

    # Test degenerate lists
    assert nodelist([]) == ''
    assert nodelist(['nid001']) == 'nid001'

    # Test node duplicates
    assert nodelist(['nid001', 'nid001', 'nid002']) == 'nid001,nid00[1-2]'

    # Test invalid argument types
    with pytest.raises(TypeError, match='nodes argument must be a Sequence'):
        nodelist(1)

    with pytest.raises(TypeError, match='nodes argument cannot be a string'):
        nodelist('foo')

0 commit comments

Comments
 (0)