Skip to content

Commit 46fbddd

Browse files
author
Vasileios Karakasis
authored
Merge pull request #1433 from teojgo/feat/extractall_multiple
[feat] Support multiple tags in `extract*` sanity functions
2 parents 7348de4 + bf35b98 commit 46fbddd

File tree

2 files changed

+141
-25
lines changed

2 files changed

+141
-25
lines changed

reframe/utility/sanity.py

Lines changed: 84 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# SPDX-License-Identifier: BSD-3-Clause
55

66
import builtins
7+
import collections
78
import glob as pyglob
89
import itertools
910
import re
@@ -520,38 +521,83 @@ def findall(patt, filename, encoding='utf-8'):
520521
return list(evaluate(x) for x in finditer(patt, filename, encoding))
521522

522523

523-
@deferrable
524-
def extractiter(patt, filename, tag=0, conv=None, encoding='utf-8'):
525-
'''Get an iterator over the values extracted from the capturing group
526-
``tag`` of a matching regex ``patt`` in the file ``filename``.
524+
def _callable_name(fn):
525+
fn_name = '<unknown>'
526+
try:
527+
# Assume fn is standard function
528+
fn_name = fn.__name__
529+
except AttributeError:
530+
try:
531+
# Assume fn is callable object
532+
fn_name = fn.__class__.__name__
533+
except AttributeError:
534+
pass
535+
536+
return fn_name
537+
538+
539+
def _extractiter_singletag(patt, filename, tag, conv, encoding):
    '''Yield the value of the single capturing group ``tag`` for every match
    of ``patt`` in ``filename``.

    :arg patt: The regex pattern, passed on to :func:`finditer`.
    :arg filename: The file to search, passed on to :func:`finditer`.
    :arg tag: The number or name of the capturing group to extract.
    :arg conv: A callable used to convert each extracted value.
        If it is not callable (e.g., ``None``), values are yielded as
        extracted.
    :arg encoding: The file encoding, passed on to :func:`finditer`.
    :raises SanityError: If ``conv`` is an iterable, if the capturing group
        ``tag`` does not exist in a match, or if ``conv`` raises a
        :class:`ValueError` for an extracted value.
    '''
    # The ``collections.Iterable`` alias was removed in Python 3.10; the ABC
    # lives in ``collections.abc``. It is imported locally so that this
    # function does not depend on the file-level imports.
    from collections.abc import Iterable

    if isinstance(conv, Iterable):
        raise SanityError(f'multiple conversion functions given for the '
                          f'single capturing group {tag!r}')

    for m in finditer(patt, filename, encoding):
        try:
            val = m.group(tag)
        except (IndexError, KeyError):
            raise SanityError(f'no such group in pattern {patt!r}: {tag}')

        try:
            converted = conv(val) if callable(conv) else val
        except ValueError:
            fn_name = _callable_name(conv)
            raise SanityError(
                f'could not convert value {val!r} using {fn_name}()'
            )

        # Yield outside the ``try`` so that a ValueError thrown into the
        # generator by its consumer is not reported as a conversion error.
        yield converted
557+
558+
559+
def _extractiter_multitag(patt, filename, tags, conv, encoding):
    '''Yield a tuple with the values of the capturing groups ``tags`` for
    every match of ``patt`` in ``filename``.

    :arg patt: The regex pattern, passed on to :func:`finditer`.
    :arg filename: The file to search, passed on to :func:`finditer`.
    :arg tags: An iterable of capturing group numbers or names.
    :arg conv: A single conversion function, applied to every group, or an
        iterable of conversion functions, applied position by position to
        the groups in ``tags``.
        Surplus conversion functions are ignored.
        If fewer conversion functions than tags are given, the last one is
        used for the remaining groups.
        Entries that are not callable (e.g., ``None``) leave the value as
        extracted; an empty iterable therefore means no conversion at all.
    :arg encoding: The file encoding, passed on to :func:`finditer`.
    :raises SanityError: If a capturing group in ``tags`` does not exist in
        a match, or if a conversion function raises a :class:`ValueError`.
    '''
    # The ``collections.Iterable`` alias was removed in Python 3.10; the ABC
    # lives in ``collections.abc``. It is imported locally so that this
    # function does not depend on the file-level imports.
    from collections.abc import Iterable

    # NOTE(review): ``builtins.len``/``builtins.zip`` are spelled out because
    # the original code calls ``builtins.len`` explicitly -- presumably this
    # module shadows some builtins; confirm before simplifying.

    # Materialize once, so that one-shot iterators survive multiple matches
    tags = tuple(tags)
    ntags = builtins.len(tags)

    # Normalize ``conv`` to exactly one entry per tag; this does not depend
    # on the match, so it is done once up front.
    if isinstance(conv, Iterable):
        convs = list(conv)[:ntags]
    else:
        convs = [conv]

    if not convs:
        convs = [None]

    # Reuse the last function in case we have fewer conversion functions
    # than tags
    convs.extend([convs[-1]] * (ntags - builtins.len(convs)))

    for m in finditer(patt, filename, encoding):
        vals = []
        for t in tags:
            try:
                vals.append(m.group(t))
            except (IndexError, KeyError):
                raise SanityError(f'no such group in pattern {patt!r}: {t}')

        converted_vals = []
        for v, c in builtins.zip(vals, convs):
            try:
                converted_vals.append(c(v) if callable(c) else v)
            except ValueError:
                # Report the function that actually failed, not the whole
                # collection of conversion functions
                fn_name = _callable_name(c)
                raise SanityError(
                    f'could not convert value {v!r} using {fn_name}()'
                )

        yield tuple(converted_vals)
586+
587+
588+
@deferrable
def extractiter(patt, filename, tag=0, conv=None, encoding='utf-8'):
    '''Get an iterator over the values extracted from the capturing group
    ``tag`` of a matching regex ``patt`` in the file ``filename``.

    This function is equivalent to :func:`extractall` except that it returns
    a generator object, instead of a list, which you can use to iterate over
    the extracted values.

    If ``tag`` is an iterable other than a string, each item produced is a
    tuple with one value per capturing group listed in ``tag``.
    Otherwise, each item is the value of the single capturing group ``tag``.
    '''
    # The ``collections.Iterable`` alias was removed in Python 3.10; the ABC
    # lives in ``collections.abc``. It is imported locally so that this
    # function does not depend on the file-level imports.
    from collections.abc import Iterable

    # A string is iterable, too, but it names a single capturing group
    if isinstance(tag, Iterable) and not isinstance(tag, str):
        yield from _extractiter_multitag(patt, filename, tag, conv, encoding)
    else:
        yield from _extractiter_singletag(patt, filename, tag, conv, encoding)
555601

556602

557603
@deferrable
@@ -572,11 +618,24 @@ def extractall(patt, filename, tag=0, conv=None, encoding='utf-8'):
572618
Group ``0`` refers always to the whole match.
573619
Since the file is processed line by line, this means that group ``0``
574620
returns the whole line that was matched.
575-
:arg conv: A callable that takes a single argument and returns a new value.
576-
If provided, it will be used to convert the extracted values before
577-
returning them.
578-
:returns: A list of the extracted values from the matched regex.
621+
:arg conv: A callable or iterable of callables taking a single argument
622+
and returning a new value.
623+
If not an iterable, it will be used to convert the extracted values for
624+
all the capturing groups specified in ``tag``.
625+
Otherwise, each conversion function will be used to convert the value
626+
extracted from the corresponding capturing group in ``tag``.
627+
If fewer conversion functions are supplied than the corresponding
628+
capturing groups in ``tag``, the last conversion function will be used
629+
for the additional capturing groups.
630+
:returns: A list of tuples of converted values extracted from the
631+
capturing groups specified in ``tag``, if ``tag`` is an iterable.
632+
Otherwise, a list of the converted values extracted from the single
633+
capturing group specified in ``tag``.
579634
:raises reframe.core.exceptions.SanityError: In case of errors.
635+
636+
.. versionchanged:: 3.1
637+
Multiple regex capturing groups are now supported via ``tag`` and
638+
multiple conversion functions can be used in ``conv``.
580639
'''
581640
return list(evaluate(x)
582641
for x in extractiter(patt, filename, tag, conv, encoding))

unittests/test_sanity_functions.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,9 @@ def setUp(self):
597597
fp.write('Step: 1\n')
598598
fp.write('Step: 2\n')
599599
fp.write('Step: 3\n')
600+
fp.write('Number: 1 2\n')
601+
fp.write('Number: 2 4\n')
602+
fp.write('Number: 3 6\n')
600603

601604
def tearDown(self):
602605
os.remove(self.tempfile)
@@ -650,6 +653,60 @@ def test_extractall(self):
650653
for expected, v in enumerate(res, start=1):
651654
assert expected == v
652655

656+
def test_extractall_multiple_tags(self):
    '''Check ``extractall`` with several capturing groups per match.

    Every check expects the ``n``-th match (counting from 1) to carry the
    values ``n`` and ``2*n``.
    '''
    named_patt = r'Number: (?P<no1>\d+) (?P<no2>\d+)'
    named_tags = ('no1', 'no2')

    # Multiple numeric groups: values come back as strings
    matches = sn.evaluate(
        sn.extractall(r'Number: (\d+) (\d+)', self.tempfile, (1, 2))
    )
    for n, pair in enumerate(matches, start=1):
        assert str(n) == pair[0]
        assert str(2*n) == pair[1]

    # Multiple named groups: values come back as strings
    matches = sn.evaluate(
        sn.extractall(named_patt, self.tempfile, named_tags)
    )
    for n, pair in enumerate(matches, start=1):
        assert str(n) == pair[0]
        assert str(2*n) == pair[1]

    # A single conversion function is applied to all groups
    matches = sn.evaluate(
        sn.extractall(named_patt, self.tempfile, named_tags, int)
    )
    for n, pair in enumerate(matches, start=1):
        assert n == pair[0]
        assert 2 * n == pair[1]

    # One conversion function per group
    matches = sn.evaluate(
        sn.extractall(named_patt, self.tempfile, named_tags, (int, float))
    )
    for n, pair in enumerate(matches, start=1):
        assert n == pair[0]
        assert 2 * n == pair[1]
        assert isinstance(pair[1], float)

    # More conversion functions than tags
    matches = sn.evaluate(
        sn.extractall(named_patt, self.tempfile, named_tags,
                      [int, float, float, float])
    )
    for n, pair in enumerate(matches, start=1):
        assert n == pair[0]
        assert 2 * n == pair[1]

    # Fewer conversion functions than tags
    matches = sn.evaluate(
        sn.extractall(named_patt, self.tempfile, named_tags, [int])
    )
    for n, pair in enumerate(matches, start=1):
        assert n == pair[0]
        assert 2 * n == pair[1]

    # Multiple conversion functions and a single tag is an error
    with pytest.raises(SanityError):
        sn.evaluate(sn.extractall(
            r'Number: (?P<no>\d+) \d+', self.tempfile, 'no', [int, float])
        )
709+
653710
def test_extractall_encoding(self):
654711
res = sn.evaluate(
655712
sn.extractall('Odyssey', self.utf16_file, encoding='utf-16')

0 commit comments

Comments
 (0)