Skip to content

Commit c0e5aaa

Browse files
authored
Merge pull request #4758 from Flamefire/multi-line-replacement
enhance `apply_regex_substitutions` to support use of multi-line patterns, requiring matching all patterns in each file, and use pre-compiled regular expressions
2 parents 5fd677b + a34a9c2 commit c0e5aaa

File tree

2 files changed

+98
-41
lines changed

2 files changed

+98
-41
lines changed

easybuild/tools/filetools.py

Lines changed: 59 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1643,39 +1643,52 @@ def apply_patch(patch_file, dest, fn=None, copy=False, level=None, use_git_am=Fa
16431643
return True
16441644

16451645

1646-
def apply_regex_substitutions(paths, regex_subs, backup='.orig.eb', on_missing_match=None):
1646+
def apply_regex_substitutions(paths, regex_subs, backup='.orig.eb',
1647+
on_missing_match=None, match_all=False, single_line=True):
16471648
"""
16481649
Apply specified list of regex substitutions.
16491650
16501651
:param paths: list of paths to files to patch (or just a single filepath)
1651-
:param regex_subs: list of substitutions to apply, specified as (<regexp pattern>, <replacement string>)
1652+
:param regex_subs: list of substitutions to apply,
1653+
specified as (<regexp pattern or regex instance>, <replacement string>)
16521654
:param backup: create backup of original file with specified suffix (no backup if value evaluates to False)
16531655
:param on_missing_match: Define what to do when no match was found in the file.
16541656
Can be 'error' to raise an error, 'warn' to print a warning or 'ignore' to do nothing
16551657
Defaults to the value of --strict
1658+
:param match_all: Expect to match all patterns in all files
1659+
instead of at least one per file for error/warning reporting
1660+
:param single_line: Apply each pattern to each line separately, in the order of the patterns (all matches within a line are replaced).
1661+
If False the patterns are applied in order to the full text and may match line breaks.
16561662
"""
16571663
if on_missing_match is None:
16581664
on_missing_match = build_option('strict')
16591665
allowed_values = (ERROR, IGNORE, WARN)
16601666
if on_missing_match not in allowed_values:
1661-
raise EasyBuildError('Invalid value passed to on_missing_match: %s (allowed: %s)',
1662-
on_missing_match, ', '.join(allowed_values))
1667+
raise ValueError('Invalid value passed to on_missing_match: %s (allowed: %s)',
1668+
on_missing_match, ', '.join(allowed_values))
16631669

16641670
if isinstance(paths, string_type):
16651671
paths = [paths]
1672+
if (not isinstance(regex_subs, (list, tuple)) or
1673+
not all(isinstance(sub, (list, tuple)) and len(sub) == 2 for sub in regex_subs)):
1674+
raise ValueError('Parameter regex_subs must be a list of 2-element tuples. Got:', regex_subs)
1675+
1676+
flags = 0 if single_line else re.M
1677+
compiled_regex_subs = [(re.compile(regex, flags) if isinstance(regex, str) else regex, subtxt)
1678+
for (regex, subtxt) in regex_subs]
16661679

16671680
# only report when in 'dry run' mode
16681681
if build_option('extended_dry_run'):
16691682
paths_str = ', '.join(paths)
16701683
dry_run_msg("applying regex substitutions to file(s): %s" % paths_str, silent=build_option('silent'))
1671-
for regex, subtxt in regex_subs:
1672-
dry_run_msg(" * regex pattern '%s', replacement string '%s'" % (regex, subtxt))
1684+
for regex, subtxt in compiled_regex_subs:
1685+
dry_run_msg(" * regex pattern '%s', replacement string '%s'" % (regex.pattern, subtxt))
16731686

16741687
else:
1675-
_log.info("Applying following regex substitutions to %s: %s", paths, regex_subs)
1676-
1677-
compiled_regex_subs = [(re.compile(regex), subtxt) for (regex, subtxt) in regex_subs]
1688+
_log.info("Applying following regex substitutions to %s: %s",
1689+
paths, [(regex.pattern, subtxt) for regex, subtxt in compiled_regex_subs])
16781690

1691+
replacement_failed_msgs = []
16791692
for path in paths:
16801693
try:
16811694
# make sure that file can be opened in text mode;
@@ -1695,32 +1708,49 @@ def apply_regex_substitutions(paths, regex_subs, backup='.orig.eb', on_missing_m
16951708
if backup:
16961709
copy_file(path, path + backup)
16971710
replacement_msgs = []
1711+
replaced = [False] * len(compiled_regex_subs)
16981712
with open_file(path, 'w') as out_file:
1699-
lines = txt_utf8.split('\n')
1700-
del txt_utf8
1701-
for line_id, line in enumerate(lines):
1702-
for regex, subtxt in compiled_regex_subs:
1703-
match = regex.search(line)
1704-
if match:
1713+
if single_line:
1714+
lines = txt_utf8.split('\n')
1715+
del txt_utf8
1716+
for line_id, line in enumerate(lines):
1717+
for i, (regex, subtxt) in enumerate(compiled_regex_subs):
1718+
match = regex.search(line)
1719+
if match:
1720+
origtxt = match.group(0)
1721+
replacement_msgs.append("Replaced in line %d: '%s' -> '%s'" %
1722+
(line_id + 1, origtxt, subtxt))
1723+
replaced[i] = True
1724+
line = regex.sub(subtxt, line)
1725+
lines[line_id] = line
1726+
out_file.write('\n'.join(lines))
1727+
else:
1728+
for i, (regex, subtxt) in enumerate(compiled_regex_subs):
1729+
def do_replace(match):
17051730
origtxt = match.group(0)
1706-
replacement_msgs.append("Replaced in line %d: '%s' -> '%s'" %
1707-
(line_id + 1, origtxt, subtxt))
1708-
line = regex.sub(subtxt, line)
1709-
lines[line_id] = line
1710-
out_file.write('\n'.join(lines))
1731+
# pylint: disable=cell-var-from-loop
1732+
cur_subtxt = match.expand(subtxt)
1733+
# pylint: disable=cell-var-from-loop
1734+
replacement_msgs.append("Replaced: '%s' -> '%s'" % (origtxt, cur_subtxt))
1735+
return cur_subtxt
1736+
txt_utf8, replaced[i] = regex.subn(do_replace, txt_utf8)
1737+
out_file.write(txt_utf8)
17111738
if replacement_msgs:
17121739
_log.info('Applied the following substitutions to %s:\n%s', path, '\n'.join(replacement_msgs))
1713-
else:
1714-
msg = 'Nothing found to replace in %s' % path
1715-
if on_missing_match == ERROR:
1716-
raise EasyBuildError(msg)
1717-
elif on_missing_match == WARN:
1718-
_log.warning(msg)
1719-
else:
1720-
_log.info(msg)
1721-
1740+
if (match_all and not all(replaced)) or (not match_all and not any(replaced)):
1741+
errors = ["Nothing found to replace '%s'" % regex.pattern
1742+
for cur_replaced, (regex, _) in zip(replaced, compiled_regex_subs) if not cur_replaced]
1743+
replacement_failed_msgs.append(', '.join(errors) + ' in ' + path)
17221744
except (IOError, OSError) as err:
17231745
raise EasyBuildError("Failed to patch %s: %s", path, err)
1746+
if replacement_failed_msgs:
1747+
msg = '\n'.join(replacement_failed_msgs)
1748+
if on_missing_match == ERROR:
1749+
raise EasyBuildError(msg)
1750+
elif on_missing_match == WARN:
1751+
_log.warning(msg)
1752+
else:
1753+
_log.info(msg)
17241754

17251755

17261756
def modify_env(old, new):

test/framework/filetools.py

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,16 +1443,27 @@ def test_apply_regex_substitutions(self):
14431443
# Check handling of on_missing_match
14441444
ft.write_file(testfile, testtxt)
14451445
regex_subs_no_match = [('Not there', 'Not used')]
1446-
error_pat = 'Nothing found to replace in %s' % testfile
1446+
error_pat = "Nothing found to replace 'Not there' in %s" % testfile
14471447
# Error
14481448
self.assertErrorRegex(EasyBuildError, error_pat, ft.apply_regex_substitutions, testfile, regex_subs_no_match,
14491449
on_missing_match=run.ERROR)
1450+
# First matches, but 2nd not
1451+
regex_subs_part_match = [regex_subs[0], ('Not there', 'Not used')]
1452+
self.assertErrorRegex(EasyBuildError, error_pat, ft.apply_regex_substitutions, testfile, regex_subs_part_match,
1453+
on_missing_match=run.ERROR, match_all=True)
1454+
# First matched, so OK without match_all (at least one pattern matched)
1455+
ft.apply_regex_substitutions(testfile, regex_subs_part_match,
1456+
on_missing_match=run.ERROR, match_all=False)
14501457

14511458
# Warn
14521459
with self.log_to_testlogfile():
14531460
ft.apply_regex_substitutions(testfile, regex_subs_no_match, on_missing_match=run.WARN)
14541461
logtxt = ft.read_file(self.logfile)
14551462
self.assertIn('WARNING ' + error_pat, logtxt)
1463+
with self.log_to_testlogfile():
1464+
ft.apply_regex_substitutions(testfile, regex_subs_part_match, on_missing_match=run.WARN, match_all=True)
1465+
logtxt = ft.read_file(self.logfile)
1466+
self.assertIn('WARNING ' + error_pat, logtxt)
14561467

14571468
# Ignore
14581469
with self.log_to_testlogfile():
@@ -1465,6 +1476,24 @@ def test_apply_regex_substitutions(self):
14651476
path = os.path.join(self.test_prefix, 'nosuchfile.txt')
14661477
self.assertErrorRegex(EasyBuildError, error_pat, ft.apply_regex_substitutions, path, regex_subs)
14671478

1479+
# Replace multi-line strings
1480+
testtxt = "This si wrong\nBut mkae right\nLeave this!"
1481+
expected_testtxt = 'This is wrong.\nBut make right\nLeave this!'
1482+
ft.write_file(testfile, testtxt)
1483+
repl = ('This si( .*)\n(.*)mkae right$', 'This is wrong.\nBut make right')
1484+
ft.apply_regex_substitutions(testfile, [repl], backup=False, on_missing_match=ERROR, single_line=False)
1485+
new_testtxt = ft.read_file(testfile)
1486+
self.assertEqual(new_testtxt, expected_testtxt)
1487+
# Supports capture groups
1488+
ft.write_file(testfile, testtxt)
1489+
repls = [
1490+
('This si( .*)\n(.*)mkae right$', r'This is\1.\n\2make right'),
1491+
('Lea(ve)', r'Do \g<0>\1'), # Reference to full match
1492+
]
1493+
ft.apply_regex_substitutions(testfile, repls, backup=False, on_missing_match=ERROR, single_line=False)
1494+
new_testtxt = ft.read_file(testfile)
1495+
self.assertEqual(new_testtxt, expected_testtxt.replace('Leave', 'Do Leaveve'))
1496+
14681497
# make sure apply_regex_substitutions can patch files that include UTF-8 characters
14691498
testtxt = b"foo \xe2\x80\x93 bar" # This is an UTF-8 "-"
14701499
ft.write_file(testfile, testtxt)
@@ -1485,34 +1514,32 @@ def test_apply_regex_substitutions(self):
14851514

14861515
# also test apply_regex_substitutions with a *list* of paths
14871516
# cfr. https://github.com/easybuilders/easybuild-framework/issues/3493
1517+
# and a compiled regex
14881518
test_dir = os.path.join(self.test_prefix, 'test_dir')
14891519
test_file1 = os.path.join(test_dir, 'one.txt')
14901520
test_file2 = os.path.join(test_dir, 'two.txt')
14911521
ft.write_file(test_file1, "Donald is an elephant")
14921522
ft.write_file(test_file2, "2 + 2 = 5")
14931523
regexs = [
1494-
('Donald', 'Dumbo'),
1524+
(re.compile('donald', re.I), 'Dumbo'), # Only matches if this is used as-is
14951525
('= 5', '= 4'),
14961526
]
14971527
ft.apply_regex_substitutions([test_file1, test_file2], regexs)
14981528

14991529
# also check dry run mode
15001530
init_config(build_options={'extended_dry_run': True})
1501-
self.mock_stderr(True)
1502-
self.mock_stdout(True)
1503-
ft.apply_regex_substitutions([test_file1, test_file2], regexs)
1504-
stderr, stdout = self.get_stderr(), self.get_stdout()
1505-
self.mock_stderr(False)
1506-
self.mock_stdout(False)
1531+
with self.mocked_stdout_stderr():
1532+
ft.apply_regex_substitutions([test_file1, test_file2], regexs)
1533+
stderr, stdout = self.get_stderr(), self.get_stdout()
15071534

15081535
self.assertFalse(stderr)
1509-
regex = re.compile('\n'.join([
1536+
regex = '\n'.join([
15101537
r"applying regex substitutions to file\(s\): .*/test_dir/one.txt, .*/test_dir/two.txt",
1511-
r" \* regex pattern 'Donald', replacement string 'Dumbo'",
1538+
r" \* regex pattern 'donald', replacement string 'Dumbo'",
15121539
r" \* regex pattern '= 5', replacement string '= 4'",
15131540
'',
1514-
]))
1515-
self.assertTrue(regex.search(stdout), "Pattern '%s' should be found in: %s" % (regex.pattern, stdout))
1541+
])
1542+
self.assertTrue(re.search(regex, stdout), "Pattern '%s' should be found in: %s" % (regex, stdout))
15161543

15171544
def test_find_flexlm_license(self):
15181545
"""Test find_flexlm_license function."""

0 commit comments

Comments
 (0)