Skip to content

Commit 8e59206

Browse files
committed
Address review comments: use fixture, parametrize, long-line edge cases
- Convert `_make_invocation` helper to a pytest factory fixture - Parametrize matching tests into a single `test_extract_failures` with descriptive IDs instead of separate test functions - Add long-line edge cases: error embedded within a long line (no newline separator), and word-boundary false positive - Use 30s timeout ceiling instead of 5s to avoid CI flakiness while still catching catastrophic backtracking - Add comments explaining `time.monotonic()` choice - Minor: fix comment typo ("per line" -> "per call") Assisted-by: Claude Code Signed-off-by: Miroslav Vadkerti <mvadkert@redhat.com>
1 parent 4063a35 commit 8e59206

File tree

2 files changed

+101
-64
lines changed

2 files changed

+101
-64
lines changed

tests/unit/test_shell.py

Lines changed: 100 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,120 @@
11
import time
22
from unittest.mock import MagicMock
33

4+
import pytest
5+
46
import tmt.utils
57
from tmt.frameworks.shell import _extract_failures
68
from tmt.utils import Path
79

810

9-
def _make_invocation(log_content: str) -> MagicMock:
10-
"""Create a mock TestInvocation that returns given log content on read."""
11-
mock = MagicMock()
12-
mock.phase.step.plan.execute.read.return_value = log_content
13-
return mock
14-
15-
16-
def test_no_failures() -> None:
17-
invocation = _make_invocation("all good\nnothing wrong here\n")
18-
assert _extract_failures(invocation, Path("dummy.log")) == []
19-
20-
21-
def test_error_match() -> None:
22-
invocation = _make_invocation("line1\nsome error occurred\nline3\n")
23-
result = _extract_failures(invocation, Path("dummy.log"))
24-
assert result == ["some error occurred"]
25-
26-
27-
def test_fail_match() -> None:
28-
invocation = _make_invocation("test passed\ntest fail here\ndone\n")
29-
result = _extract_failures(invocation, Path("dummy.log"))
30-
assert result == ["test fail here"]
31-
32-
33-
def test_case_insensitive() -> None:
34-
invocation = _make_invocation("ERROR: something\nFAIL: test\n")
35-
result = _extract_failures(invocation, Path("dummy.log"))
36-
assert result == ["ERROR: something", "FAIL: test"]
37-
38-
39-
def test_multiple_matches() -> None:
40-
invocation = _make_invocation("ok\nerror one\npass\nfail two\nerror three\n")
41-
result = _extract_failures(invocation, Path("dummy.log"))
42-
assert result == ["error one", "fail two", "error three"]
43-
44-
45-
def test_word_boundary() -> None:
46-
"""Words like 'errorless' or 'failover' should not match."""
47-
invocation = _make_invocation("errorless operation\nfailover complete\n")
48-
assert _extract_failures(invocation, Path("dummy.log")) == []
49-
50-
51-
def test_file_error() -> None:
11+
@pytest.fixture
12+
def make_invocation():
13+
"""Factory fixture creating a mock TestInvocation returning given log content."""
14+
15+
def _factory(log_content: str) -> MagicMock:
16+
mock = MagicMock()
17+
mock.phase.step.plan.execute.read.return_value = log_content
18+
return mock
19+
20+
return _factory
21+
22+
23+
_FAILURE_MATCH_CASES: list[tuple[str, str, list[str]]] = [
24+
('no failures', 'all good\nnothing wrong here\n', []),
25+
('error keyword', 'line1\nsome error occurred\nline3\n', ['some error occurred']),
26+
('fail keyword', 'test passed\ntest fail here\ndone\n', ['test fail here']),
27+
('case insensitive', 'ERROR: something\nFAIL: test\n', ['ERROR: something', 'FAIL: test']),
28+
(
29+
'multiple matches',
30+
'ok\nerror one\npass\nfail two\nerror three\n',
31+
['error one', 'fail two', 'error three'],
32+
),
33+
('word boundary - no false positives', 'errorless operation\nfailover complete\n', []),
34+
]
35+
36+
37+
@pytest.mark.parametrize(
38+
('log_content', 'expected'),
39+
[(log, expected) for _, log, expected in _FAILURE_MATCH_CASES],
40+
ids=[name for name, _, _ in _FAILURE_MATCH_CASES],
41+
)
42+
def test_extract_failures(
43+
make_invocation,
44+
log_content: str,
45+
expected: list[str],
46+
) -> None:
47+
"""Verify _extract_failures matches the correct lines."""
48+
invocation = make_invocation(log_content)
49+
assert _extract_failures(invocation, Path('dummy.log')) == expected
50+
51+
52+
def test_extract_failures_file_error() -> None:
53+
"""Verify _extract_failures returns empty list when the log file cannot be read."""
5254
invocation = MagicMock()
53-
invocation.phase.step.plan.execute.read.side_effect = tmt.utils.FileError("not found")
54-
assert _extract_failures(invocation, Path("dummy.log")) == []
55-
56-
57-
def test_long_lines_performance() -> None:
55+
invocation.phase.step.plan.execute.read.side_effect = tmt.utils.FileError('not found')
56+
assert _extract_failures(invocation, Path('dummy.log')) == []
57+
58+
59+
_LONG_LINE_CASES: list[tuple[str, str, list[str]]] = [
60+
(
61+
'long line without match followed by error line',
62+
'start\n{long}\nsome error here\nend\n',
63+
['some error here'],
64+
),
65+
(
66+
'error embedded in long line without newline separator',
67+
'start\n{long} error in the middle {long}\nend\n',
68+
['{long} error in the middle {long}'],
69+
),
70+
(
71+
'long line with word boundary - no false positive',
72+
'start\n{long}errorless{long}\nend\n',
73+
[],
74+
),
75+
]
76+
77+
78+
@pytest.mark.parametrize(
79+
('log_template', 'expected_template'),
80+
[(log, expected) for _, log, expected in _LONG_LINE_CASES],
81+
ids=[name for name, _, _ in _LONG_LINE_CASES],
82+
)
83+
def test_extract_failures_long_lines(
84+
make_invocation,
85+
log_template: str,
86+
expected_template: list[str],
87+
) -> None:
5888
"""
59-
Regression test: regex must not cause catastrophic backtracking
60-
on very long lines.
89+
Verify correct handling of very long lines.
6190
62-
The original implementation used re.findall(r'.*\\b(?:error|fail)\\b.*', ...)
63-
which caused O(n^2) or worse backtracking on long lines without matches,
91+
The original implementation used ``re.findall(r'.*\\b(?:error|fail)\\b.*', ...)``
92+
which caused catastrophic backtracking on long lines (O(n^2) or worse),
6493
hanging tmt processes for hours on 1M+ character lines (e.g. base64-encoded
6594
in-toto attestation payloads in container build logs).
95+
96+
The current implementation uses ``str.splitlines()`` and per-line
97+
``re.search()`` which processes each line in linear time.
6698
"""
67-
# Build a log with a 1M-character line (similar to base64 attestation data)
68-
long_line = "A" * 1_000_000
69-
log_content = f"start\n{long_line}\nsome error here\nend\n"
99+
long_segment = 'A' * 1_000_000
100+
log_content = log_template.replace('{long}', long_segment)
101+
expected = [line.replace('{long}', long_segment) for line in expected_template]
70102

71-
invocation = _make_invocation(log_content)
103+
invocation = make_invocation(log_content)
72104

105+
# time.monotonic() is the correct choice for elapsed time measurement
106+
# as it is not affected by system clock adjustments.
73107
start = time.monotonic()
74-
result = _extract_failures(invocation, Path("dummy.log"))
108+
result = _extract_failures(invocation, Path('dummy.log'))
75109
elapsed = time.monotonic() - start
76110

77-
# Must complete in under 5 seconds (the old regex took 5+ seconds
78-
# on just 10k characters, and would never complete on 1M characters)
79-
assert elapsed < 5.0, (
80-
f"_extract_failures took {elapsed:.1f}s on a log with a 1M-char line; "
81-
f"likely catastrophic regex backtracking"
111+
assert result == expected
112+
113+
# The old regex would never complete on lines this long — it took 5+
114+
# seconds on just 10k characters. The splitlines approach finishes in
115+
# well under 1 second. Use 30 seconds as a generous ceiling to avoid
116+
# flakiness on slow CI while still catching catastrophic backtracking.
117+
assert elapsed < 30.0, (
118+
f'_extract_failures took {elapsed:.1f}s on a log with a 1M-char line; '
119+
f'likely catastrophic regex backtracking'
82120
)
83-
assert result == ["some error here"]

tmt/frameworks/shell.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from tmt.utils import Path
1212

1313
# Pattern to match lines containing "error" or "fail" as whole words.
14-
# Compiled at module level to avoid re-compilation per line.
14+
# Compiled at module level to avoid re-compilation per call.
1515
FAILURE_PATTERN = re.compile(r'\b(?:error|fail)\b', re.IGNORECASE)
1616

1717

0 commit comments

Comments
 (0)