Address review comments: use fixture, parametrize, long-line edge cases

thrix · thrix · commit 8e5920609988 · 2026-03-20T15:52:25.000+01:00
- Convert `_make_invocation` helper to a pytest factory fixture
- Parametrize matching tests into a single `test_extract_failures`
  with descriptive IDs instead of separate test functions
- Add long-line edge cases: error embedded within a long line
  (no newline separator), and word-boundary false positive
- Use 30s timeout ceiling instead of 5s to avoid CI flakiness
  while still catching catastrophic backtracking
- Add comments explaining `time.monotonic()` choice
- Minor: fix comment typo ("per line" -> "per call")

Assisted-by: Claude Code
Signed-off-by: Miroslav Vadkerti <mvadkert@redhat.com>
diff --git a/tests/unit/test_shell.py b/tests/unit/test_shell.py
@@ -1,83 +1,120 @@
 import time
 from unittest.mock import MagicMock
 
+import pytest
+
 import tmt.utils
 from tmt.frameworks.shell import _extract_failures
 from tmt.utils import Path
 
 
-def _make_invocation(log_content: str) -> MagicMock:
-    """Create a mock TestInvocation that returns given log content on read."""
-    mock = MagicMock()
-    mock.phase.step.plan.execute.read.return_value = log_content
-    return mock
-
-
-def test_no_failures() -> None:
-    invocation = _make_invocation("all good\nnothing wrong here\n")
-    assert _extract_failures(invocation, Path("dummy.log")) == []
-
-
-def test_error_match() -> None:
-    invocation = _make_invocation("line1\nsome error occurred\nline3\n")
-    result = _extract_failures(invocation, Path("dummy.log"))
-    assert result == ["some error occurred"]
-
-
-def test_fail_match() -> None:
-    invocation = _make_invocation("test passed\ntest fail here\ndone\n")
-    result = _extract_failures(invocation, Path("dummy.log"))
-    assert result == ["test fail here"]
-
-
-def test_case_insensitive() -> None:
-    invocation = _make_invocation("ERROR: something\nFAIL: test\n")
-    result = _extract_failures(invocation, Path("dummy.log"))
-    assert result == ["ERROR: something", "FAIL: test"]
-
-
-def test_multiple_matches() -> None:
-    invocation = _make_invocation("ok\nerror one\npass\nfail two\nerror three\n")
-    result = _extract_failures(invocation, Path("dummy.log"))
-    assert result == ["error one", "fail two", "error three"]
-
-
-def test_word_boundary() -> None:
-    """Words like 'errorless' or 'failover' should not match."""
-    invocation = _make_invocation("errorless operation\nfailover complete\n")
-    assert _extract_failures(invocation, Path("dummy.log")) == []
-
-
-def test_file_error() -> None:
+@pytest.fixture
+def make_invocation():
+    """Factory fixture creating a mock TestInvocation returning given log content."""
+
+    def _factory(log_content: str) -> MagicMock:
+        mock = MagicMock()
+        mock.phase.step.plan.execute.read.return_value = log_content
+        return mock
+
+    return _factory
+
+
+_FAILURE_MATCH_CASES: list[tuple[str, str, list[str]]] = [
+    ('no failures', 'all good\nnothing wrong here\n', []),
+    ('error keyword', 'line1\nsome error occurred\nline3\n', ['some error occurred']),
+    ('fail keyword', 'test passed\ntest fail here\ndone\n', ['test fail here']),
+    ('case insensitive', 'ERROR: something\nFAIL: test\n', ['ERROR: something', 'FAIL: test']),
+    (
+        'multiple matches',
+        'ok\nerror one\npass\nfail two\nerror three\n',
+        ['error one', 'fail two', 'error three'],
+    ),
+    ('word boundary - no false positives', 'errorless operation\nfailover complete\n', []),
+]
+
+
+@pytest.mark.parametrize(
+    ('log_content', 'expected'),
+    [(log, expected) for _, log, expected in _FAILURE_MATCH_CASES],
+    ids=[name for name, _, _ in _FAILURE_MATCH_CASES],
+)
+def test_extract_failures(
+    make_invocation,
+    log_content: str,
+    expected: list[str],
+) -> None:
+    """Verify _extract_failures matches the correct lines."""
+    invocation = make_invocation(log_content)
+    assert _extract_failures(invocation, Path('dummy.log')) == expected
+
+
+def test_extract_failures_file_error() -> None:
+    """Verify _extract_failures returns empty list when the log file cannot be read."""
     invocation = MagicMock()
-    invocation.phase.step.plan.execute.read.side_effect = tmt.utils.FileError("not found")
-    assert _extract_failures(invocation, Path("dummy.log")) == []
-
-
-def test_long_lines_performance() -> None:
+    invocation.phase.step.plan.execute.read.side_effect = tmt.utils.FileError('not found')
+    assert _extract_failures(invocation, Path('dummy.log')) == []
+
+
+_LONG_LINE_CASES: list[tuple[str, str, list[str]]] = [
+    (
+        'long line without match followed by error line',
+        'start\n{long}\nsome error here\nend\n',
+        ['some error here'],
+    ),
+    (
+        'error embedded in long line without newline separator',
+        'start\n{long} error in the middle {long}\nend\n',
+        ['{long} error in the middle {long}'],
+    ),
+    (
+        'long line with word boundary - no false positive',
+        'start\n{long}errorless{long}\nend\n',
+        [],
+    ),
+]
+
+
+@pytest.mark.parametrize(
+    ('log_template', 'expected_template'),
+    [(log, expected) for _, log, expected in _LONG_LINE_CASES],
+    ids=[name for name, _, _ in _LONG_LINE_CASES],
+)
+def test_extract_failures_long_lines(
+    make_invocation,
+    log_template: str,
+    expected_template: list[str],
+) -> None:
     """
-    Regression test: regex must not cause catastrophic backtracking
-    on very long lines.
+    Verify correct handling of very long lines.
 
-    The original implementation used re.findall(r'.*\\b(?:error|fail)\\b.*', ...)
-    which caused O(n^2) or worse backtracking on long lines without matches,
+    The original implementation used ``re.findall(r'.*\\b(?:error|fail)\\b.*', ...)``
+    which caused catastrophic backtracking on long lines (O(n^2) or worse),
     hanging tmt processes for hours on 1M+ character lines (e.g. base64-encoded
     in-toto attestation payloads in container build logs).
+
+    The current implementation uses ``str.splitlines()`` and per-line
+    ``re.search()`` which processes each line in linear time.
     """
-    # Build a log with a 1M-character line (similar to base64 attestation data)
-    long_line = "A" * 1_000_000
-    log_content = f"start\n{long_line}\nsome error here\nend\n"
+    long_segment = 'A' * 1_000_000
+    log_content = log_template.replace('{long}', long_segment)
+    expected = [line.replace('{long}', long_segment) for line in expected_template]
 
-    invocation = _make_invocation(log_content)
+    invocation = make_invocation(log_content)
 
+    # time.monotonic() is the correct choice for elapsed time measurement
+    # as it is not affected by system clock adjustments.
     start = time.monotonic()
-    result = _extract_failures(invocation, Path("dummy.log"))
+    result = _extract_failures(invocation, Path('dummy.log'))
     elapsed = time.monotonic() - start
 
-    # Must complete in under 5 seconds (the old regex took 5+ seconds
-    # on just 10k characters, and would never complete on 1M characters)
-    assert elapsed < 5.0, (
-        f"_extract_failures took {elapsed:.1f}s on a log with a 1M-char line; "
-        f"likely catastrophic regex backtracking"
+    assert result == expected
+
+    # The old regex would never complete on lines this long — it took 5+
+    # seconds on just 10k characters. The splitlines approach finishes in
+    # well under 1 second. Use 30 seconds as a generous ceiling to avoid
+    # flakiness on slow CI while still catching catastrophic backtracking.
+    assert elapsed < 30.0, (
+        f'_extract_failures took {elapsed:.1f}s on a log with a 1M-char line; '
+        f'likely catastrophic regex backtracking'
     )
-    assert result == ["some error here"]
diff --git a/tmt/frameworks/shell.py b/tmt/frameworks/shell.py
@@ -11,7 +11,7 @@
 from tmt.utils import Path
 
 # Pattern to match lines containing "error" or "fail" as whole words.
-# Compiled at module level to avoid re-compilation per line.
+# Compiled at module level to avoid re-compilation per call.
 FAILURE_PATTERN = re.compile(r'\b(?:error|fail)\b', re.IGNORECASE)