Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions tests/unit/test_shell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import time
from unittest.mock import MagicMock

import pytest

import tmt.utils
from tmt.frameworks.shell import _extract_failures
from tmt.utils import Path


@pytest.fixture
def make_invocation():
    """Provide a factory building a mock TestInvocation that yields fixed log content."""

    def _build(log_content: str) -> MagicMock:
        invocation = MagicMock()
        invocation.phase.step.plan.execute.read.return_value = log_content
        return invocation

    return _build
Comment on lines +11 to +20
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the whole factory? You do not need it, let it provide just the mock object and mock the relevant part in the tests. That would make it more consistent with all other instances



# Each case is a triple: (test id, log content fed to the mock invocation,
# lines expected back from _extract_failures).
_FAILURE_MATCH_CASES: list[tuple[str, str, list[str]]] = [
    (
        'no failures',
        'all good\nnothing wrong here\n',
        [],
    ),
    (
        'error keyword',
        'line1\nsome error occurred\nline3\n',
        ['some error occurred'],
    ),
    (
        'fail keyword',
        'test passed\ntest fail here\ndone\n',
        ['test fail here'],
    ),
    (
        'case insensitive',
        'ERROR: something\nFAIL: test\n',
        ['ERROR: something', 'FAIL: test'],
    ),
    (
        'multiple matches',
        'ok\nerror one\npass\nfail two\nerror three\n',
        ['error one', 'fail two', 'error three'],
    ),
    (
        'word boundary - no false positives',
        'errorless operation\nfailover complete\n',
        [],
    ),
]


@pytest.mark.parametrize(
    ('log_content', 'expected'),
    [case[1:] for case in _FAILURE_MATCH_CASES],
    ids=[case[0] for case in _FAILURE_MATCH_CASES],
)
def test_extract_failures(
    make_invocation,
    log_content: str,
    expected: list[str],
) -> None:
    """Verify _extract_failures matches the correct lines."""
    invocation = make_invocation(log_content)
    result = _extract_failures(invocation, Path('dummy.log'))
    assert result == expected


def test_extract_failures_file_error() -> None:
    """Verify _extract_failures returns empty list when the log file cannot be read."""
    broken_invocation = MagicMock()
    broken_invocation.phase.step.plan.execute.read.side_effect = tmt.utils.FileError('not found')

    result = _extract_failures(broken_invocation, Path('dummy.log'))
    assert result == []


# Each case is a triple: (test id, log template, expected line templates).
# The '{long}' placeholder in the templates is expanded to a very long
# segment by the test, so the literals here stay readable.
_LONG_LINE_CASES: list[tuple[str, str, list[str]]] = [
    (
        'long line without match followed by error line',
        'start\n{long}\nsome error here\nend\n',
        ['some error here'],
    ),
    (
        'error embedded in long line without newline separator',
        'start\n{long} error in the middle {long}\nend\n',
        ['{long} error in the middle {long}'],
    ),
    (
        'long line with word boundary - no false positive',
        'start\n{long}errorless{long}\nend\n',
        [],
    ),
]


@pytest.mark.parametrize(
    ('log_template', 'expected_template'),
    [(log, expected) for _, log, expected in _LONG_LINE_CASES],
    ids=[name for name, _, _ in _LONG_LINE_CASES],
)
def test_extract_failures_long_lines(
    make_invocation,
    log_template: str,
    expected_template: list[str],
) -> None:
    """
    Verify correct handling of very long lines.

    The original implementation used ``re.findall(r'.*\\b(?:error|fail)\\b.*', ...)``
    which caused catastrophic backtracking on long lines (O(n^2) or worse),
    hanging tmt processes for hours on 1M+ character lines (e.g. base64-encoded
    in-toto attestation payloads in container build logs).

    The current implementation uses ``str.splitlines()`` and per-line
    ``re.search()`` which processes each line in linear time.
    """
    long_segment = 'A' * 1_000_000
    log_content = log_template.replace('{long}', long_segment)
    expected = [line.replace('{long}', long_segment) for line in expected_template]

    invocation = make_invocation(log_content)

    # time.monotonic() is the correct choice for elapsed time measurement
    # as it is not affected by system clock adjustments.
    start = time.monotonic()
    result = _extract_failures(invocation, Path('dummy.log'))
    elapsed = time.monotonic() - start

    assert result == expected

    # The old regex would never complete on lines this long — it took 5+
    # seconds on just 10k characters. The splitlines approach finishes in
    # well under 1 second. Use 30 seconds as a generous ceiling to avoid
    # flakiness on slow CI while still catching catastrophic backtracking.
    assert elapsed < 30.0, (
        f'_extract_failures took {elapsed:.1f}s on a log with a 1M-char line; '
        f'likely catastrophic regex backtracking'
    )
6 changes: 5 additions & 1 deletion tmt/frameworks/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,18 @@
from tmt.steps.execute import TEST_OUTPUT_FILENAME, TestInvocation
from tmt.utils import Path

# Lines mentioning "error" or "fail" as standalone words (any letter case)
# are treated as failures. Compiled once at import time so every call to
# _extract_failures reuses the same pattern object.
FAILURE_PATTERN = re.compile(r'\b(?:error|fail)\b', re.IGNORECASE)


def _extract_failures(invocation: 'TestInvocation', log_path: Path) -> list[str]:
    """
    Collect failure-looking lines from a test log.

    :param invocation: test invocation whose execute step is used to read the log.
    :param log_path: path of the log file to read.
    :returns: lines containing "error" or "fail" as whole words
        (case-insensitive); an empty list when the log cannot be read.
    """
    try:
        log = invocation.phase.step.plan.execute.read(log_path)
    except tmt.utils.FileError:
        # Best-effort extraction: a missing/unreadable log simply yields nothing.
        return []

    # Scan line by line with the precompiled pattern. The previous
    # `re.findall(r'.*\b(?:error|fail)\b.*', ...)` single-pass approach
    # suffered catastrophic backtracking on very long lines, so it must
    # not remain in the body ahead of this return.
    return [line for line in log.splitlines() if FAILURE_PATTERN.search(line)]


@provides_framework('shell')
Expand Down
Loading