Skip to content

Commit add682c

Browse files
committed
refactor(logfiles): optimize error search in log lines
Improve performance of log file error search by adding a fast prefilter that checks for error tokens in each chunk before processing lines. This reduces unnecessary line-by-line checks when no errors are present. Also ensures the error regex is unanchored for correct chunk scanning.
1 parent 1f2e759 commit add682c

File tree

1 file changed

+20
-4
lines changed

1 file changed

+20
-4
lines changed

cardano_node_tests/utils/logfiles.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
LOGGER = logging.getLogger(__name__)
2121

2222
BUFFER_SIZE = 512 * 1024 # 512 KB buffer
23-
ROTATED_RE = re.compile(r".+\.[0-9]+") # detect rotated log file
23+
ROTATED_RE = re.compile(r".+\.[0-9]+") # Detect rotated log file
24+
# NOTE: The regex needs to be unanchored (no `^` / `$`), because it is also searched against whole multi-line chunks as a prefilter.
2425
ERRORS_RE = re.compile("error|fail", re.IGNORECASE)
2526
ERRORS_IGNORE_FILE_NAME = ".errors_to_ignore"
2627

@@ -88,6 +89,7 @@
8889
}
8990

9091
# Relevant errors from supervisord.log
92+
# NOTE: The regex needs to be unanchored.
9193
SUPERVISORD_ERRORS_RE = re.compile("not expected|FATAL", re.IGNORECASE)
9294

9395

@@ -296,10 +298,10 @@ def _validated_start(seek: int | None, size: int) -> int:
296298
return seek
297299

298300

299-
def _search_log_lines(
301+
def _search_log_lines( # noqa: C901
300302
logfile: pl.Path,
301303
rotated_logs: list[RotableLog],
302-
errors_re: re.Pattern[str],
304+
errors_re: re.Pattern[str], # The error regex needs to be unanchored
303305
*,
304306
errors_ignored_re: re.Pattern[str] | None = None,
305307
look_back_map: dict[str, str] | None = None,
@@ -313,6 +315,8 @@ def _search_log_lines(
313315
- Persists a byte offset at a line boundary for the live logfile.
314316
"""
315317
errs_b = _compile_bytes_from_pattern(pat=errors_re, encoding=encoding)
318+
if not errs_b:
319+
return []
316320
ign_b = _compile_bytes_from_pattern(pat=errors_ignored_re, encoding=encoding)
317321
lb_pairs = _compile_look_back_map_bytes(
318322
m=look_back_map, flags=errors_re.flags, encoding=encoding
@@ -338,15 +342,27 @@ def _search_log_lines(
338342
break
339343

340344
buf = leftover + chunk
345+
346+
# Fast prefilter: is there *any* error token in this chunk?
347+
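# Safe only when the error regex is unanchored: an anchored pattern (`^` / `$`) would be tested against the chunk boundaries instead of each line and could miss real errors.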
348+
has_error_in_chunk = bool(errs_b.search(buf))
349+
341350
lines_b, leftover = _split_complete_lines(buf=buf)
342351

352+
if not has_error_in_chunk:
353+
# No error candidates: just update look-back (cheap) and continue.
354+
# (Deque will keep only the last look_back_lines lines.)
355+
for ln in lines_b:
356+
look_back.append(ln)
357+
continue
358+
343359
for line_b in lines_b:
344360
# Fast ignore
345361
if ign_b and ign_b.search(line_b):
346362
look_back.append(line_b)
347363
continue
348364
# Not an error -> just update context
349-
if not (errs_b and errs_b.search(line_b)):
365+
if not errs_b.search(line_b):
350366
look_back.append(line_b)
351367
continue
352368
# Error: maybe ignore based on mapping

0 commit comments

Comments
 (0)