Skip to content

Commit cfb4aba

Browse files
committed
feat(logfiles): read log files in chunks for efficiency
Refactor log file reading to process each file in manageable chunks instead of line by line, so that large log files are handled more efficiently. Update docstrings for better clarity and add detailed argument descriptions.
1 parent c0fdae5 commit cfb4aba

File tree

1 file changed

+54
-13
lines changed

1 file changed

+54
-13
lines changed

cardano_node_tests/utils/logfiles.py

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -294,21 +294,40 @@ def find_msgs_in_logs(
294294
timestamp: float,
295295
only_first: bool = False,
296296
) -> list[str]:
297-
"""Find messages in log."""
297+
"""Find messages in log.
298+
299+
Args:
300+
regex (str): The regular expression to search for.
301+
logfile (Path): The path to the log file.
302+
seek_offset (int): The seek offset in the log file.
303+
timestamp (float): The timestamp to filter log entries.
304+
only_first (bool): Whether to return only the first match.
305+
306+
Returns:
307+
list[str]: A list of matching log lines.
308+
"""
298309
regex_comp = re.compile(regex)
299310
lines_found = []
300-
for logfile_rec in _get_rotated_logs(
301-
logfile=pl.Path(logfile), seek=seek_offset, timestamp=timestamp
302-
):
311+
for logfile_rec in _get_rotated_logs(logfile=logfile, seek=seek_offset, timestamp=timestamp):
303312
with open(logfile_rec.logfile, encoding="utf-8") as infile:
304313
infile.seek(logfile_rec.seek)
305-
for line in infile:
306-
if regex_comp.search(line):
307-
lines_found.append(line)
308-
if only_first:
309-
break
310-
if lines_found and only_first:
311-
break
314+
315+
# Read the file in chunks
316+
incomplete_line = ""
317+
while chunk := infile.read(BUFFER_SIZE):
318+
# Prepend any leftover from the last chunk
319+
if incomplete_line:
320+
chunk = incomplete_line + chunk
321+
lines = chunk.splitlines(keepends=True) # Preserve line endings
322+
323+
# Check if the last line is incomplete
324+
incomplete_line = lines.pop() if not lines[-1].endswith(("\n", "\r")) else ""
325+
326+
for line in lines:
327+
if regex_comp.search(line):
328+
lines_found.append(line)
329+
if only_first:
330+
return lines_found
312331
return lines_found
313332

314333

@@ -318,7 +337,17 @@ def check_msgs_presence_in_logs(
318337
state_dir: pl.Path,
319338
timestamp: float,
320339
) -> list[str]:
321-
"""Check if the expected messages are present in logs."""
340+
"""Check if the expected messages are present in logs.
341+
342+
Args:
343+
regex_pairs (list[tuple[str, str]]): List of tuples with file globs and regex patterns.
344+
seek_offsets (dict[str, int]): Dictionary of file seek offsets.
345+
state_dir (Path): Path to the state directory.
346+
timestamp (float): Timestamp to filter log entries.
347+
348+
Returns:
349+
list[str]: List of error messages for missing log entries.
350+
"""
322351
errors = []
323352
for files_glob, regex in regex_pairs:
324353
regex_comp = re.compile(regex)
@@ -337,7 +366,19 @@ def check_msgs_presence_in_logs(
337366
):
338367
with open(logfile_rec.logfile, encoding="utf-8") as infile:
339368
infile.seek(logfile_rec.seek)
340-
for line in infile:
369+
370+
# Read the file in chunks
371+
incomplete_line = ""
372+
while chunk := infile.read(BUFFER_SIZE):
373+
# Prepend any leftover from the last chunk
374+
if incomplete_line:
375+
chunk = incomplete_line + chunk
376+
lines = chunk.splitlines(keepends=True) # Preserve line endings
377+
378+
# Check if the last line is incomplete
379+
incomplete_line = lines.pop() if not lines[-1].endswith(("\n", "\r")) else ""
380+
381+
for line in lines:
341382
if regex_comp.search(line):
342383
line_found = True
343384
break

0 commit comments

Comments
 (0)