Skip to content

Commit 537009d

Browse files
committed
rptest: switch to ripgrep in log search
Use ripgrep (rg) instead of grep for log searching in rptest. In my benchmark this results in about a 3x speedup when searching through 3 logs in parallel resulting from creating 10 topics of 1000 partitions. The benchmark is also included in this change, though @ignored so it does not run CI (it takes a minute or so). This requires translating GNU BRE (used by grep by default) to ERE (used by grep -E, and ripgrep). It was probably a mistake to use BRE in the first place, but it is what it is.
1 parent 452b00b commit 537009d

File tree

2 files changed

+105
-4
lines changed

2 files changed

+105
-4
lines changed

tests/rptest/services/utils.py

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ class LogSearch(ABC):
132132
"Exceptional future ignored",
133133
"UndefinedBehaviorSanitizer",
134134
"Aborting on shard",
135-
"libc++abi: terminating due to uncaught exception",
135+
"terminating due to uncaught exception",
136136
"oversized allocation",
137137
]
138138

@@ -157,7 +157,10 @@ def __init__(
157157

158158
@abstractmethod
159159
def _capture_log(self, node: Any, expr: str) -> Generator[str, None, None]:
160-
"""Method to get log from host node. Overriden by each child."""
160+
"""Method to get log from host node. Overriden by each child.
161+
162+
expr is a GNU BRE regex (i.e., the default grep regex style), which means
163+
you need to escape things like +() if you intend them to be metacharacters"""
161164
# Fake return type for type hint silence
162165
# And proper handling when called directly
163166
yield from []
@@ -248,6 +251,63 @@ def search_logs(self, versioned_nodes: VersionedNodes) -> None:
248251
raise BadLogLines(bad_loglines)
249252

250253

254+
def _gnu_bre_to_ere(bre_pattern: str) -> str:
255+
r"""
256+
Convert a GNU Basic Regular Expression (BRE) to a GNU Extended Regular
257+
Expression (ERE).
258+
259+
This function handles two main differences between GNU BRE and ERE:
260+
1. In BRE, `(`, `)`, `{`, `}`, `+`, `?`, and `|` are literal characters,
261+
whereas in ERE they are special metacharacters. To treat them as
262+
literals in ERE, they must be escaped with a backslash.
263+
2. In BRE, the escaped versions `\(`, `\)`, `\{`, `\}`, `\+`, `\?`, and
264+
`\|` have special meanings (grouping, intervals, etc.), while in ERE,
265+
the unescaped versions have these special meanings.
266+
267+
The conversion is performed by iterating through the BRE pattern and
268+
applying the following rules:
269+
- Unescaped `(`, `)`, `{`, `}`, `+`, `?`, `|` are escaped.
270+
- Escaped `\(`, `\)`, `\{`, `\}`, `\+`, `\?`, `\|` are unescaped.
271+
- Other characters, including other escaped characters (e.g., `\.`, `\*`),
272+
are kept as they are.
273+
- The logic correctly handles double backslashes (`\\`), ensuring they
274+
are preserved.
275+
"""
276+
277+
# these are metacharacters in both ERE and GNU BRE but in BRE
278+
# they must be escaped to have their metacharacter meaning
279+
BRE_ESCAPED_METACHARACTERS = set("(){}+?|")
280+
281+
ere_pattern = ""
282+
i = 0
283+
while i < len(bre_pattern):
284+
char = bre_pattern[i]
285+
if char == "\\":
286+
if i + 1 < len(bre_pattern):
287+
next_char = bre_pattern[i + 1]
288+
if next_char in BRE_ESCAPED_METACHARACTERS:
289+
# Unescape BRE metacharacters to become ERE metacharacters
290+
ere_pattern += next_char
291+
i += 2
292+
else:
293+
# Keep other escaped characters as they are (e.g., \\, \*, \.)
294+
ere_pattern += char + next_char
295+
i += 2
296+
else:
297+
# Trailing backslash
298+
ere_pattern += char
299+
i += 1
300+
elif char in BRE_ESCAPED_METACHARACTERS:
301+
# Escape ERE metacharacters that are literals in BRE
302+
ere_pattern += "\\" + char
303+
i += 1
304+
else:
305+
# Keep all other characters
306+
ere_pattern += char
307+
i += 1
308+
return ere_pattern
309+
310+
251311
class LogSearchLocal(LogSearch):
252312
def __init__(
253313
self,
@@ -260,7 +320,10 @@ def __init__(
260320
self.targetpath = targetpath
261321

262322
def _capture_log(self, node: ClusterNode, expr: str) -> Generator[str, None, None]:
263-
cmd = f"grep {expr} {self.targetpath} || true"
323+
if not expr.startswith("-P"):
324+
# some naughty tests use this to force grep/rg to use PRCE
325+
expr = _gnu_bre_to_ere(expr)
326+
cmd = f"rg {expr} {self.targetpath} || true"
264327
for line in node.account.ssh_capture(cmd):
265328
yield line
266329

tests/rptest/tests/services_self_test.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111
import signal
1212
from subprocess import CalledProcessError
1313
from typing import Any, Callable, Iterator
14+
import time
1415

1516
from ducktape.cluster.cluster import ClusterNode
1617
from ducktape.cluster.remoteaccount import RemoteCommandError
17-
from ducktape.mark import matrix
18+
from ducktape.mark import matrix, ignore
1819
from ducktape.mark.resource import cluster as dt_cluster
1920
from ducktape.tests.test import Test, TestContext
2021

@@ -667,6 +668,43 @@ def validate_exception(e: BadLogLines) -> bool:
667668
with expect_exception(BadLogLines, validate_exception):
668669
self.redpanda.raise_on_bad_logs(allow_list=[])
669670

671+
@ignore
672+
@cluster(num_nodes=3, check_allowed_error_logs=False)
673+
def test_bll_bench(self):
674+
"""
675+
Test that the LogMessage admin API correctly logs messages and that
676+
ERROR level logs are caught by raise_on_bad_logs.
677+
678+
Ignored by default since we don't want to run benchmarks in CI.
679+
"""
680+
# create and delete a 1000-partition topic 10 times
681+
rpk = RpkTool(self.redpanda)
682+
683+
parts = 1000
684+
685+
for i in range(10):
686+
topic_name = f"bll_bench_{i}"
687+
688+
def _all_partitions_present():
689+
try:
690+
desc = list(rpk.describe_topic(topic_name))
691+
return len(desc) == parts
692+
except Exception:
693+
return False
694+
695+
# 1000 partitions, replication factor 1 to avoid excess resource usage
696+
rpk.create_topic(topic_name, partitions=parts, replicas=3)
697+
self.redpanda.wait_until(
698+
_all_partitions_present, timeout_sec=30, backoff_sec=1
699+
)
700+
rpk.delete_topic(topic_name)
701+
self.logger.warning(f"c d topic {i}")
702+
703+
start = time.time()
704+
self.redpanda.raise_on_bad_logs(allow_list=[])
705+
elapsed = time.time() - start
706+
self.logger.warning(f"raise_on_bad_logs elapsed {elapsed:.3f}s")
707+
670708

671709
class RedpandaServiceSelfRawTest(Test):
672710
"""This 'raw' test inherits only from Test, so that internally it

0 commit comments

Comments
 (0)