Skip to content

Commit 81a497f

Browse files
authored
Merge pull request #28692 from travisdowns/td-BLL-speedup
BadLogLines speed ups
2 parents 51fd551 + e62fe62 commit 81a497f

File tree

12 files changed

+440
-91
lines changed

12 files changed

+440
-91
lines changed

proto/redpanda/core/admin/internal/v1/debug.proto

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,22 @@ message ThrowStructuredExceptionRequest {
4343
}
4444
message ThrowStructuredExceptionResponse {}
4545

46+
// Severity at which a LogMessageRequest should be logged.
enum LogLevel {
47+
// Zero value. The LogMessage handler shown in debug.cc treats an
// unspecified level as an invalid argument.
LOG_LEVEL_UNSPECIFIED = 0;
48+
LOG_LEVEL_TRACE = 1;
49+
LOG_LEVEL_DEBUG = 2;
50+
LOG_LEVEL_INFO = 3;
51+
LOG_LEVEL_WARN = 4;
52+
LOG_LEVEL_ERROR = 5;
53+
}
54+
55+
// Request for DebugService.LogMessage.
message LogMessageRequest {
56+
// Text to write to the log.
string message = 1;
57+
// Severity to log the text at.
LogLevel level = 2;
58+
}
59+
60+
// Response for DebugService.LogMessage; carries no fields.
message LogMessageResponse {}
61+
4662
// The DebugService provides access to internal debugging information and debug
4763
// operations for the cluster or node.
4864
//
@@ -71,4 +87,9 @@ service DebugService {
7187
authz: SUPERUSER,
7288
};
7389
}
90+
// Logs the message carried by the request at the requested level.
// Callable by superusers only (authz: SUPERUSER).
rpc LogMessage(LogMessageRequest) returns (LogMessageResponse) {
91+
option (pbgen.rpc) = {
92+
authz: SUPERUSER,
93+
};
94+
}
7495
}

src/v/redpanda/admin/services/internal/debug.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,4 +131,35 @@ debug_service_impl::stop_stress_fiber(
131131
co_return proto::stop_stress_fiber_response{};
132132
}
133133

134+
// Handles the LogMessage RPC: logs the request's message at the requested
// level and returns an empty response.
//
// Throws serde::pb::rpc::invalid_argument_exception when the level is
// unspecified or is not one of the known enumerators.
seastar::future<proto::admin::log_message_response>
135+
debug_service_impl::log_message(
136+
serde::pb::rpc::context, proto::admin::log_message_request req) {
137+
auto msg = req.get_message();
138+
auto level = req.get_level();
139+
140+
// Bring the protobuf enumerators into scope so the switch below can name
// them unqualified; the Seastar levels stay fully qualified.
using enum proto::admin::log_level;
141+
142+
// Map the protobuf level onto the Seastar log level using an
// immediately-invoked lambda, so ss_level is initialized in one expression.
ss::log_level ss_level = [=]() {
143+
switch (level) {
144+
case trace:
145+
return ss::log_level::trace;
146+
case debug:
147+
return ss::log_level::debug;
148+
case info:
149+
return ss::log_level::info;
150+
case warn:
151+
return ss::log_level::warn;
152+
case error:
153+
return ss::log_level::error;
154+
case unspecified:
155+
default:
156+
// Both the explicit "unspecified" value and any unrecognized value are
// rejected rather than mapped to a fallback level.
throw serde::pb::rpc::invalid_argument_exception(
157+
"Invalid log level specified");
158+
}
159+
}();
160+
161+
// The caller-supplied text is passed as the argument to "{}" rather than
// used as the format string itself.
log.log(ss_level, "{}", msg);
162+
co_return proto::admin::log_message_response{};
163+
}
164+
134165
} // namespace admin

src/v/redpanda/admin/services/internal/debug.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ class debug_service_impl : public proto::admin::debug_service {
4545
serde::pb::rpc::context,
4646
proto::admin::stop_stress_fiber_request) override;
4747

48+
// LogMessage RPC handler: logs the request's message at the requested level.
// The definition in debug.cc throws an invalid-argument exception for an
// unspecified or unknown level.
seastar::future<proto::admin::log_message_response> log_message(
49+
serde::pb::rpc::context, proto::admin::log_message_request) override;
50+
4851
private:
4952
admin::proxy::client _client;
5053
ss::sharded<stress_fiber_manager>& _stress_fiber_manager;

tests/docker/ducktape-deps/tool-pkgs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
# tests, but not for building the early layers of the image themselves. They
55
# are installed relatively late in the Dockerfile, after most of the heavy
66
# "non mainline" layers have forked off.
7-
set -e
7+
set -euo pipefail
8+
89
apt-get update
910
apt-get install -qq \
1011
bind9-dnsutils \
@@ -57,3 +58,22 @@ unset LD_LIBRARY_PATH
5758
exec /usr/bin/llvm-symbolizer-$LLVM_VERSION "\$@"
5859
EOF
5960
chmod +x /usr/local/bin/llvm-symbolizer
61+
62+
###########
63+
# ripgrep #
64+
###########
65+
66+
# Download a pinned ripgrep release from GitHub, unpack it into /opt/ripgrep
# and expose the binary as /usr/local/bin/rg.
RG_VERSION=15.1.0
67+
RG_BASE_URL="https://github.com/BurntSushi/ripgrep/releases/download"
68+
# Choose the release tarball that matches the machine architecture.
if [ $(uname -m) = "aarch64" ]; then
69+
# e.g. ripgrep-15.1.0-aarch64-unknown-linux-gnu.tar.gz
70+
RG_TARBALL="ripgrep-${RG_VERSION}-aarch64-unknown-linux-gnu.tar.gz"
71+
else
72+
# e.g. ripgrep-15.1.0-x86_64-unknown-linux-musl.tar.gz
73+
RG_TARBALL="ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz"
74+
fi
75+
mkdir -p /opt/ripgrep
76+
RG_URL="${RG_BASE_URL}/${RG_VERSION}/${RG_TARBALL}"
77+
echo "Downloading ripgrep from ${RG_URL}"
78+
# --strip-components=1 removes the leading path component of every archive
# entry, so the files are extracted directly under /opt/ripgrep (where the
# symlink below expects to find rg).
curl -sSL "$RG_URL" | tar -xz -C /opt/ripgrep --strip-components=1
79+
ln -sf /opt/ripgrep/rg /usr/local/bin/rg

tests/rptest/clients/admin/proto/redpanda/core/admin/internal/v1/debug_pb2.py

Lines changed: 11 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/rptest/clients/admin/proto/redpanda/core/admin/internal/v1/debug_pb2.pyi

Lines changed: 48 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/rptest/clients/admin/proto/redpanda/core/admin/internal/v1/debug_pb2_connect.py

Lines changed: 34 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/rptest/services/openmessaging_benchmark.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
RedpandaService,
2727
RedpandaServiceCloud,
2828
)
29-
from rptest.services.utils import BadLogLines, VersionAndLines
29+
from rptest.services.utils import BadLogLines, NodeToLines, VersionAndLines
3030

3131
from ducktape.tests.test import TestContext
3232

@@ -150,7 +150,7 @@ def raise_on_bad_log_lines(self, node: ClusterNode) -> None:
150150
def make_vl() -> VersionAndLines:
151151
return {"version": None, "lines": []}
152152

153-
bad_lines: dict[ClusterNode, VersionAndLines] = collections.defaultdict(make_vl)
153+
bad_lines: NodeToLines = collections.defaultdict(make_vl)
154154
self.logger.info(f"Scanning node {node.account.hostname} log for errors...")
155155

156156
for line in node.account.ssh_capture(
@@ -420,7 +420,7 @@ def raise_on_bad_log_lines(self, node: ClusterNode) -> None:
420420
def make_vl() -> VersionAndLines:
421421
return {"version": None, "lines": []}
422422

423-
bad_lines: dict[ClusterNode, VersionAndLines] = collections.defaultdict(make_vl)
423+
bad_lines: NodeToLines = collections.defaultdict(make_vl)
424424
self.logger.info(f"Scanning node {node.account.hostname} log for errors...")
425425

426426
for line in node.account.ssh_capture(

tests/rptest/services/redpanda.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,7 +1294,7 @@ def all_up(self) -> bool:
12941294

12951295
@abstractmethod
12961296
def raise_on_bad_logs(
1297-
self, allow_list: LogAllowList = (), test_start_time: float | None = None
1297+
self, allow_list: LogAllowList = (), test_start_time: float = 0
12981298
) -> None:
12991299
pass
13001300

@@ -2340,17 +2340,21 @@ def _get_restart_count(p: dict[str, Any]):
23402340
# Check if stored pod and loaded one is the same
23412341
_stored_pod = _get_stored_pod(pod["metadata"]["uid"])
23422342
if _stored_pod is None:
2343-
raise NodeCrash((_name, "Pod not found among prior stored ones"))
2343+
raise NodeCrash([(_name, "Pod not found among prior stored ones")])
23442344

23452345
# Check if container inside pod stayed the same
23462346
container_id = _get_container_id(pod["status"])
23472347
if _get_container_id(_stored_pod._status) != container_id:
2348-
raise NodeCrash((_name, "Pod container mismatch with prior stored one"))
2348+
raise NodeCrash(
2349+
[(_name, "Pod container mismatch with prior stored one")]
2350+
)
23492351

23502352
# Check that restart count is the same
23512353
restart_count = _get_restart_count(pod["status"])
23522354
if _get_restart_count(_stored_pod._status) != restart_count:
2353-
raise NodeCrash((_name, "Pod has been restarted due to possible crash"))
2355+
raise NodeCrash(
2356+
[(_name, "Pod has been restarted due to possible crash")]
2357+
)
23542358

23552359
# Worth to note that rebuilding stored broker classes
23562360
# can be skipped in this case since nothing changed now
@@ -2397,7 +2401,7 @@ def cluster_healthy(self) -> bool:
23972401
return self.cluster_unhealthy_reason is not None
23982402

23992403
def raise_on_bad_logs(
2400-
self, allow_list: LogAllowList = (), test_start_time: float | None = None
2404+
self, allow_list: LogAllowList = (), test_start_time: float = 0
24012405
) -> None:
24022406
"""
24032407
Raise a BadLogLines exception if any nodes' logs contain errors
@@ -3020,7 +3024,7 @@ def set_skip_if_no_redpanda_log(self, v: bool):
30203024
self._skip_if_no_redpanda_log = v
30213025

30223026
def raise_on_bad_logs(
3023-
self, allow_list: LogAllowList = (), test_start_time: float | None = None
3027+
self, allow_list: LogAllowList = (), test_start_time: float = 0
30243028
):
30253029
"""
30263030
Raise a BadLogLines exception if any nodes' logs contain errors not

0 commit comments

Comments
 (0)