Skip to content

Commit cc7b51d

Browse files
authored
tools: add host side tool for string filtering (#5185)
Simple tool which allows easy testing of whether string filtering is working correctly.
1 parent 555783c commit cc7b51d

File tree

3 files changed

+361
-0
lines changed

3 files changed

+361
-0
lines changed

src/tools/BUILD.gn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ group("tools") {
2929
"proto_filter",
3030
"proto_merger",
3131
"protoprofile",
32+
"string_filter_tool",
3233
]
3334
if (is_linux || is_android) {
3435
deps += [
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright (C) 2026 The Android Open Source Project
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import("../../../gn/perfetto_host_executable.gni")
16+
17+
# Host-side command line tool built from string_filter_tool.cc. It is a
# test-only target that links the TraceConfig C++ protos, the textproto to
# binary converter and the protozero filtering libraries.
perfetto_host_executable("string_filter_tool") {
  testonly = true
  sources = [ "string_filter_tool.cc" ]
  deps = [
    "../../../gn:default_deps",
    "../../../gn:protobuf_full",
    "../../../protos/perfetto/config:cpp",
    "../../base",
    "../../proto_utils:txt_to_pb",
    "../../protozero/filtering:message_filter",
    "../../protozero/filtering:message_filter_config",
    "../../protozero/filtering:string_filter",
  ]
}
Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
/*
2+
* Copyright (C) 2026 The Android Open Source Project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <fcntl.h>
18+
#include <cstdint>
19+
#include <cstdio>
20+
#include <cstdlib>
21+
#include <cstring>
22+
#include <optional>
23+
#include <string>
24+
#include <vector>
25+
26+
#include "perfetto/base/logging.h"
27+
#include "perfetto/ext/base/file_utils.h"
28+
#include "perfetto/ext/base/getopt.h" // IWYU pragma: keep
29+
#include "perfetto/ext/base/scoped_file.h"
30+
#include "perfetto/ext/base/string_utils.h"
31+
#include "protos/perfetto/config/trace_config.gen.h"
32+
#include "src/proto_utils/txt_to_pb.h"
33+
#include "src/protozero/filtering/message_filter.h"
34+
#include "src/protozero/filtering/message_filter_config.h"
35+
#include "src/protozero/filtering/string_filter.h"
36+
37+
namespace perfetto {
38+
namespace string_filter_tool {
39+
namespace {
40+
41+
// Help text for the tool. Main() prints it to stdout for -h/--help and logs it
// as an error when the command line is invalid.
constexpr char kUsage[] =
    R"USAGE(Usage: string_filter_tool -r <rules_textproto> [-t <semantic_type>] <string>
string_filter_tool -r <rules_textproto> -i <trace_in> -o <trace_out>

Mode 1 (string mode):
Applies the Perfetto string filtering algorithm to <string> using the rules
defined in <rules_textproto> and prints the result to stdout.

Mode 2 (trace mode):
Applies bytecode-based proto field filtering and string filtering to a
binary-encoded trace file. The config must contain bytecode fields
(bytecode/bytecode_v2) in addition to string_filter_chain rules.

Arguments:
-r --rules: Path to a TraceConfig textproto file. The
trace_filter.string_filter_chain field provides string
filter rules. For trace mode, bytecode/bytecode_v2 fields
are also required.
-t --semantic_type: Semantic type to use (integer, default: 0 = UNSPECIFIED).
Only used in string mode.
-i --trace_in: Path to a binary-encoded proto trace file to filter.
-o --trace_out: Path for the filtered trace output.
<string> The string to filter (positional argument, string mode).

The rules textproto file should contain a TraceConfig with trace_filter rules,
for example:

trace_filter {
string_filter_chain {
rules {
policy: SFP_MATCH_REDACT_GROUPS
regex_pattern: "foo(bar)baz"
}
}
}

For trace mode, include bytecode fields:

trace_filter {
bytecode_v2: "\000..."
string_filter_chain {
rules { ... }
}
}

Output (string mode):
Prints the (possibly filtered) string to stdout, followed by a newline.
Exit code 0 if the string was modified, 1 if it was not.

Output (trace mode):
Writes the filtered trace to --trace_out. Exit code 0 on success.
)USAGE";
93+
94+
// Shorthands for the generated TraceConfig.TraceFilter message and its nested
// string filter rule message.
using TraceFilter = protos::gen::TraceConfig::TraceFilter;
using StringFilterRule =
    protos::gen::TraceConfig::TraceFilter::StringFilterRule;
96+
97+
// Translates a TraceConfig string filter policy into the corresponding
// protozero::StringFilter policy.
//
// Returns std::nullopt for SFP_UNSPECIFIED and for any value that is not one
// of the enumerators handled below.
std::optional<protozero::StringFilter::Policy> ConvertPolicy(
    TraceFilter::StringFilterPolicy policy) {
  using Policy = protozero::StringFilter::Policy;

  std::optional<Policy> converted;
  // No default label on purpose: values outside the listed enumerators leave
  // |converted| empty.
  switch (policy) {
    case TraceFilter::SFP_UNSPECIFIED:
      break;
    case TraceFilter::SFP_MATCH_REDACT_GROUPS:
      converted = Policy::kMatchRedactGroups;
      break;
    case TraceFilter::SFP_ATRACE_MATCH_REDACT_GROUPS:
      converted = Policy::kAtraceMatchRedactGroups;
      break;
    case TraceFilter::SFP_MATCH_BREAK:
      converted = Policy::kMatchBreak;
      break;
    case TraceFilter::SFP_ATRACE_MATCH_BREAK:
      converted = Policy::kAtraceMatchBreak;
      break;
    case TraceFilter::SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS:
      converted = Policy::kAtraceRepeatedSearchRedactGroups;
      break;
  }
  return converted;
}
115+
116+
// Builds the semantic type mask for |rule|.
//
// A rule that lists no semantic types gets a mask with only bit 0 set.
// Otherwise every listed type below SemanticTypeMask::kLimit is set in the
// mask; types at or above the limit are silently skipped.
protozero::StringFilter::SemanticTypeMask ConvertSemanticTypes(
    const StringFilterRule& rule) {
  using Mask = protozero::StringFilter::SemanticTypeMask;

  Mask result;
  const auto& listed_types = rule.semantic_type();
  if (listed_types.empty()) {
    result.Set(0);
  } else {
    for (const auto& entry : listed_types) {
      const auto bit = static_cast<uint32_t>(entry);
      if (bit < Mask::kLimit)
        result.Set(bit);
    }
  }
  return result;
}
131+
132+
int LoadStringFilterRules(const TraceFilter& trace_filter,
133+
protozero::StringFilter& filter) {
134+
for (const auto& rule : trace_filter.string_filter_chain().rules()) {
135+
auto opt_policy = ConvertPolicy(rule.policy());
136+
if (!opt_policy) {
137+
PERFETTO_ELOG("Unknown string filter policy %d", rule.policy());
138+
return 1;
139+
}
140+
filter.AddRule(*opt_policy, rule.regex_pattern(),
141+
rule.atrace_payload_starts_with(), rule.name(),
142+
ConvertSemanticTypes(rule));
143+
}
144+
145+
// Also load v54 chain if present.
146+
for (const auto& rule : trace_filter.string_filter_chain_v54().rules()) {
147+
auto opt_policy = ConvertPolicy(rule.policy());
148+
if (!opt_policy) {
149+
PERFETTO_ELOG("Unknown string filter policy %d", rule.policy());
150+
return 1;
151+
}
152+
filter.AddRule(*opt_policy, rule.regex_pattern(),
153+
rule.atrace_payload_starts_with(), rule.name(),
154+
ConvertSemanticTypes(rule));
155+
}
156+
return 0;
157+
}
158+
159+
int Main(int argc, char** argv) {
160+
static const option long_options[] = {
161+
{"help", no_argument, nullptr, 'h'},
162+
{"rules", required_argument, nullptr, 'r'},
163+
{"semantic_type", required_argument, nullptr, 't'},
164+
{"trace_in", required_argument, nullptr, 'i'},
165+
{"trace_out", required_argument, nullptr, 'o'},
166+
{nullptr, 0, nullptr, 0},
167+
};
168+
169+
std::string rules_path;
170+
std::string trace_in;
171+
std::string trace_out;
172+
uint32_t semantic_type = 0;
173+
174+
for (;;) {
175+
int option = getopt_long(argc, argv, "hr:t:i:o:", long_options, nullptr);
176+
if (option == -1)
177+
break;
178+
179+
if (option == 'h') {
180+
fprintf(stdout, "%s", kUsage);
181+
return 0;
182+
}
183+
184+
if (option == 'r') {
185+
rules_path = optarg;
186+
continue;
187+
}
188+
189+
if (option == 't') {
190+
auto parsed = base::CStringToUInt32(optarg);
191+
if (!parsed.has_value()) {
192+
PERFETTO_ELOG("Invalid semantic type: %s\n", optarg);
193+
return 1;
194+
}
195+
semantic_type = *parsed;
196+
continue;
197+
}
198+
199+
if (option == 'i') {
200+
trace_in = optarg;
201+
continue;
202+
}
203+
204+
if (option == 'o') {
205+
trace_out = optarg;
206+
continue;
207+
}
208+
209+
PERFETTO_ELOG("%s", kUsage);
210+
return 1;
211+
}
212+
213+
if (rules_path.empty()) {
214+
PERFETTO_ELOG("%s", kUsage);
215+
return 1;
216+
}
217+
218+
// Read and parse the rules textproto.
219+
std::string rules_data;
220+
if (!base::ReadFile(rules_path, &rules_data)) {
221+
PERFETTO_ELOG("Could not read rules file: %s", rules_path.c_str());
222+
return 1;
223+
}
224+
225+
auto res = TraceConfigTxtToPb(rules_data, rules_path);
226+
if (!res.ok()) {
227+
PERFETTO_ELOG("%s\n", res.status().c_message());
228+
return 1;
229+
}
230+
231+
std::vector<uint8_t>& config_bytes = res.value();
232+
protos::gen::TraceConfig config;
233+
config.ParseFromArray(config_bytes.data(), config_bytes.size());
234+
235+
const auto& trace_filter = config.trace_filter();
236+
237+
// Trace mode: apply bytecode + string filtering to a trace file.
238+
if (!trace_in.empty()) {
239+
if (trace_out.empty()) {
240+
PERFETTO_ELOG("--trace_out (-o) is required when using --trace_in\n");
241+
return 1;
242+
}
243+
244+
protozero::MessageFilter msg_filter;
245+
auto status = protozero::LoadMessageFilterConfig(trace_filter, &msg_filter);
246+
if (!status.ok()) {
247+
PERFETTO_ELOG("%s", status.c_message());
248+
return 1;
249+
}
250+
251+
// Read the input trace.
252+
std::string trace_data;
253+
if (!base::ReadFile(trace_in, &trace_data)) {
254+
PERFETTO_ELOG("Could not read trace file: %s", trace_in.c_str());
255+
return 1;
256+
}
257+
258+
// Apply the filter.
259+
auto filtered =
260+
msg_filter.FilterMessage(trace_data.data(), trace_data.size());
261+
if (filtered.error) {
262+
PERFETTO_ELOG("Filtering failed");
263+
return 1;
264+
}
265+
266+
// Write the filtered trace.
267+
auto fd = base::OpenFile(trace_out, O_WRONLY | O_TRUNC | O_CREAT, 0644);
268+
if (!fd) {
269+
PERFETTO_ELOG("Could not open output file: %s", trace_out.c_str());
270+
return 1;
271+
}
272+
base::WriteAll(*fd, filtered.data.get(), filtered.size);
273+
PERFETTO_LOG("Written filtered trace (%zu bytes) to %s", filtered.size,
274+
trace_out.c_str());
275+
return 0;
276+
}
277+
278+
// String mode: apply string filtering to a single string.
279+
if (optind >= argc) {
280+
PERFETTO_ELOG("%s", kUsage);
281+
return 1;
282+
}
283+
284+
// The remaining positional argument is the string to filter.
285+
// Unescape C-style escape sequences (\n, \t, \\) so users can pass
286+
// strings containing newlines from the shell.
287+
std::string input_str;
288+
for (const char* p = argv[optind]; *p; ++p) {
289+
if (*p == '\\' && *(p + 1)) {
290+
switch (*(p + 1)) {
291+
case 'n':
292+
input_str += '\n';
293+
++p;
294+
continue;
295+
case 't':
296+
input_str += '\t';
297+
++p;
298+
continue;
299+
case '\\':
300+
input_str += '\\';
301+
++p;
302+
continue;
303+
default:
304+
break;
305+
}
306+
}
307+
input_str += *p;
308+
}
309+
310+
protozero::StringFilter filter;
311+
int err = LoadStringFilterRules(trace_filter, filter);
312+
if (err)
313+
return err;
314+
315+
// Apply the filter. MaybeFilter modifies the string in-place.
316+
bool was_modified =
317+
filter.MaybeFilter(input_str.data(), input_str.size(), semantic_type);
318+
319+
// Print the result.
320+
fprintf(stdout, "%s\n", input_str.c_str());
321+
return was_modified ? 0 : 1;
322+
}
323+
324+
} // namespace
325+
} // namespace string_filter_tool
326+
} // namespace perfetto
327+
328+
int main(int argc, char** argv) {
329+
return perfetto::string_filter_tool::Main(argc, argv);
330+
}

0 commit comments

Comments
 (0)