Skip to content

Commit 0f90ef9

Browse files
committed
tp: add In filter support to TypedCursor and optimize In bytecode
Add SetFilterValueListUnchecked to TypedCursor, allowing callers to pass a pointer+size array of FilterValue for In filters without allocation. Plumb this through the codegen'd ConstCursor/Cursor.

Optimize the In bytecode by pre-building lookup structures during CastFilterValueList instead of rebuilding them on every Execute():
- For dense Id/Uint32: BitVector (built once, not per-call)
- For large sparse integer/string lists: FlatHashMapV2 for O(1) lookup
- For small lists (<=16): linear scan (cache-friendly)

The lookup is stored as a variant in CastFilterValueListResult, replacing the separate value_list field.

Migrate experimental_slice_layout to use an In filter on track_id.
1 parent e7ca9ec commit 0f90ef9

File tree

9 files changed

+483
-126
lines changed

9 files changed

+483
-126
lines changed

python/generators/trace_processor_table/serialize.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,12 @@ class ConstCursor {{
484484
PERFETTO_ALWAYS_INLINE void SetFilterValueUnchecked(uint32_t index, C value) {{
485485
cursor_.SetFilterValueUnchecked(index, std::move(value));
486486
}}
487+
PERFETTO_ALWAYS_INLINE void SetFilterValueListUnchecked(
488+
uint32_t index,
489+
const dataframe::TypedCursor::FilterValue* values,
490+
uint32_t count) {{
491+
cursor_.SetFilterValueListUnchecked(index, values, count);
492+
}}
487493
RowNumber ToRowNumber() const {{
488494
return RowNumber{{cursor_.RowIndex()}};
489495
}}
@@ -510,6 +516,12 @@ class Cursor {{
510516
PERFETTO_ALWAYS_INLINE void SetFilterValueUnchecked(uint32_t index, C value) {{
511517
cursor_.SetFilterValueUnchecked(index, std::move(value));
512518
}}
519+
PERFETTO_ALWAYS_INLINE void SetFilterValueListUnchecked(
520+
uint32_t index,
521+
const dataframe::TypedCursor::FilterValue* values,
522+
uint32_t count) {{
523+
cursor_.SetFilterValueListUnchecked(index, values, count);
524+
}}
513525
RowNumber ToRowNumber() const {{
514526
return RowNumber{{cursor_.RowIndex()}};
515527
}}

src/trace_processor/core/dataframe/dataframe_unittest.cc

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,6 +1594,86 @@ TEST(DataframeTest, TypedCursorSetMultipleTimes) {
15941594
}
15951595
}
15961596

1597+
TEST(DataframeTest, TypedCursorInFilter) {
1598+
static constexpr auto kSpec = CreateTypedDataframeSpec(
1599+
{"id", "col2"}, CreateTypedColumnSpec(Id(), NonNull(), IdSorted()),
1600+
CreateTypedColumnSpec(Uint32(), NonNull(), Unsorted()));
1601+
StringPool pool;
1602+
Dataframe df = Dataframe::CreateFromTypedSpec(kSpec, &pool);
1603+
df.InsertUnchecked(kSpec, std::monostate(), 10u);
1604+
df.InsertUnchecked(kSpec, std::monostate(), 20u);
1605+
df.InsertUnchecked(kSpec, std::monostate(), 30u);
1606+
df.InsertUnchecked(kSpec, std::monostate(), 40u);
1607+
df.InsertUnchecked(kSpec, std::monostate(), 50u);
1608+
1609+
// Filter col2 IN (20, 40) using a pointer+size list.
1610+
using FV = TypedCursor::FilterValue;
1611+
TypedCursor cursor(&df, {FilterSpec{1, 0, In{}, {}}}, {});
1612+
FV values[] = {int64_t(20), int64_t(40)};
1613+
cursor.SetFilterValueListUnchecked(0, values, 2);
1614+
cursor.ExecuteUnchecked();
1615+
1616+
ASSERT_FALSE(cursor.Eof());
1617+
ASSERT_EQ(cursor.GetCellUnchecked<1>(kSpec), 20u);
1618+
cursor.Next();
1619+
1620+
ASSERT_FALSE(cursor.Eof());
1621+
ASSERT_EQ(cursor.GetCellUnchecked<1>(kSpec), 40u);
1622+
cursor.Next();
1623+
1624+
ASSERT_TRUE(cursor.Eof());
1625+
}
1626+
1627+
TEST(DataframeTest, TypedCursorInFilterEmpty) {
1628+
static constexpr auto kSpec = CreateTypedDataframeSpec(
1629+
{"id", "col2"}, CreateTypedColumnSpec(Id(), NonNull(), IdSorted()),
1630+
CreateTypedColumnSpec(Uint32(), NonNull(), Unsorted()));
1631+
StringPool pool;
1632+
Dataframe df = Dataframe::CreateFromTypedSpec(kSpec, &pool);
1633+
df.InsertUnchecked(kSpec, std::monostate(), 10u);
1634+
1635+
// Filter col2 IN () — empty list should return no rows.
1636+
TypedCursor cursor(&df, {FilterSpec{1, 0, In{}, {}}}, {});
1637+
cursor.SetFilterValueListUnchecked(0, nullptr, 0);
1638+
cursor.ExecuteUnchecked();
1639+
ASSERT_TRUE(cursor.Eof());
1640+
}
1641+
1642+
TEST(DataframeTest, TypedCursorInFilterReexecute) {
1643+
static constexpr auto kSpec = CreateTypedDataframeSpec(
1644+
{"id", "col2"}, CreateTypedColumnSpec(Id(), NonNull(), IdSorted()),
1645+
CreateTypedColumnSpec(Uint32(), NonNull(), Unsorted()));
1646+
StringPool pool;
1647+
Dataframe df = Dataframe::CreateFromTypedSpec(kSpec, &pool);
1648+
df.InsertUnchecked(kSpec, std::monostate(), 10u);
1649+
df.InsertUnchecked(kSpec, std::monostate(), 20u);
1650+
df.InsertUnchecked(kSpec, std::monostate(), 30u);
1651+
1652+
TypedCursor cursor(&df, {FilterSpec{1, 0, In{}, {}}}, {});
1653+
1654+
// First execution: IN (10, 30)
1655+
using FV = TypedCursor::FilterValue;
1656+
FV values1[] = {int64_t(10), int64_t(30)};
1657+
cursor.SetFilterValueListUnchecked(0, values1, 2);
1658+
cursor.ExecuteUnchecked();
1659+
ASSERT_FALSE(cursor.Eof());
1660+
ASSERT_EQ(cursor.GetCellUnchecked<1>(kSpec), 10u);
1661+
cursor.Next();
1662+
ASSERT_FALSE(cursor.Eof());
1663+
ASSERT_EQ(cursor.GetCellUnchecked<1>(kSpec), 30u);
1664+
cursor.Next();
1665+
ASSERT_TRUE(cursor.Eof());
1666+
1667+
// Second execution with different values: IN (20)
1668+
FV values2[] = {int64_t(20)};
1669+
cursor.SetFilterValueListUnchecked(0, values2, 1);
1670+
cursor.ExecuteUnchecked();
1671+
ASSERT_FALSE(cursor.Eof());
1672+
ASSERT_EQ(cursor.GetCellUnchecked<1>(kSpec), 20u);
1673+
cursor.Next();
1674+
ASSERT_TRUE(cursor.Eof());
1675+
}
1676+
15971677
TEST(DataframeTest,
15981678
QueryPlanEqualityFilterOnNoDuplicatesColumnEstimatesOneRow) {
15991679
static constexpr auto kSpec = CreateTypedDataframeSpec(

src/trace_processor/core/dataframe/typed_cursor.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ void TypedCursor::ExecuteUnchecked() {
3131
if (PERFETTO_UNLIKELY(last_execution_mutation_count_ != GetMutations())) {
3232
PrepareCursorInternal();
3333
}
34-
Fetcher fetcher{{}, filter_values_.data()};
34+
Fetcher fetcher{{}, filter_values_.data(), filter_value_list_states_.data()};
3535
cursor_.Execute(fetcher);
3636
}
3737

@@ -45,6 +45,8 @@ void TypedCursor::PrepareCursorInternal() {
4545
spec.value_index.value_or(std::numeric_limits<uint32_t>::max());
4646
}
4747
std::fill(filter_values_.begin(), filter_values_.end(), nullptr);
48+
std::fill(filter_value_list_states_.begin(), filter_value_list_states_.end(),
49+
FilterValueListState{});
4850
}
4951

5052
} // namespace perfetto::trace_processor::core::dataframe

src/trace_processor/core/dataframe/typed_cursor.h

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ class TypedCursor {
4343
public:
4444
using FilterValue =
4545
std::variant<std::nullptr_t, int64_t, double, const char*>;
46+
47+
// State for iterating over a list of filter values (used by In filters).
48+
// Points to caller-owned data; no allocation required.
49+
struct FilterValueListState {
50+
const FilterValue* data = nullptr;
51+
uint32_t size = 0;
52+
uint32_t current = 0;
53+
};
54+
4655
struct Fetcher : ValueFetcher {
4756
using Type = size_t;
4857
static const Type kInt64 = base::variant_index<FilterValue, int64_t>();
@@ -62,9 +71,26 @@ class TypedCursor {
6271
Type GetValueType(uint32_t col) const {
6372
return filter_values_[col].index();
6473
}
65-
static bool IteratorInit(uint32_t) { PERFETTO_FATAL("Unsupported"); }
66-
static bool IteratorNext(uint32_t) { PERFETTO_FATAL("Unsupported"); }
74+
bool IteratorInit(uint32_t col) {
75+
FilterValueListState& s = filter_value_list_states_[col];
76+
s.current = 0;
77+
if (s.size == 0) {
78+
return false;
79+
}
80+
filter_values_[col] = s.data[0];
81+
return true;
82+
}
83+
bool IteratorNext(uint32_t col) {
84+
FilterValueListState& s = filter_value_list_states_[col];
85+
++s.current;
86+
if (s.current >= s.size) {
87+
return false;
88+
}
89+
filter_values_[col] = s.data[s.current];
90+
return true;
91+
}
6792
FilterValue* filter_values_;
93+
FilterValueListState* filter_value_list_states_;
6894
};
6995

7096
TypedCursor(const Dataframe* dataframe,
@@ -107,6 +133,22 @@ class TypedCursor {
107133
SetFilterValueInternal(index, value);
108134
}
109135

136+
// Sets the filter value list at the given index for an In filter.
137+
// The caller must ensure that |values| remains valid until ExecuteUnchecked
138+
// completes. No copy is made.
139+
PERFETTO_ALWAYS_INLINE void SetFilterValueListUnchecked(
140+
uint32_t index,
141+
const FilterValue* values,
142+
uint32_t count) {
143+
if (PERFETTO_UNLIKELY(last_execution_mutation_count_ != GetMutations())) {
144+
PrepareCursorInternal();
145+
}
146+
uint32_t mapped = filter_value_mapping_[index];
147+
if (mapped != std::numeric_limits<uint32_t>::max()) {
148+
filter_value_list_states_[mapped] = {values, count, 0};
149+
}
150+
}
151+
110152
// Executes the current query plan against the specified filter values and
111153
// populates the cursor with the results.
112154
//
@@ -159,6 +201,7 @@ class TypedCursor {
159201
column_mutation_count_(core::Slab<uint32_t*>::Alloc(
160202
filter_specs_.size() + sort_specs_.size())) {
161203
filter_values_.resize(filter_specs_.size());
204+
filter_value_list_states_.resize(filter_specs_.size());
162205
filter_value_mapping_.resize(filter_specs_.size(),
163206
std::numeric_limits<uint32_t>::max());
164207
uint32_t i = 0;
@@ -194,6 +237,7 @@ class TypedCursor {
194237

195238
const Dataframe* dataframe_;
196239
std::vector<FilterValue> filter_values_;
240+
std::vector<FilterValueListState> filter_value_list_states_;
197241
std::vector<uint32_t> filter_value_mapping_;
198242
std::vector<FilterSpec> filter_specs_;
199243
std::vector<SortSpec> sort_specs_;

src/trace_processor/core/interpreter/bytecode_interpreter_benchmark.cc

Lines changed: 94 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ void BM_BytecodeInterpreter_LinearFilterEqUint32(benchmark::State& state) {
6666

6767
StringPool spool;
6868
Interpreter<Fetcher> interpreter;
69-
interpreter.Initialize(ParseBytecodeToVec(bytecode_str), 6, &spool);
69+
interpreter.Initialize(ParseBytecodeToVec(bytecode_str), 5, &spool);
7070

7171
// Set up storage pointer in register
7272
StoragePtr storage_ptr{col.storage.unchecked_data<Uint32>(), Uint32{}};
@@ -118,7 +118,7 @@ void BM_BytecodeInterpreter_LinearFilterEqString(benchmark::State& state) {
118118
)";
119119

120120
Interpreter<Fetcher> interpreter;
121-
interpreter.Initialize(ParseBytecodeToVec(bytecode_str), 6, &spool);
121+
interpreter.Initialize(ParseBytecodeToVec(bytecode_str), 5, &spool);
122122

123123
// Set up storage pointer in register
124124
StoragePtr storage_ptr{col.storage.unchecked_data<String>(), String{}};
@@ -136,6 +136,97 @@ void BM_BytecodeInterpreter_LinearFilterEqString(benchmark::State& state) {
136136
}
137137
BENCHMARK(BM_BytecodeInterpreter_LinearFilterEqString);
138138

139+
// Benchmark for In<Uint32> with varying list sizes.
140+
// Measures the combined cost of CastFilterValueList + In on each Execute().
141+
void BM_BytecodeInterpreter_InUint32(benchmark::State& state) {
142+
auto list_size = static_cast<uint32_t>(state.range(0));
143+
constexpr uint32_t kTableSize = 1024 * 1024;
144+
145+
// Setup column with values 0..1023 repeating.
146+
FlexVector<uint32_t> col_data_vec;
147+
for (uint32_t i = 0; i < kTableSize; ++i) {
148+
col_data_vec.push_back(i % 1024);
149+
}
150+
dataframe::Column col{dataframe::Storage{std::move(col_data_vec)},
151+
dataframe::NullStorage::NonNull{}, Unsorted{},
152+
HasDuplicates{}};
153+
154+
// Register layout:
155+
// R0: CastFilterValueListResult (filter value list)
156+
// R1: Range (source range)
157+
// R2: Span<uint32_t> (output indices)
158+
// R3: Slab<uint32_t> (backing storage for output)
159+
// R4: StoragePtr (column data pointer)
160+
std::string bytecode_str = R"(
161+
CastFilterValueList<Uint32>: [fval_handle=FilterValue(0), write_register=Register(0), op=Op(0)]
162+
InitRange: [size=1048576, dest_register=Register(1)]
163+
AllocateIndices: [size=1048576, dest_slab_register=Register(3), dest_span_register=Register(2)]
164+
Iota: [source_register=Register(1), update_register=Register(2)]
165+
In<Uint32>: [storage_register=Register(4), value_list_register=Register(0), source_register=Register(2), update_register=Register(2)]
166+
)";
167+
168+
StringPool spool;
169+
Interpreter<Fetcher> interpreter;
170+
interpreter.Initialize(ParseBytecodeToVec(bytecode_str), 5, &spool);
171+
172+
StoragePtr storage_ptr{col.storage.unchecked_data<Uint32>(), Uint32{}};
173+
interpreter.SetRegisterValue(WriteHandle<StoragePtr>(4), storage_ptr);
174+
175+
// Build fetcher with list_size values spread across 0..1023.
176+
Fetcher fetcher;
177+
for (uint32_t i = 0; i < list_size; ++i) {
178+
fetcher.value.push_back(int64_t(i * (1024 / list_size)));
179+
}
180+
181+
for (auto _ : state) {
182+
interpreter.Execute(fetcher);
183+
benchmark::ClobberMemory();
184+
}
185+
}
186+
BENCHMARK(BM_BytecodeInterpreter_InUint32)->Arg(5)->Arg(50)->Arg(500);
187+
188+
// Same benchmark but with Id type (exercises bitvector path).
189+
void BM_BytecodeInterpreter_InId(benchmark::State& state) {
190+
auto list_size = static_cast<uint32_t>(state.range(0));
191+
constexpr uint32_t kTableSize = 1024 * 1024;
192+
193+
dataframe::Column col{dataframe::Storage::Id{kTableSize},
194+
dataframe::NullStorage::NonNull{}, Unsorted{},
195+
HasDuplicates{}};
196+
197+
// Register layout:
198+
// R0: CastFilterValueListResult
199+
// R1: Range (source range)
200+
// R2: Span<uint32_t> (output indices)
201+
// R3: Slab<uint32_t> (backing storage)
202+
// R4: StoragePtr (column data pointer)
203+
std::string bytecode_str = R"(
204+
CastFilterValueList<Id>: [fval_handle=FilterValue(0), write_register=Register(0), op=Op(0)]
205+
InitRange: [size=1048576, dest_register=Register(1)]
206+
AllocateIndices: [size=1048576, dest_slab_register=Register(3), dest_span_register=Register(2)]
207+
Iota: [source_register=Register(1), update_register=Register(2)]
208+
In<Id>: [storage_register=Register(4), value_list_register=Register(0), source_register=Register(2), update_register=Register(2)]
209+
)";
210+
211+
StringPool spool;
212+
Interpreter<Fetcher> interpreter;
213+
interpreter.Initialize(ParseBytecodeToVec(bytecode_str), 5, &spool);
214+
215+
StoragePtr storage_ptr{nullptr, Id{}};
216+
interpreter.SetRegisterValue(WriteHandle<StoragePtr>(4), storage_ptr);
217+
218+
Fetcher fetcher;
219+
for (uint32_t i = 0; i < list_size; ++i) {
220+
fetcher.value.push_back(int64_t(i * (kTableSize / list_size)));
221+
}
222+
223+
for (auto _ : state) {
224+
interpreter.Execute(fetcher);
225+
benchmark::ClobberMemory();
226+
}
227+
}
228+
BENCHMARK(BM_BytecodeInterpreter_InId)->Arg(5)->Arg(50)->Arg(500);
229+
139230
} // namespace
140231

141232
static void BM_BytecodeInterpreter_SortUint32(benchmark::State& state) {
@@ -223,7 +314,7 @@ static void BM_BytecodeInterpreter_SortString(benchmark::State& state) {
223314
)";
224315

225316
Interpreter<Fetcher> interpreter;
226-
interpreter.Initialize(ParseBytecodeToVec(bytecode_str), 6, &spool);
317+
interpreter.Initialize(ParseBytecodeToVec(bytecode_str), 5, &spool);
227318

228319
// Set up storage pointer in register
229320
StoragePtr storage_ptr{col.storage.unchecked_data<String>(), String{}};

0 commit comments

Comments
 (0)