Skip to content

Commit ad716eb

Browse files
committed
tp: add IndexedFilterIn bytecode for In on indexed columns
When a column has an index and the query uses an In filter, the planner now emits IndexedFilterIn instead of the generic In bytecode. For each value in the list, IndexedFilterIn binary-searches the index permutation vector (O(log N) per value) and concatenates the matching ranges. This reduces In filter cost from O(N) to O(k log N + matches) where k is the number of values and N is the table size.
1 parent 0f90ef9 commit ad716eb

File tree

5 files changed

+358
-30
lines changed

5 files changed

+358
-30
lines changed

src/trace_processor/core/dataframe/dataframe_unittest.cc

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -946,7 +946,33 @@ TEST_F(DataframeBytecodeTest, PlanQuery_SingleColIndex_EqFilter_NonNullInt) {
946946
std::string expected_bytecode = R"(
947947
InitRange: [size=100, dest_register=Register(0)]
948948
CastFilterValue<Uint32>: [fval_handle=FilterValue(0), write_register=Register(3), op=NonNullOp(0)]
949-
IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(5), null_bv_register=Register(6), filter_value_reg=Register(3), popcount_register=Register(4), source_register=Register(1), dest_register=Register(2)]
949+
IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(4), null_bv_register=Register(5), filter_value_reg=Register(3), popcount_register=Register(6), source_register=Register(1), dest_register=Register(2)]
950+
AllocateIndices: [size=100, dest_slab_register=Register(7), dest_span_register=Register(8)]
951+
CopySpanIntersectingRange: [source_register=Register(2), source_range_register=Register(0), update_register=Register(8)]
952+
)";
953+
RunBytecodeTest(df, filters, {}, {}, {}, expected_bytecode,
954+
/*cols_used=*/1);
955+
}
956+
957+
TEST_F(DataframeBytecodeTest, PlanQuery_SingleColIndex_InFilter_NonNullInt) {
958+
static constexpr auto kSpec = CreateTypedDataframeSpec(
959+
{"col1"}, CreateTypedColumnSpec(Uint32{}, NonNull{}, Unsorted{}));
960+
Dataframe df = Dataframe::CreateFromTypedSpec(kSpec, &string_pool_);
961+
for (uint32_t i = 0; i < 100; ++i) {
962+
df.InsertUnchecked(kSpec, i);
963+
}
964+
df.Finalize();
965+
966+
std::vector<uint32_t> p_vec(100);
967+
std::iota(p_vec.begin(), p_vec.end(), 0);
968+
df.AddIndex(
969+
Index({0}, std::make_shared<std::vector<uint32_t>>(std::move(p_vec))));
970+
971+
std::vector<FilterSpec> filters = {{0, 0, In{}, std::nullopt}};
972+
std::string expected_bytecode = R"(
973+
InitRange: [size=100, dest_register=Register(0)]
974+
CastFilterValueList<Uint32>: [fval_handle=FilterValue(0), write_register=Register(3), op=NonNullOp(0)]
975+
IndexedFilterIn<Uint32, NonNull>: [storage_register=Register(4), null_bv_register=Register(5), value_list_register=Register(3), popcount_register=Register(6), source_register=Register(1), dest_register=Register(2)]
950976
AllocateIndices: [size=100, dest_slab_register=Register(7), dest_span_register=Register(8)]
951977
CopySpanIntersectingRange: [source_register=Register(2), source_range_register=Register(0), update_register=Register(8)]
952978
)";
@@ -976,13 +1002,13 @@ TEST_F(DataframeBytecodeTest,
9761002
std::string expected_bytecode = R"(
9771003
InitRange: [size=4, dest_register=Register(0)]
9781004
CastFilterValue<String>: [fval_handle=FilterValue(0), write_register=Register(3), op=NonNullOp(0)]
979-
PrefixPopcount: [null_bv_register=Register(5), dest_register=Register(4)]
980-
IndexedFilterEq<String, SparseNull>: [storage_register=Register(6), null_bv_register=Register(5), filter_value_reg=Register(3), popcount_register=Register(4), source_register=Register(1), dest_register=Register(2)]
1005+
IndexedFilterEq<String, SparseNull>: [storage_register=Register(4), null_bv_register=Register(5), filter_value_reg=Register(3), popcount_register=Register(6), source_register=Register(1), dest_register=Register(2)]
1006+
PrefixPopcount: [null_bv_register=Register(5), dest_register=Register(6)]
9811007
AllocateIndices: [size=4, dest_slab_register=Register(7), dest_span_register=Register(8)]
9821008
CopySpanIntersectingRange: [source_register=Register(2), source_range_register=Register(0), update_register=Register(8)]
9831009
AllocateIndices: [size=8, dest_slab_register=Register(9), dest_span_register=Register(10)]
9841010
StrideCopy: [source_register=Register(8), update_register=Register(10), stride=2]
985-
StrideTranslateAndCopySparseNullIndices: [null_bv_register=Register(5), popcount_register=Register(4), update_register=Register(10), offset=1, stride=2]
1011+
StrideTranslateAndCopySparseNullIndices: [null_bv_register=Register(5), popcount_register=Register(6), update_register=Register(10), offset=1, stride=2]
9861012
)";
9871013
RunBytecodeTest(df, filters, {}, {}, {}, expected_bytecode);
9881014
}
@@ -1005,12 +1031,12 @@ TEST_F(DataframeBytecodeTest, PlanQuery_SingleColIndex_EqFilter_DenseNullInt) {
10051031
std::string expected_bytecode = R"(
10061032
InitRange: [size=4, dest_register=Register(0)]
10071033
CastFilterValue<Uint32>: [fval_handle=FilterValue(0), write_register=Register(3), op=NonNullOp(0)]
1008-
IndexedFilterEq<Uint32, DenseNull>: [storage_register=Register(5), null_bv_register=Register(6), filter_value_reg=Register(3), popcount_register=Register(4), source_register=Register(1), dest_register=Register(2)]
1034+
IndexedFilterEq<Uint32, DenseNull>: [storage_register=Register(4), null_bv_register=Register(5), filter_value_reg=Register(3), popcount_register=Register(6), source_register=Register(1), dest_register=Register(2)]
10091035
AllocateIndices: [size=4, dest_slab_register=Register(7), dest_span_register=Register(8)]
10101036
CopySpanIntersectingRange: [source_register=Register(2), source_range_register=Register(0), update_register=Register(8)]
10111037
AllocateIndices: [size=8, dest_slab_register=Register(9), dest_span_register=Register(10)]
10121038
StrideCopy: [source_register=Register(8), update_register=Register(10), stride=2]
1013-
StrideCopyDenseNullIndices: [null_bv_register=Register(6), update_register=Register(10), offset=1, stride=2]
1039+
StrideCopyDenseNullIndices: [null_bv_register=Register(5), update_register=Register(10), offset=1, stride=2]
10141040
)";
10151041
RunBytecodeTest(df, filters, {}, {}, {}, expected_bytecode);
10161042
}
@@ -1040,9 +1066,9 @@ TEST_F(DataframeBytecodeTest, PlanQuery_MultiColIndex_PrefixEqFilters) {
10401066
std::string expected_bytecode = R"(
10411067
InitRange: [size=4, dest_register=Register(0)]
10421068
CastFilterValue<Uint32>: [fval_handle=FilterValue(0), write_register=Register(3), op=NonNullOp(0)]
1043-
IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(5), null_bv_register=Register(6), filter_value_reg=Register(3), popcount_register=Register(4), source_register=Register(1), dest_register=Register(2)]
1069+
IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(4), null_bv_register=Register(5), filter_value_reg=Register(3), popcount_register=Register(6), source_register=Register(1), dest_register=Register(2)]
10441070
CastFilterValue<Uint32>: [fval_handle=FilterValue(1), write_register=Register(7), op=NonNullOp(0)]
1045-
IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(9), null_bv_register=Register(10), filter_value_reg=Register(7), popcount_register=Register(8), source_register=Register(2), dest_register=Register(2)]
1071+
IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(8), null_bv_register=Register(9), filter_value_reg=Register(7), popcount_register=Register(10), source_register=Register(2), dest_register=Register(2)]
10461072
AllocateIndices: [size=4, dest_slab_register=Register(11), dest_span_register=Register(12)]
10471073
CopySpanIntersectingRange: [source_register=Register(2), source_range_register=Register(0), update_register=Register(12)]
10481074
)";
@@ -1674,6 +1700,42 @@ TEST(DataframeTest, TypedCursorInFilterReexecute) {
16741700
ASSERT_TRUE(cursor.Eof());
16751701
}
16761702

1703+
// Checks that an In filter on a column covered by an index returns exactly the
// rows whose value appears in the filter list.
TEST(DataframeTest, TypedCursorInFilterWithIndex) {
  static constexpr auto kSpec = CreateTypedDataframeSpec(
      {"id", "track_id"}, CreateTypedColumnSpec(Id(), NonNull(), IdSorted()),
      CreateTypedColumnSpec(Uint32(), NonNull(), Unsorted()));
  StringPool pool;
  Dataframe df = Dataframe::CreateFromTypedSpec(kSpec, &pool);

  // track_id per row, in insertion order: 1, 2, 1, 3, 2, 1.
  df.InsertUnchecked(kSpec, std::monostate(), 1u);
  df.InsertUnchecked(kSpec, std::monostate(), 2u);
  df.InsertUnchecked(kSpec, std::monostate(), 1u);
  df.InsertUnchecked(kSpec, std::monostate(), 3u);
  df.InsertUnchecked(kSpec, std::monostate(), 2u);
  df.InsertUnchecked(kSpec, std::monostate(), 1u);
  df.Finalize();

  // Index over track_id (column 1). Ordering the rows by track_id gives the
  // values 1,1,1,2,2,3, i.e. the row permutation {0, 2, 5, 1, 4, 3}.
  auto permutation = std::make_shared<std::vector<uint32_t>>(
      std::vector<uint32_t>{0, 2, 5, 1, 4, 3});
  df.AddIndex(Index({1}, std::move(permutation)));

  // track_id IN (1, 3): rows 0, 2 and 5 carry track_id 1, row 3 carries 3.
  using FV = TypedCursor::FilterValue;
  TypedCursor cursor(&df, {FilterSpec{1, 0, In{}, {}}}, {});
  FV in_list[] = {int64_t(1), int64_t(3)};
  cursor.SetFilterValueListUnchecked(0, in_list, 2);
  cursor.ExecuteUnchecked();

  std::vector<uint32_t> matched_ids;
  for (; !cursor.Eof(); cursor.Next()) {
    matched_ids.push_back(cursor.GetCellUnchecked<0>(kSpec));
  }
  // The order in which the matching ids are produced is not asserted.
  EXPECT_THAT(matched_ids, testing::UnorderedElementsAre(0, 2, 3, 5));
}
1738+
16771739
TEST(DataframeTest,
16781740
QueryPlanEqualityFilterOnNoDuplicatesColumnEstimatesOneRow) {
16791741
static constexpr auto kSpec = CreateTypedDataframeSpec(

src/trace_processor/core/dataframe/query_plan.cc

Lines changed: 58 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,8 @@ std::optional<BestIndex> GetBestIndexForFilterSpecs(
199199
continue;
200200
}
201201
const FilterSpec& current_spec = all_specs[spec_idx];
202-
if (current_spec.col == column && current_spec.op.Is<Eq>()) {
202+
if (current_spec.col == column &&
203+
(current_spec.op.Is<Eq>() || current_spec.op.Is<In>())) {
203204
current_specs_for_this_index.push_back(spec_idx);
204205
found_spec_for_column = true;
205206
break;
@@ -840,33 +841,68 @@ void QueryPlanBuilder::IndexConstraints(
840841
for (uint32_t spec_idx : filter_specs) {
841842
FilterSpec& fs = specs[spec_idx];
842843
const Column& column = GetColumn(fs.col);
843-
auto value_reg = CastFilterValue(fs, column.storage.type(),
844-
*fs.op.TryDowncast<i::NonNullOp>());
845844
auto non_id = column.storage.type().TryDowncast<i::NonIdStorageType>();
846845
PERFETTO_CHECK(non_id);
847-
{
848-
using B = i::IndexedFilterEqBase;
846+
847+
auto alloc_popcount = [&]() {
849848
using PopcountHandle = i::ReadHandle<Slab<uint32_t>>;
850-
PopcountHandle popcount_register;
849+
PopcountHandle reg;
851850
if (column.null_storage.nullability().IsAnyOf<SparseNullTypes>()) {
852-
popcount_register = PrefixPopcountRegisterFor(fs.col);
851+
reg = PrefixPopcountRegisterFor(fs.col);
853852
} else {
854-
// Dummy register for non-sparse null columns. IndexedFilterEq knows
855-
// how to handle this.
856-
popcount_register = builder_.AllocateRegister<Slab<uint32_t>>();
853+
reg = builder_.AllocateRegister<Slab<uint32_t>>();
854+
}
855+
return reg;
856+
};
857+
858+
if (fs.op.Is<In>()) {
859+
// Emit IndexedFilterIn for In filters.
860+
StorageType ct = column.storage.type();
861+
i::RwHandle<i::CastFilterValueListResult> value_list_reg =
862+
builder_.AllocateRegister<i::CastFilterValueListResult>();
863+
{
864+
using B = i::CastFilterValueListBase;
865+
auto& bc = AddOpcode<B>(i::Index<i::CastFilterValueList>(ct),
866+
UnchangedRowCount{});
867+
bc.arg<B::fval_handle>() = {plan_.params.filter_value_count};
868+
bc.arg<B::write_register>() = value_list_reg;
869+
bc.arg<B::op>() = Eq{};
870+
fs.value_index = plan_.params.filter_value_count++;
871+
}
872+
{
873+
using B = i::IndexedFilterInBase;
874+
auto& bc = AddOpcode<B>(
875+
i::Index<i::IndexedFilterIn>(
876+
*non_id, NullabilityToSparseNullCollapsedNullability(
877+
column.null_storage.nullability())),
878+
RowCountModifier{EqualityFilterRowCount{column.duplicate_state}});
879+
bc.arg<B::storage_register>() =
880+
StorageRegisterFor(fs.col, non_id->Upcast<StorageType>());
881+
bc.arg<B::null_bv_register>() = NullBitvectorRegisterFor(fs.col);
882+
bc.arg<B::value_list_register>() = value_list_reg;
883+
bc.arg<B::popcount_register>() = alloc_popcount();
884+
bc.arg<B::source_register>() = source_reg;
885+
bc.arg<B::dest_register>() = dest_reg;
886+
}
887+
} else {
888+
// Emit IndexedFilterEq for Eq filters.
889+
auto value_reg = CastFilterValue(fs, column.storage.type(),
890+
*fs.op.TryDowncast<i::NonNullOp>());
891+
{
892+
using B = i::IndexedFilterEqBase;
893+
auto& bc = AddOpcode<B>(
894+
i::Index<i::IndexedFilterEq>(
895+
*non_id, NullabilityToSparseNullCollapsedNullability(
896+
column.null_storage.nullability())),
897+
RowCountModifier{EqualityFilterRowCount{column.duplicate_state}});
898+
bc.arg<B::storage_register>() =
899+
StorageRegisterFor(fs.col, non_id->Upcast<StorageType>());
900+
bc.arg<B::null_bv_register>() = NullBitvectorRegisterFor(fs.col);
901+
bc.arg<B::filter_value_reg>() = value_reg;
902+
bc.arg<B::popcount_register>() = alloc_popcount();
903+
bc.arg<B::source_register>() = source_reg;
904+
bc.arg<B::dest_register>() = dest_reg;
857905
}
858-
auto& bc = AddOpcode<B>(
859-
i::Index<i::IndexedFilterEq>(
860-
*non_id, NullabilityToSparseNullCollapsedNullability(
861-
column.null_storage.nullability())),
862-
RowCountModifier{EqualityFilterRowCount{column.duplicate_state}});
863-
bc.arg<B::storage_register>() =
864-
StorageRegisterFor(fs.col, non_id->Upcast<StorageType>());
865-
bc.arg<B::null_bv_register>() = NullBitvectorRegisterFor(fs.col);
866-
bc.arg<B::filter_value_reg>() = value_reg;
867-
bc.arg<B::popcount_register>() = popcount_register;
868-
bc.arg<B::source_register>() = source_reg;
869-
bc.arg<B::dest_register>() = dest_reg;
870906
}
871907
// After first filter, subsequent filters read from dest and write back to
872908
// dest.

src/trace_processor/core/interpreter/bytecode_instructions.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,35 @@ struct LinearFilterEq : LinearFilterEqBase {
580580
static_assert(TS1::Contains<T>());
581581
};
582582

583+
// Filters a column sorted by an index using a list of values (IN operator).
584+
// For each value in the list, performs binary search on the index permutation
585+
// vector and concatenates matching ranges.
586+
struct IndexedFilterInBase
587+
: TemplatedBytecode2<NonIdStorageType, SparseNullCollapsedNullability> {
588+
// TODO(lalitm): while the cost type is legitimate, the cost estimate inside
589+
// is plucked from thin air and has no real foundation. Fix this by creating
590+
// benchmarks and backing it up with actual data.
591+
static constexpr Cost kCost = LogPerRowCost{10};
592+
593+
PERFETTO_DATAFRAME_BYTECODE_IMPL_6(ReadHandle<StoragePtr>,
594+
storage_register,
595+
ReadHandle<const BitVector*>,
596+
null_bv_register,
597+
ReadHandle<CastFilterValueListResult>,
598+
value_list_register,
599+
ReadHandle<Slab<uint32_t>>,
600+
popcount_register,
601+
ReadHandle<Span<uint32_t>>,
602+
source_register,
603+
WriteHandle<Span<uint32_t>>,
604+
dest_register);
605+
};
606+
template <typename T, typename N>
607+
struct IndexedFilterIn : IndexedFilterInBase {
608+
static_assert(TS1::Contains<T>());
609+
static_assert(TS2::Contains<N>());
610+
};
611+
583612
// Filters rows based on a list of values (IN operator).
584613
struct InBase : TemplatedBytecode1<StorageType> {
585614
// TODO(lalitm): while the cost type is legitimate, the cost estimate inside
@@ -766,6 +795,21 @@ struct FilterTreeState : Bytecode {
766795
X(IndexedFilterEq<String, NonNull>) \
767796
X(IndexedFilterEq<String, SparseNull>) \
768797
X(IndexedFilterEq<String, DenseNull>) \
798+
X(IndexedFilterIn<Uint32, NonNull>) \
799+
X(IndexedFilterIn<Uint32, SparseNull>) \
800+
X(IndexedFilterIn<Uint32, DenseNull>) \
801+
X(IndexedFilterIn<Int32, NonNull>) \
802+
X(IndexedFilterIn<Int32, SparseNull>) \
803+
X(IndexedFilterIn<Int32, DenseNull>) \
804+
X(IndexedFilterIn<Int64, NonNull>) \
805+
X(IndexedFilterIn<Int64, SparseNull>) \
806+
X(IndexedFilterIn<Int64, DenseNull>) \
807+
X(IndexedFilterIn<Double, NonNull>) \
808+
X(IndexedFilterIn<Double, SparseNull>) \
809+
X(IndexedFilterIn<Double, DenseNull>) \
810+
X(IndexedFilterIn<String, NonNull>) \
811+
X(IndexedFilterIn<String, SparseNull>) \
812+
X(IndexedFilterIn<String, DenseNull>) \
769813
X(CopySpanIntersectingRange) \
770814
X(InitRankMap) \
771815
X(CollectIdIntoRankMap) \

src/trace_processor/core/interpreter/bytecode_interpreter_impl.h

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,6 +1481,82 @@ inline PERFETTO_ALWAYS_INLINE void IndexedFilterEq(
14811481
state.WriteToRegister(bytecode.arg<B::dest_register>(), dest);
14821482
}
14831483

1484+
// IndexedFilterIn: for each value in the list, binary-searches the index
1485+
// permutation vector and concatenates matching ranges into dest.
1486+
template <typename T, typename N>
1487+
inline PERFETTO_ALWAYS_INLINE void IndexedFilterIn(
1488+
InterpreterState& state,
1489+
const IndexedFilterInBase& bytecode) {
1490+
using B = IndexedFilterInBase;
1491+
const auto& value_list =
1492+
state.ReadFromRegister(bytecode.arg<B::value_list_register>());
1493+
const auto& source =
1494+
state.ReadFromRegister(bytecode.arg<B::source_register>());
1495+
Span<uint32_t> dest(source.b, source.b);
1496+
1497+
if (!HandleInvalidCastFilterValueResult(value_list.validity, dest)) {
1498+
state.WriteToRegister(bytecode.arg<B::dest_register>(), dest);
1499+
return;
1500+
}
1501+
1502+
const auto* data =
1503+
state.ReadStorageFromRegister<T>(bytecode.arg<B::storage_register>());
1504+
const Slab<uint32_t>* popcnt =
1505+
state.MaybeReadFromRegister(bytecode.arg<B::popcount_register>());
1506+
const BitVector* const* null_bv =
1507+
state.MaybeReadFromRegister(bytecode.arg<B::null_bv_register>());
1508+
1509+
const auto& lookup = value_list.lookup;
1510+
1511+
// Extract the value list. For the indexed path, we always need the
1512+
// FlexVector to iterate individual values for binary search.
1513+
using M =
1514+
StorageType::VariantTypeAtIndex<T, CastFilterValueListResult::ValueList>;
1515+
const M* val = nullptr;
1516+
if (auto* vl = std::get_if<CastFilterValueListResult::ValueList>(&lookup)) {
1517+
val = &base::unchecked_get<M>(*vl);
1518+
}
1519+
if (!val) {
1520+
// BitVector/HashMap lookup — fall through to empty result.
1521+
// This shouldn't happen since the planner only uses this bytecode
1522+
// for small In lists (which stay as ValueList).
1523+
state.WriteToRegister(bytecode.arg<B::dest_register>(), dest);
1524+
return;
1525+
}
1526+
1527+
// For each value, binary search the permutation vector and append matches.
1528+
// The value list stores StringPool::Id for strings (already resolved during
1529+
// CastFilterValueList), and native types for everything else.
1530+
using ValElem =
1531+
StorageType::VariantTypeAtIndex<T, CastFilterValueListResult::Value>;
1532+
uint32_t* write = dest.b;
1533+
for (size_t v = 0; v < val->size(); ++v) {
1534+
ValElem cmp_val = (*val)[v];
1535+
1536+
auto* lb = std::lower_bound(
1537+
source.b, source.e, cmp_val,
1538+
[&](uint32_t index, const ValElem& target) {
1539+
uint32_t si = IndexToStorageIndex<N>(index, null_bv, popcnt);
1540+
if (si == std::numeric_limits<uint32_t>::max())
1541+
return true;
1542+
return data[si] < target;
1543+
});
1544+
auto* ub = std::upper_bound(
1545+
lb, source.e, cmp_val, [&](const ValElem& target, uint32_t index) {
1546+
uint32_t si = IndexToStorageIndex<N>(index, null_bv, popcnt);
1547+
if (si == std::numeric_limits<uint32_t>::max())
1548+
return false;
1549+
return target < data[si];
1550+
});
1551+
// Copy matching range to output.
1552+
auto count = static_cast<size_t>(ub - lb);
1553+
memmove(write, lb, count * sizeof(uint32_t));
1554+
write += count;
1555+
}
1556+
dest.e = write;
1557+
state.WriteToRegister(bytecode.arg<B::dest_register>(), dest);
1558+
}
1559+
14841560
inline PERFETTO_ALWAYS_INLINE void Uint32SetIdSortedEq(
14851561
InterpreterState& state,
14861562
const Uint32SetIdSortedEq& bytecode) {

0 commit comments

Comments
 (0)