@@ -946,7 +946,33 @@ TEST_F(DataframeBytecodeTest, PlanQuery_SingleColIndex_EqFilter_NonNullInt) {
946946 std::string expected_bytecode = R"(
947947 InitRange: [size=100, dest_register=Register(0)]
948948 CastFilterValue<Uint32>: [fval_handle=FilterValue(0), write_register=Register(3), op=NonNullOp(0)]
949- IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(5), null_bv_register=Register(6), filter_value_reg=Register(3), popcount_register=Register(4), source_register=Register(1), dest_register=Register(2)]
949+ IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(4), null_bv_register=Register(5), filter_value_reg=Register(3), popcount_register=Register(6), source_register=Register(1), dest_register=Register(2)]
950+ AllocateIndices: [size=100, dest_slab_register=Register(7), dest_span_register=Register(8)]
951+ CopySpanIntersectingRange: [source_register=Register(2), source_range_register=Register(0), update_register=Register(8)]
952+ )" ;
953+ RunBytecodeTest (df, filters, {}, {}, {}, expected_bytecode,
954+ /* cols_used=*/ 1 );
955+ }
956+
957+ TEST_F (DataframeBytecodeTest, PlanQuery_SingleColIndex_InFilter_NonNullInt) {
958+ static constexpr auto kSpec = CreateTypedDataframeSpec (
959+ {" col1" }, CreateTypedColumnSpec (Uint32{}, NonNull{}, Unsorted{}));
960+ Dataframe df = Dataframe::CreateFromTypedSpec (kSpec , &string_pool_);
961+ for (uint32_t i = 0 ; i < 100 ; ++i) {
962+ df.InsertUnchecked (kSpec , i);
963+ }
964+ df.Finalize ();
965+
966+ std::vector<uint32_t > p_vec (100 );
967+ std::iota (p_vec.begin (), p_vec.end (), 0 );
968+ df.AddIndex (
969+ Index ({0 }, std::make_shared<std::vector<uint32_t >>(std::move (p_vec))));
970+
971+ std::vector<FilterSpec> filters = {{0 , 0 , In{}, std::nullopt }};
972+ std::string expected_bytecode = R"(
973+ InitRange: [size=100, dest_register=Register(0)]
974+ CastFilterValueList<Uint32>: [fval_handle=FilterValue(0), write_register=Register(3), op=NonNullOp(0)]
975+ IndexedFilterIn<Uint32, NonNull>: [storage_register=Register(4), null_bv_register=Register(5), value_list_register=Register(3), popcount_register=Register(6), source_register=Register(1), dest_register=Register(2)]
950976 AllocateIndices: [size=100, dest_slab_register=Register(7), dest_span_register=Register(8)]
951977 CopySpanIntersectingRange: [source_register=Register(2), source_range_register=Register(0), update_register=Register(8)]
952978 )" ;
@@ -976,13 +1002,13 @@ TEST_F(DataframeBytecodeTest,
9761002 std::string expected_bytecode = R"(
9771003 InitRange: [size=4, dest_register=Register(0)]
9781004 CastFilterValue<String>: [fval_handle=FilterValue(0), write_register=Register(3), op=NonNullOp(0)]
979- PrefixPopcount : [null_bv_register=Register(5), dest_register=Register(4 )]
980- IndexedFilterEq<String, SparseNull> : [storage_register=Register(6), null_bv_register=Register(5), filter_value_reg=Register(3), popcount_register=Register(4), source_register=Register(1), dest_register=Register(2 )]
1005+ IndexedFilterEq<String, SparseNull> : [storage_register=Register(4), null_bv_register=Register(5), filter_value_reg=Register(3), popcount_register=Register(6), source_register=Register(1), dest_register=Register(2 )]
1006+ PrefixPopcount : [null_bv_register=Register(5), dest_register=Register(6 )]
9811007 AllocateIndices: [size=4, dest_slab_register=Register(7), dest_span_register=Register(8)]
9821008 CopySpanIntersectingRange: [source_register=Register(2), source_range_register=Register(0), update_register=Register(8)]
9831009 AllocateIndices: [size=8, dest_slab_register=Register(9), dest_span_register=Register(10)]
9841010 StrideCopy: [source_register=Register(8), update_register=Register(10), stride=2]
985- StrideTranslateAndCopySparseNullIndices: [null_bv_register=Register(5), popcount_register=Register(4 ), update_register=Register(10), offset=1, stride=2]
1011+ StrideTranslateAndCopySparseNullIndices: [null_bv_register=Register(5), popcount_register=Register(6 ), update_register=Register(10), offset=1, stride=2]
9861012 )" ;
9871013 RunBytecodeTest (df, filters, {}, {}, {}, expected_bytecode);
9881014}
@@ -1005,12 +1031,12 @@ TEST_F(DataframeBytecodeTest, PlanQuery_SingleColIndex_EqFilter_DenseNullInt) {
10051031 std::string expected_bytecode = R"(
10061032 InitRange: [size=4, dest_register=Register(0)]
10071033 CastFilterValue<Uint32>: [fval_handle=FilterValue(0), write_register=Register(3), op=NonNullOp(0)]
1008- IndexedFilterEq<Uint32, DenseNull>: [storage_register=Register(5 ), null_bv_register=Register(6 ), filter_value_reg=Register(3), popcount_register=Register(4 ), source_register=Register(1), dest_register=Register(2)]
1034+ IndexedFilterEq<Uint32, DenseNull>: [storage_register=Register(4 ), null_bv_register=Register(5 ), filter_value_reg=Register(3), popcount_register=Register(6 ), source_register=Register(1), dest_register=Register(2)]
10091035 AllocateIndices: [size=4, dest_slab_register=Register(7), dest_span_register=Register(8)]
10101036 CopySpanIntersectingRange: [source_register=Register(2), source_range_register=Register(0), update_register=Register(8)]
10111037 AllocateIndices: [size=8, dest_slab_register=Register(9), dest_span_register=Register(10)]
10121038 StrideCopy: [source_register=Register(8), update_register=Register(10), stride=2]
1013- StrideCopyDenseNullIndices: [null_bv_register=Register(6 ), update_register=Register(10), offset=1, stride=2]
1039+ StrideCopyDenseNullIndices: [null_bv_register=Register(5 ), update_register=Register(10), offset=1, stride=2]
10141040 )" ;
10151041 RunBytecodeTest (df, filters, {}, {}, {}, expected_bytecode);
10161042}
@@ -1040,9 +1066,9 @@ TEST_F(DataframeBytecodeTest, PlanQuery_MultiColIndex_PrefixEqFilters) {
10401066 std::string expected_bytecode = R"(
10411067 InitRange: [size=4, dest_register=Register(0)]
10421068 CastFilterValue<Uint32>: [fval_handle=FilterValue(0), write_register=Register(3), op=NonNullOp(0)]
1043- IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(5 ), null_bv_register=Register(6 ), filter_value_reg=Register(3), popcount_register=Register(4 ), source_register=Register(1), dest_register=Register(2)]
1069+ IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(4 ), null_bv_register=Register(5 ), filter_value_reg=Register(3), popcount_register=Register(6 ), source_register=Register(1), dest_register=Register(2)]
10441070 CastFilterValue<Uint32>: [fval_handle=FilterValue(1), write_register=Register(7), op=NonNullOp(0)]
1045- IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(9 ), null_bv_register=Register(10 ), filter_value_reg=Register(7), popcount_register=Register(8 ), source_register=Register(2), dest_register=Register(2)]
1071+ IndexedFilterEq<Uint32, NonNull>: [storage_register=Register(8 ), null_bv_register=Register(9 ), filter_value_reg=Register(7), popcount_register=Register(10 ), source_register=Register(2), dest_register=Register(2)]
10461072 AllocateIndices: [size=4, dest_slab_register=Register(11), dest_span_register=Register(12)]
10471073 CopySpanIntersectingRange: [source_register=Register(2), source_range_register=Register(0), update_register=Register(12)]
10481074 )" ;
@@ -1674,6 +1700,42 @@ TEST(DataframeTest, TypedCursorInFilterReexecute) {
16741700 ASSERT_TRUE (cursor.Eof ());
16751701}
16761702
// Checks that a TypedCursor with an In filter on a column that has an index
// attached (via Dataframe::AddIndex) yields exactly the rows whose value is in
// the supplied value list.
1703+ TEST (DataframeTest, TypedCursorInFilterWithIndex) {
1704+ static constexpr auto kSpec = CreateTypedDataframeSpec (
1705+ {" id" , " track_id" }, CreateTypedColumnSpec (Id (), NonNull (), IdSorted ()),
1706+ CreateTypedColumnSpec (Uint32 (), NonNull (), Unsorted ()));
1707+ StringPool pool;
1708+ Dataframe df = Dataframe::CreateFromTypedSpec (kSpec , &pool);
1709+ // Insert six rows with track_ids 1, 2, 1, 3, 2, 1 (rows 0..5). The id column is passed std::monostate; presumably each id equals its row number (the final expectation relies on this) - confirm against the Id column type.
1710+ df.InsertUnchecked (kSpec , std::monostate (), 1u );
1711+ df.InsertUnchecked (kSpec , std::monostate (), 2u );
1712+ df.InsertUnchecked (kSpec , std::monostate (), 1u );
1713+ df.InsertUnchecked (kSpec , std::monostate (), 3u );
1714+ df.InsertUnchecked (kSpec , std::monostate (), 2u );
1715+ df.InsertUnchecked (kSpec , std::monostate (), 1u );
1716+ df.Finalize ();
1717+ 
1718+ // Build an index on track_id (column 1). Sorted track_id values: 1,1,1,2,2,3.
1719+ // The permutation lists row numbers in that order: rows 0, 2, 5 hold 1; rows 1, 4 hold 2; row 3 holds 3, i.e. {0, 2, 5, 1, 4, 3}.
1720+ df.AddIndex (Index ({1 }, std::make_shared<std::vector<uint32_t >>(
1721+ std::vector<uint32_t >{0 , 2 , 5 , 1 , 4 , 3 })));
1722+ 
1723+ // track_id IN (1, 3) should return rows 0, 2, 5 (track_id=1) and 3 (track_id=3). The FilterSpec's leading 1 is presumably the column index (track_id) - confirm against FilterSpec. The value list is bound after construction, to filter value slot 0.
1724+ using FV = TypedCursor::FilterValue;
1725+ TypedCursor cursor (&df, {FilterSpec{1 , 0 , In{}, {}}}, {});
1726+ FV values[] = {int64_t (1 ), int64_t (3 )};
1727+ cursor.SetFilterValueListUnchecked (0 , values, 2 );
1728+ cursor.ExecuteUnchecked ();
1729+ 
1730+ std::vector<uint32_t > result_ids;
1731+ while (!cursor.Eof ()) {
1732+ result_ids.push_back (cursor.GetCellUnchecked <0 >(kSpec ));
1733+ cursor.Next ();
1734+ }
1735+ // Results should contain the ids of rows 0, 2, 3, 5. Output order is not asserted, hence the unordered matcher.
1736+ EXPECT_THAT (result_ids, testing::UnorderedElementsAre (0 , 2 , 3 , 5 ));
1737+ }
1738+
16771739TEST (DataframeTest,
16781740 QueryPlanEqualityFilterOnNoDuplicatesColumnEstimatesOneRow) {
16791741 static constexpr auto kSpec = CreateTypedDataframeSpec (
0 commit comments