2323#include " parquet/bloom_filter.h"
2424#include " parquet/bloom_filter_reader.h"
2525#include " parquet/bloom_filter_writer.h"
26+ #include " parquet/exception.h"
2627#include " parquet/file_reader.h"
2728#include " parquet/test_util.h"
2829
@@ -94,14 +95,13 @@ TEST(BloomFilterBuilder, BasicRoundTrip) {
9495 const auto bitset_size = BlockSplitBloomFilter::OptimalNumOfBytes (
9596 bloom_filter_options.ndv , bloom_filter_options.fpp );
9697 WriterProperties::Builder properties_builder;
97- properties_builder.enable_bloom_filter (bloom_filter_options, " c1" );
98+ properties_builder.enable_bloom_filter (" c1" , bloom_filter_options );
9899 auto writer_properties = properties_builder.build ();
99100 auto bloom_filter_builder = BloomFilterBuilder::Make (&schema, writer_properties.get ());
100101
101102 auto write_row_group_bloom_filter = [&](const std::vector<uint64_t >& hash_values) {
102103 bloom_filter_builder->AppendRowGroup ();
103- auto bloom_filter =
104- bloom_filter_builder->GetOrCreateBloomFilter (/* column_ordinal=*/ 0 );
104+ auto bloom_filter = bloom_filter_builder->CreateBloomFilter (/* column_ordinal=*/ 0 );
105105 ASSERT_NE (bloom_filter, nullptr );
106106 ASSERT_EQ (bloom_filter->GetBitsetSize (), bitset_size);
107107 for (uint64_t hash_value : hash_values) {
@@ -113,9 +113,8 @@ TEST(BloomFilterBuilder, BasicRoundTrip) {
113113 write_row_group_bloom_filter ({300 , 400 });
114114
115115 auto sink = CreateOutputStream ();
116- IndexLocations bloom_filter_location;
117- bloom_filter_builder->WriteTo (sink.get (), &bloom_filter_location);
118- ASSERT_EQ (bloom_filter_location.locations .size (), 2 );
116+ auto locations = bloom_filter_builder->WriteTo (sink.get ());
117+ ASSERT_EQ (locations.size (), 2 );
119118 ASSERT_OK_AND_ASSIGN (auto buffer, sink->Finish ());
120119
121120 struct RowGroupBloomFilterCase {
@@ -129,11 +128,12 @@ TEST(BloomFilterBuilder, BasicRoundTrip) {
129128 /* non_existing_values=*/ {300 , 400 }},
130129 RowGroupBloomFilterCase{/* row_group_id=*/ 1 , /* existing_values=*/ {300 , 400 },
131130 /* non_existing_values=*/ {100 , 200 }}}) {
132- auto row_group_bloom_filter = bloom_filter_location.locations .find (c.row_group_id );
133- ASSERT_NE (row_group_bloom_filter, bloom_filter_location.locations .cend ());
134-
135- auto bloom_filter_location = row_group_bloom_filter->second .find (0 );
136- ASSERT_NE (bloom_filter_location, row_group_bloom_filter->second .cend ());
131+ auto bloom_filter_location =
132+ std::find_if (locations.begin (), locations.end (), [&](const auto & location) {
133+ return location.first .row_group_index == c.row_group_id &&
134+ location.first .column_index == 0 ;
135+ });
136+ ASSERT_NE (bloom_filter_location, locations.end ());
137137 int64_t bloom_filter_offset = bloom_filter_location->second .offset ;
138138 int32_t bloom_filter_length = bloom_filter_location->second .length ;
139139
@@ -158,45 +158,48 @@ TEST(BloomFilterBuilder, InvalidOperations) {
158158
159159 WriterProperties::Builder properties_builder;
160160 BloomFilterOptions bloom_filter_options{100 , 0.05 };
161- properties_builder.enable_bloom_filter (bloom_filter_options, " c1" );
162- properties_builder.enable_bloom_filter (bloom_filter_options, " c2" );
161+ properties_builder.enable_bloom_filter (" c1" , bloom_filter_options );
162+ properties_builder.enable_bloom_filter (" c2" , bloom_filter_options );
163163 auto properties = properties_builder.build ();
164164 auto bloom_filter_builder = BloomFilterBuilder::Make (&schema, properties.get ());
165165
166166 // AppendRowGroup() is not called yet.
167167 EXPECT_THROW_THAT (
168- [&]() { bloom_filter_builder->GetOrCreateBloomFilter (/* column_ordinal=*/ 0 ); },
168+ [&]() { bloom_filter_builder->CreateBloomFilter (/* column_ordinal=*/ 0 ); },
169169 ParquetException,
170170 ::testing::Property (
171171 &ParquetException::what,
172172 ::testing::HasSubstr (" No row group appended to BloomFilterBuilder" )));
173173
174174 // Column ordinal is out of bound.
175175 bloom_filter_builder->AppendRowGroup ();
176- EXPECT_THROW_THAT ([&]() { bloom_filter_builder->GetOrCreateBloomFilter (2 ); },
176+ EXPECT_THROW_THAT ([&]() { bloom_filter_builder->CreateBloomFilter (2 ); },
177177 ParquetException,
178178 ::testing::Property (&ParquetException::what,
179179 ::testing::HasSubstr (" Invalid column ordinal" )));
180180
181181 // Boolean type is not supported.
182182 EXPECT_THROW_THAT (
183- [&]() { bloom_filter_builder->GetOrCreateBloomFilter (1 ); }, ParquetException,
183+ [&]() { bloom_filter_builder->CreateBloomFilter (1 ); }, ParquetException,
184184 ::testing::Property (
185185 &ParquetException::what,
186186 ::testing::HasSubstr (" BloomFilterBuilder does not support boolean type" )));
187187
188- // Get a created bloom filter should succeed.
189- auto bloom_filter = bloom_filter_builder->GetOrCreateBloomFilter (0 );
190- ASSERT_EQ (bloom_filter_builder->GetOrCreateBloomFilter (0 ), bloom_filter);
188+ // Create a created bloom filter should throw.
189+ ASSERT_NO_THROW (bloom_filter_builder->CreateBloomFilter (0 ));
190+ EXPECT_THROW_THAT (
191+ [&]() { bloom_filter_builder->CreateBloomFilter (0 ); }, ParquetException,
192+ ::testing::Property (
193+ &ParquetException::what,
194+ ::testing::HasSubstr (" Bloom filter already exists for column: 0" )));
191195
192196 auto sink = CreateOutputStream();
193- IndexLocations location;
194- ASSERT_NO_FATAL_FAILURE (bloom_filter_builder->WriteTo (sink.get(), &location));
195- ASSERT_EQ (location.locations.size(), 1);
197+ auto locations = bloom_filter_builder->WriteTo (sink.get());
198+ ASSERT_EQ (locations.size(), 1);
196199
197200 // WriteTo() has been called already.
198201 EXPECT_THROW_THAT (
199- [&]() { bloom_filter_builder->WriteTo (sink.get (), &location ); }, ParquetException,
202+ [&]() { bloom_filter_builder->WriteTo (sink.get ()); }, ParquetException,
200203 ::testing::Property (
201204 &ParquetException::what,
202205 ::testing::HasSubstr (" Cannot write a finished BloomFilterBuilder" )));
0 commit comments