Skip to content

Commit 789d130

Browse files
committed
address comments
1 parent 0f50418 commit 789d130

18 files changed

+285
-281
lines changed

cpp/src/parquet/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ add_parquet_test(arrow-reader-writer-test
409409
arrow/arrow_statistics_test.cc
410410
arrow/variant_test.cc)
411411

412-
add_parquet_test(arrow-parquet-index-test SOURCES arrow/arrow_parquet_index_test.cc)
412+
add_parquet_test(arrow-index-test SOURCES arrow/index_test.cc)
413413

414414
add_parquet_test(arrow-internals-test SOURCES arrow/path_internal_test.cc
415415
arrow/reconstruct_internal_test.cc)

cpp/src/parquet/arrow/arrow_parquet_index_test.cc renamed to cpp/src/parquet/arrow/index_test.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -536,8 +536,8 @@ TEST_F(ParquetBloomFilterRoundTripTest, SimpleRoundTrip) {
536536
{::arrow::field("c0", ::arrow::int64()), ::arrow::field("c1", ::arrow::utf8())});
537537
BloomFilterOptions options{10, 0.05};
538538
auto writer_properties = WriterProperties::Builder()
539-
.enable_bloom_filter(options, "c0")
540-
->enable_bloom_filter(options, "c1")
539+
.enable_bloom_filter("c0", options)
540+
->enable_bloom_filter("c1", options)
541541
->max_row_group_length(4)
542542
->build();
543543
auto table = ::arrow::TableFromJSON(
@@ -565,8 +565,8 @@ TEST_F(ParquetBloomFilterRoundTripTest, SimpleRoundTripDictionary) {
565565

566566
BloomFilterOptions options{10, 0.05};
567567
auto writer_properties = WriterProperties::Builder()
568-
.enable_bloom_filter(options, "c0")
569-
->enable_bloom_filter(options, "c1")
568+
.enable_bloom_filter("c0", options)
569+
->enable_bloom_filter("c1", options)
570570
->max_row_group_length(4)
571571
->build();
572572

@@ -625,7 +625,7 @@ TEST_F(ParquetBloomFilterRoundTripTest, SimpleRoundTripWithOneFilter) {
625625
{::arrow::field("c0", ::arrow::int64()), ::arrow::field("c1", ::arrow::utf8())});
626626
BloomFilterOptions options{10, 0.05};
627627
auto writer_properties = WriterProperties::Builder()
628-
.enable_bloom_filter(options, "c0")
628+
.enable_bloom_filter("c0", options)
629629
->disable_bloom_filter("c1")
630630
->max_row_group_length(4)
631631
->build();
@@ -650,7 +650,7 @@ TEST_F(ParquetBloomFilterRoundTripTest, SimpleRoundTripWithOneFilter) {
650650
TEST_F(ParquetBloomFilterRoundTripTest, ThrowForBoolean) {
651651
BloomFilterOptions options{10, 0.05};
652652
auto writer_properties = WriterProperties::Builder()
653-
.enable_bloom_filter(options, "boolean_col")
653+
.enable_bloom_filter("boolean_col", options)
654654
->max_row_group_length(4)
655655
->build();
656656

cpp/src/parquet/bloom_filter_reader_writer_test.cc

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "parquet/bloom_filter.h"
2424
#include "parquet/bloom_filter_reader.h"
2525
#include "parquet/bloom_filter_writer.h"
26+
#include "parquet/exception.h"
2627
#include "parquet/file_reader.h"
2728
#include "parquet/test_util.h"
2829

@@ -94,14 +95,13 @@ TEST(BloomFilterBuilder, BasicRoundTrip) {
9495
const auto bitset_size = BlockSplitBloomFilter::OptimalNumOfBytes(
9596
bloom_filter_options.ndv, bloom_filter_options.fpp);
9697
WriterProperties::Builder properties_builder;
97-
properties_builder.enable_bloom_filter(bloom_filter_options, "c1");
98+
properties_builder.enable_bloom_filter("c1", bloom_filter_options);
9899
auto writer_properties = properties_builder.build();
99100
auto bloom_filter_builder = BloomFilterBuilder::Make(&schema, writer_properties.get());
100101

101102
auto write_row_group_bloom_filter = [&](const std::vector<uint64_t>& hash_values) {
102103
bloom_filter_builder->AppendRowGroup();
103-
auto bloom_filter =
104-
bloom_filter_builder->GetOrCreateBloomFilter(/*column_ordinal=*/0);
104+
auto bloom_filter = bloom_filter_builder->CreateBloomFilter(/*column_ordinal=*/0);
105105
ASSERT_NE(bloom_filter, nullptr);
106106
ASSERT_EQ(bloom_filter->GetBitsetSize(), bitset_size);
107107
for (uint64_t hash_value : hash_values) {
@@ -113,9 +113,8 @@ TEST(BloomFilterBuilder, BasicRoundTrip) {
113113
write_row_group_bloom_filter({300, 400});
114114

115115
auto sink = CreateOutputStream();
116-
IndexLocations bloom_filter_location;
117-
bloom_filter_builder->WriteTo(sink.get(), &bloom_filter_location);
118-
ASSERT_EQ(bloom_filter_location.locations.size(), 2);
116+
auto locations = bloom_filter_builder->WriteTo(sink.get());
117+
ASSERT_EQ(locations.size(), 2);
119118
ASSERT_OK_AND_ASSIGN(auto buffer, sink->Finish());
120119

121120
struct RowGroupBloomFilterCase {
@@ -129,11 +128,12 @@ TEST(BloomFilterBuilder, BasicRoundTrip) {
129128
/*non_existing_values=*/{300, 400}},
130129
RowGroupBloomFilterCase{/*row_group_id=*/1, /*existing_values=*/{300, 400},
131130
/*non_existing_values=*/{100, 200}}}) {
132-
auto row_group_bloom_filter = bloom_filter_location.locations.find(c.row_group_id);
133-
ASSERT_NE(row_group_bloom_filter, bloom_filter_location.locations.cend());
134-
135-
auto bloom_filter_location = row_group_bloom_filter->second.find(0);
136-
ASSERT_NE(bloom_filter_location, row_group_bloom_filter->second.cend());
131+
auto bloom_filter_location =
132+
std::find_if(locations.begin(), locations.end(), [&](const auto& location) {
133+
return location.first.row_group_index == c.row_group_id &&
134+
location.first.column_index == 0;
135+
});
136+
ASSERT_NE(bloom_filter_location, locations.end());
137137
int64_t bloom_filter_offset = bloom_filter_location->second.offset;
138138
int32_t bloom_filter_length = bloom_filter_location->second.length;
139139

@@ -158,45 +158,48 @@ TEST(BloomFilterBuilder, InvalidOperations) {
158158

159159
WriterProperties::Builder properties_builder;
160160
BloomFilterOptions bloom_filter_options{100, 0.05};
161-
properties_builder.enable_bloom_filter(bloom_filter_options, "c1");
162-
properties_builder.enable_bloom_filter(bloom_filter_options, "c2");
161+
properties_builder.enable_bloom_filter("c1", bloom_filter_options);
162+
properties_builder.enable_bloom_filter("c2", bloom_filter_options);
163163
auto properties = properties_builder.build();
164164
auto bloom_filter_builder = BloomFilterBuilder::Make(&schema, properties.get());
165165

166166
// AppendRowGroup() is not called yet.
167167
EXPECT_THROW_THAT(
168-
[&]() { bloom_filter_builder->GetOrCreateBloomFilter(/*column_ordinal=*/0); },
168+
[&]() { bloom_filter_builder->CreateBloomFilter(/*column_ordinal=*/0); },
169169
ParquetException,
170170
::testing::Property(
171171
&ParquetException::what,
172172
::testing::HasSubstr("No row group appended to BloomFilterBuilder")));
173173

174174
// Column ordinal is out of bound.
175175
bloom_filter_builder->AppendRowGroup();
176-
EXPECT_THROW_THAT([&]() { bloom_filter_builder->GetOrCreateBloomFilter(2); },
176+
EXPECT_THROW_THAT([&]() { bloom_filter_builder->CreateBloomFilter(2); },
177177
ParquetException,
178178
::testing::Property(&ParquetException::what,
179179
::testing::HasSubstr("Invalid column ordinal")));
180180

181181
// Boolean type is not supported.
182182
EXPECT_THROW_THAT(
183-
[&]() { bloom_filter_builder->GetOrCreateBloomFilter(1); }, ParquetException,
183+
[&]() { bloom_filter_builder->CreateBloomFilter(1); }, ParquetException,
184184
::testing::Property(
185185
&ParquetException::what,
186186
::testing::HasSubstr("BloomFilterBuilder does not support boolean type")));
187187

188-
// Get a created bloom filter should succeed.
189-
auto bloom_filter = bloom_filter_builder->GetOrCreateBloomFilter(0);
190-
ASSERT_EQ(bloom_filter_builder->GetOrCreateBloomFilter(0), bloom_filter);
188+
// Create a created bloom filter should throw.
189+
ASSERT_NO_THROW(bloom_filter_builder->CreateBloomFilter(0));
190+
EXPECT_THROW_THAT(
191+
[&]() { bloom_filter_builder->CreateBloomFilter(0); }, ParquetException,
192+
::testing::Property(
193+
&ParquetException::what,
194+
::testing::HasSubstr("Bloom filter already exists for column: 0")));
191195

192196
auto sink = CreateOutputStream();
193-
IndexLocations location;
194-
ASSERT_NO_FATAL_FAILURE(bloom_filter_builder->WriteTo(sink.get(), &location));
195-
ASSERT_EQ(location.locations.size(), 1);
197+
auto locations = bloom_filter_builder->WriteTo(sink.get());
198+
ASSERT_EQ(locations.size(), 1);
196199

197200
// WriteTo() has been called already.
198201
EXPECT_THROW_THAT(
199-
[&]() { bloom_filter_builder->WriteTo(sink.get(), &location); }, ParquetException,
202+
[&]() { bloom_filter_builder->WriteTo(sink.get()); }, ParquetException,
200203
::testing::Property(
201204
&ParquetException::what,
202205
::testing::HasSubstr("Cannot write a finished BloomFilterBuilder")));

0 commit comments

Comments
 (0)