Skip to content

Commit d4176ae

Browse files
committed
lint
1 parent e19cc5e commit d4176ae

File tree

5 files changed

+70
-81
lines changed

5 files changed

+70
-81
lines changed

cpp/cmake_modules/ThirdpartyToolchain.cmake

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2628,9 +2628,8 @@ endfunction()
26282628

26292629
if(ARROW_WITH_FSST)
26302630
if(NOT fsst_SOURCE STREQUAL "BUNDLED")
2631-
message(
2632-
FATAL_ERROR
2633-
"FSST must currently be built from source. Set fsst_SOURCE=BUNDLED.")
2631+
message(FATAL_ERROR "FSST must currently be built from source. Set fsst_SOURCE=BUNDLED."
2632+
)
26342633
endif()
26352634
resolve_dependency(fsst IS_RUNTIME_DEPENDENCY FALSE)
26362635
endif()

cpp/src/parquet/CMakeLists.txt

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -206,19 +206,18 @@ if(DEFINED ARROW_FSST_INCLUDE_DIR)
206206
list(APPEND PARQUET_TEST_EXTRA_INCLUDES ${ARROW_FSST_INCLUDE_DIR})
207207
endif()
208208
if(DEFINED ARROW_FSST_SOURCES)
209-
set_property(
210-
SOURCE ${ARROW_FSST_SOURCES}
211-
APPEND
212-
PROPERTY COMPILE_OPTIONS
213-
"$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:-Wno-error=shorten-64-to-32;-Wno-shorten-64-to-32>"
214-
"$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:-Wno-error=missing-declarations;-Wno-missing-declarations>"
215-
"$<$<CXX_COMPILER_ID:MSVC>:/wd4244>")
216-
set_property(
217-
SOURCE ${ARROW_FSST_SOURCES}
218-
APPEND
219-
PROPERTY COMPILE_OPTIONS
220-
"$<$<AND:$<PLATFORM_ID:Windows>,$<NOT:$<CXX_COMPILER_ID:MSVC>>>:-include>"
221-
"$<$<AND:$<PLATFORM_ID:Windows>,$<NOT:$<CXX_COMPILER_ID:MSVC>>>:${CMAKE_CURRENT_SOURCE_DIR}/fsst_compat.h>")
209+
set_property(SOURCE ${ARROW_FSST_SOURCES}
210+
APPEND
211+
PROPERTY COMPILE_OPTIONS
212+
"$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:-Wno-error=shorten-64-to-32;-Wno-shorten-64-to-32>"
213+
"$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:-Wno-error=missing-declarations;-Wno-missing-declarations>"
214+
"$<$<CXX_COMPILER_ID:MSVC>:/wd4244>")
215+
set_property(SOURCE ${ARROW_FSST_SOURCES}
216+
APPEND
217+
PROPERTY COMPILE_OPTIONS
218+
"$<$<AND:$<PLATFORM_ID:Windows>,$<NOT:$<CXX_COMPILER_ID:MSVC>>>:-include>"
219+
"$<$<AND:$<PLATFORM_ID:Windows>,$<NOT:$<CXX_COMPILER_ID:MSVC>>>:${CMAKE_CURRENT_SOURCE_DIR}/fsst_compat.h>"
220+
)
222221
endif()
223222

224223
if(ARROW_HAVE_RUNTIME_AVX2)
@@ -339,8 +338,8 @@ add_arrow_lib(parquet
339338
if(PARQUET_PRIVATE_INCLUDE_DIRS)
340339
foreach(_parquet_target parquet_objlib parquet_shared parquet_static)
341340
if(TARGET ${_parquet_target})
342-
target_include_directories(
343-
${_parquet_target} PRIVATE ${PARQUET_PRIVATE_INCLUDE_DIRS})
341+
target_include_directories(${_parquet_target}
342+
PRIVATE ${PARQUET_PRIVATE_INCLUDE_DIRS})
344343
endif()
345344
endforeach()
346345
endif()

cpp/src/parquet/decoder.cc

Lines changed: 32 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@
5050
#include "arrow/util/ubsan.h"
5151
#include "arrow/visit_data_inline.h"
5252

53+
#include "fsst.h" // NOLINT(build/include_subdir)
5354
#include "parquet/exception.h"
5455
#include "parquet/platform.h"
5556
#include "parquet/schema.h"
56-
#include "fsst.h" // NOLINT(build/include_subdir)
5757
#include "parquet/types.h"
5858

5959
#ifdef _MSC_VER
@@ -2378,8 +2378,7 @@ class FsstDecoder : public DecoderImpl, virtual public TypedDecoder<ByteArrayTyp
23782378
}
23792379

23802380
int DecodeSpaced(ByteArray* buffer, int num_values, int null_count,
2381-
const uint8_t* valid_bits,
2382-
int64_t valid_bits_offset) override {
2381+
const uint8_t* valid_bits, int64_t valid_bits_offset) override {
23832382
if (null_count == 0) {
23842383
return Decode(buffer, num_values);
23852384
}
@@ -2409,19 +2408,18 @@ class FsstDecoder : public DecoderImpl, virtual public TypedDecoder<ByteArrayTyp
24092408
int64_t valid_bits_offset,
24102409
typename EncodingTraits<ByteArrayType>::Accumulator* builder) override {
24112410
int values_decoded = 0;
2412-
PARQUET_THROW_NOT_OK(
2413-
DecodeArrowDense(num_values, null_count, valid_bits, valid_bits_offset, builder,
2414-
&values_decoded));
2411+
PARQUET_THROW_NOT_OK(DecodeArrowDense(num_values, null_count, valid_bits,
2412+
valid_bits_offset, builder, &values_decoded));
24152413
return values_decoded;
24162414
}
24172415

2418-
int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
2419-
int64_t valid_bits_offset,
2420-
typename EncodingTraits<ByteArrayType>::DictAccumulator* builder) override {
2416+
int DecodeArrow(
2417+
int num_values, int null_count, const uint8_t* valid_bits,
2418+
int64_t valid_bits_offset,
2419+
typename EncodingTraits<ByteArrayType>::DictAccumulator* builder) override {
24212420
int values_decoded = 0;
2422-
PARQUET_THROW_NOT_OK(
2423-
DecodeArrowDict(num_values, null_count, valid_bits, valid_bits_offset, builder,
2424-
&values_decoded));
2421+
PARQUET_THROW_NOT_OK(DecodeArrowDict(num_values, null_count, valid_bits,
2422+
valid_bits_offset, builder, &values_decoded));
24252423
return values_decoded;
24262424
}
24272425

@@ -2482,32 +2480,31 @@ class FsstDecoder : public DecoderImpl, virtual public TypedDecoder<ByteArrayTyp
24822480
RETURN_NOT_OK(builder->Reserve(num_values));
24832481

24842482
int value_index = 0;
2485-
RETURN_NOT_OK(VisitBitRuns(
2486-
valid_bits, valid_bits_offset, num_values,
2487-
[&](int64_t position, int64_t run_length, bool is_valid) {
2488-
if (is_valid) {
2489-
for (int64_t i = 0; i < run_length; ++i) {
2490-
const auto& value = temp_values_[value_index++];
2491-
RETURN_NOT_OK(builder->Append(value.ptr, static_cast<int32_t>(value.len)));
2492-
}
2493-
} else {
2494-
RETURN_NOT_OK(builder->AppendNulls(run_length));
2495-
}
2496-
return Status::OK();
2497-
}));
2483+
RETURN_NOT_OK(VisitBitRuns(valid_bits, valid_bits_offset, num_values,
2484+
[&](int64_t position, int64_t run_length, bool is_valid) {
2485+
if (is_valid) {
2486+
for (int64_t i = 0; i < run_length; ++i) {
2487+
const auto& value = temp_values_[value_index++];
2488+
RETURN_NOT_OK(builder->Append(
2489+
value.ptr, static_cast<int32_t>(value.len)));
2490+
}
2491+
} else {
2492+
RETURN_NOT_OK(builder->AppendNulls(run_length));
2493+
}
2494+
return Status::OK();
2495+
}));
24982496

24992497
*out_values_decoded = decoded;
25002498
return Status::OK();
25012499
}
25022500

25032501
uint8_t* EnsureDecodeBuffer(int64_t capacity) {
2504-
const int64_t min_capacity =
2505-
std::max<int64_t>(capacity, kInitialDecodeBufferSize);
2502+
const int64_t min_capacity = std::max<int64_t>(capacity, kInitialDecodeBufferSize);
25062503
const int64_t target = ::arrow::bit_util::NextPower2(min_capacity);
25072504

25082505
if (!decode_buffer_) {
2509-
PARQUET_ASSIGN_OR_THROW(
2510-
decode_buffer_, ::arrow::AllocateResizableBuffer(target, pool_));
2506+
PARQUET_ASSIGN_OR_THROW(decode_buffer_,
2507+
::arrow::AllocateResizableBuffer(target, pool_));
25112508
} else if (decode_buffer_->size() < target) {
25122509
PARQUET_THROW_NOT_OK(decode_buffer_->Resize(target, false));
25132510
}
@@ -2516,26 +2513,24 @@ class FsstDecoder : public DecoderImpl, virtual public TypedDecoder<ByteArrayTyp
25162513

25172514
size_t DecompressValue(const uint8_t* compressed_ptr, uint32_t compressed_len,
25182515
uint8_t** value_ptr) {
2519-
EnsureDecodeBuffer(decode_buffer_size_ +
2520-
OutputUpperBound(compressed_len));
2516+
EnsureDecodeBuffer(decode_buffer_size_ + OutputUpperBound(compressed_len));
25212517

25222518
while (true) {
25232519
uint8_t* destination = decode_buffer_->mutable_data() + decode_buffer_size_;
25242520
const size_t available =
25252521
static_cast<size_t>(decode_buffer_->size() - decode_buffer_size_);
25262522

2527-
const size_t decompressed =
2528-
fsst_decompress(&decoder_, compressed_len, compressed_ptr, available,
2529-
destination);
2523+
const size_t decompressed = fsst_decompress(&decoder_, compressed_len,
2524+
compressed_ptr, available, destination);
25302525

25312526
if (decompressed > 0 || compressed_len == 0) {
25322527
*value_ptr = destination;
25332528
return decompressed;
25342529
}
25352530

2536-
int64_t new_capacity = std::max<int64_t>(
2537-
decode_buffer_->size() * 2,
2538-
decode_buffer_size_ + OutputUpperBound(compressed_len));
2531+
int64_t new_capacity =
2532+
std::max<int64_t>(decode_buffer_->size() * 2,
2533+
decode_buffer_size_ + OutputUpperBound(compressed_len));
25392534
if (new_capacity <= decode_buffer_->size()) {
25402535
throw ParquetException("FSST decompression failed");
25412536
}

cpp/src/parquet/encoder.cc

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@
4747
#include "arrow/util/ubsan.h"
4848
#include "arrow/visit_data_inline.h"
4949

50+
#include "fsst.h" // NOLINT(build/include_subdir)
5051
#include "parquet/exception.h"
5152
#include "parquet/platform.h"
5253
#include "parquet/schema.h"
53-
#include "fsst.h" // NOLINT(build/include_subdir)
5454
#include "parquet/types.h"
5555

5656
#ifdef _MSC_VER
@@ -1758,10 +1758,8 @@ class FsstEncoder : public EncoderImpl, virtual public TypedEncoder<ByteArrayTyp
17581758

17591759
int64_t EstimatedDataEncodedSize() override {
17601760
const int64_t total_size = pending_unencoded_bytes_;
1761-
const double scaled =
1762-
static_cast<double>(total_size) * compression_ratio_hint_;
1763-
const int64_t estimated_payload =
1764-
static_cast<int64_t>(std::ceil(scaled));
1761+
const double scaled = static_cast<double>(total_size) * compression_ratio_hint_;
1762+
const int64_t estimated_payload = static_cast<int64_t>(std::ceil(scaled));
17651763
return static_cast<int64_t>(sizeof(fsst_decoder_t)) +
17661764
std::max<int64_t>(0, estimated_payload);
17671765
}
@@ -1777,12 +1775,11 @@ class FsstEncoder : public EncoderImpl, virtual public TypedEncoder<ByteArrayTyp
17771775
const int64_t total_input_size = pending_unencoded_bytes_;
17781776

17791777
const int64_t decoder_bytes = static_cast<int64_t>(sizeof(fsst_decoder_t));
1780-
const int64_t length_prefix_bytes =
1781-
static_cast<int64_t>(unencoded_values_.size()) *
1782-
static_cast<int64_t>(sizeof(uint32_t));
1783-
const int64_t estimated_buffer_size =
1784-
decoder_bytes + total_input_size * kFsstCompressionExpansion +
1785-
length_prefix_bytes;
1778+
const int64_t length_prefix_bytes = static_cast<int64_t>(unencoded_values_.size()) *
1779+
static_cast<int64_t>(sizeof(uint32_t));
1780+
const int64_t estimated_buffer_size = decoder_bytes +
1781+
total_input_size * kFsstCompressionExpansion +
1782+
length_prefix_bytes;
17861783

17871784
PARQUET_ASSIGN_OR_THROW(auto output_buffer,
17881785
AllocateResizableBuffer(estimated_buffer_size, pool_));
@@ -1867,8 +1864,7 @@ class FsstEncoder : public EncoderImpl, virtual public TypedEncoder<ByteArrayTyp
18671864
int64_t valid_bits_offset) override {
18681865
if (valid_bits != NULLPTR) {
18691866
PARQUET_ASSIGN_OR_THROW(
1870-
auto buffer,
1871-
::arrow::AllocateBuffer(num_values * sizeof(ByteArray), pool_));
1867+
auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(ByteArray), pool_));
18721868
auto buffer_ptr = reinterpret_cast<ByteArray*>(buffer->mutable_data());
18731869
int num_valid_values = ::arrow::util::internal::SpacedCompress<ByteArray>(
18741870
src, num_values, valid_bits, valid_bits_offset, buffer_ptr);
@@ -1897,8 +1893,8 @@ class FsstEncoder : public EncoderImpl, virtual public TypedEncoder<ByteArrayTyp
18971893
input_ptrs.push_back(val.ptr);
18981894
}
18991895

1900-
encoder_ = fsst_create(unencoded_values_.size(), input_lengths.data(),
1901-
input_ptrs.data(), 0);
1896+
encoder_ =
1897+
fsst_create(unencoded_values_.size(), input_lengths.data(), input_ptrs.data(), 0);
19021898

19031899
if (!encoder_) {
19041900
throw ParquetException("Failed to create FSST encoder");

cpp/src/parquet/encoding_test.cc

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "arrow/util/endian.h"
4545
#include "arrow/util/span.h"
4646
#include "arrow/util/string.h"
47+
#include "fsst.h" // NOLINT(build/include_subdir)
4748
#include "parquet/column_page.h"
4849
#include "parquet/column_reader.h"
4950
#include "parquet/encoding.h"
@@ -53,7 +54,6 @@
5354
#include "parquet/platform.h"
5455
#include "parquet/schema.h"
5556
#include "parquet/test_util.h"
56-
#include "fsst.h" // NOLINT(build/include_subdir)
5757
#include "parquet/types.h"
5858

5959
using arrow::default_memory_pool;
@@ -2611,8 +2611,8 @@ TEST(TestFsstEncoding, BasicRoundTrip) {
26112611
constexpr int32_t kMinLength = 4;
26122612
constexpr int32_t kMaxLength = 48;
26132613

2614-
auto values = rag.BinaryWithRepeats(kNumValues, kNumUnique, kMinLength, kMaxLength,
2615-
0.0);
2614+
auto values =
2615+
rag.BinaryWithRepeats(kNumValues, kNumUnique, kMinLength, kMaxLength, 0.0);
26162616

26172617
auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::FSST);
26182618
ASSERT_NO_THROW(encoder->Put(*values));
@@ -2711,13 +2711,13 @@ TEST(TestFsstEncoding, MultiPageRoundTrip) {
27112711
->build();
27122712

27132713
std::unique_ptr<ParquetFileWriter> writer;
2714-
ASSERT_NO_THROW(writer =
2715-
ParquetFileWriter::Open(output_stream, parquet_schema, writer_props));
2714+
ASSERT_NO_THROW(
2715+
writer = ParquetFileWriter::Open(output_stream, parquet_schema, writer_props));
27162716
ASSERT_NE(nullptr, writer);
27172717
auto* row_group_writer = writer->AppendRowGroup();
27182718
ASSERT_NE(nullptr, row_group_writer);
2719-
auto* column_writer = static_cast<TypedColumnWriter<ByteArrayType>*>(
2720-
row_group_writer->NextColumn());
2719+
auto* column_writer =
2720+
static_cast<TypedColumnWriter<ByteArrayType>*>(row_group_writer->NextColumn());
27212721
ASSERT_NE(nullptr, column_writer);
27222722

27232723
auto write_page = [&](const std::vector<std::string>& source) {
@@ -2760,16 +2760,16 @@ TEST(TestFsstEncoding, MultiPageRoundTrip) {
27602760
ASSERT_NO_THROW(reader = make_reader());
27612761
ASSERT_NE(nullptr, reader);
27622762
auto row_group_reader = reader->RowGroup(0);
2763-
auto column_reader =
2764-
std::static_pointer_cast<TypedColumnReader<ByteArrayType>>(row_group_reader->Column(0));
2763+
auto column_reader = std::static_pointer_cast<TypedColumnReader<ByteArrayType>>(
2764+
row_group_reader->Column(0));
27652765

27662766
std::vector<ByteArray> decoded(kTotalValues);
27672767
int64_t values_read = 0;
27682768
while (values_read < kTotalValues) {
27692769
int64_t batch_length = std::min<int64_t>(1024, kTotalValues - values_read);
27702770
int64_t batch_read = 0;
2771-
column_reader->ReadBatch(batch_length, nullptr, nullptr,
2772-
decoded.data() + values_read, &batch_read);
2771+
column_reader->ReadBatch(batch_length, nullptr, nullptr, decoded.data() + values_read,
2772+
&batch_read);
27732773
ASSERT_GT(batch_read, 0);
27742774
values_read += batch_read;
27752775
}

0 commit comments

Comments
 (0)