Skip to content

Commit a14a96d

Browse files
committed
GH-48335: [C++][Parquet] Fuzz encrypted files
1 parent 85ea78d commit a14a96d

File tree

11 files changed

+619
-309
lines changed

11 files changed

+619
-309
lines changed

cpp/src/arrow/util/fuzz_internal.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ void LogFuzzStatus(const Status& st, const uint8_t* data, int64_t size) {
4949
return value;
5050
}();
5151

52-
if (kVerbosity >= 1) {
52+
if (!st.ok() && kVerbosity >= 1) {
5353
ARROW_LOG(WARNING) << "Fuzzing input with size=" << size
5454
<< " failed: " << st.ToString();
5555
} else if (st.IsOutOfMemory()) {

cpp/src/parquet/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ endif()
151151
# Library config
152152

153153
set(PARQUET_SRCS
154+
arrow/fuzz_internal.cc
154155
arrow/path_internal.cc
155156
arrow/reader.cc
156157
arrow/reader_internal.cc

cpp/src/parquet/arrow/arrow_reader_writer_test.cc

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
#include "arrow/type_fwd.h"
5252
#include "arrow/type_traits.h"
5353
#include "arrow/util/checked_cast.h"
54-
#include "arrow/util/config.h" // for ARROW_CSV definition
54+
#include "arrow/util/config.h" // for ARROW_CSV and PARQUET_REQUIRE_ENCRYPTION
5555
#include "arrow/util/decimal.h"
5656
#include "arrow/util/future.h"
5757
#include "arrow/util/key_value_metadata.h"
@@ -65,6 +65,7 @@
6565
#include "parquet/api/reader.h"
6666
#include "parquet/api/writer.h"
6767

68+
#include "parquet/arrow/fuzz_internal.h"
6869
#include "parquet/arrow/reader.h"
6970
#include "parquet/arrow/reader_internal.h"
7071
#include "parquet/arrow/schema.h"
@@ -5830,29 +5831,53 @@ TEST(TestArrowReadWrite, MultithreadedWrite) {
58305831
}
58315832

58325833
TEST(TestArrowReadWrite, FuzzReader) {
5834+
using ::parquet::fuzzing::internal::FuzzReader;
5835+
58335836
constexpr size_t kMaxFileSize = 1024 * 1024 * 1;
5837+
58345838
auto check_bad_file = [&](const std::string& file_name) {
58355839
SCOPED_TRACE(file_name);
58365840
auto path = test::get_data_file(file_name, /*is_good=*/false);
58375841
PARQUET_ASSIGN_OR_THROW(auto source, ::arrow::io::MemoryMappedFile::Open(
58385842
path, ::arrow::io::FileMode::READ));
58395843
PARQUET_ASSIGN_OR_THROW(auto buffer, source->Read(kMaxFileSize));
5840-
auto s = internal::FuzzReader(buffer->data(), buffer->size());
5844+
auto s = FuzzReader(buffer->data(), buffer->size());
58415845
ASSERT_NOT_OK(s);
58425846
};
5847+
5848+
auto check_good_file = [&](const std::string& file_name, bool expect_error = false) {
5849+
SCOPED_TRACE(file_name);
5850+
auto path = test::get_data_file(file_name, /*is_good=*/true);
5851+
PARQUET_ASSIGN_OR_THROW(auto source, ::arrow::io::MemoryMappedFile::Open(
5852+
path, ::arrow::io::FileMode::READ));
5853+
PARQUET_ASSIGN_OR_THROW(auto buffer, source->Read(kMaxFileSize));
5854+
auto s = FuzzReader(buffer->data(), buffer->size());
5855+
if (expect_error) {
5856+
ASSERT_NOT_OK(s);
5857+
} else {
5858+
ASSERT_OK(s);
5859+
}
5860+
};
5861+
58435862
check_bad_file("PARQUET-1481.parquet");
58445863
check_bad_file("ARROW-GH-41317.parquet");
58455864
check_bad_file("ARROW-GH-41321.parquet");
58465865
check_bad_file("ARROW-RS-GH-6229-LEVELS.parquet");
58475866
check_bad_file("ARROW-RS-GH-6229-DICTHEADER.parquet");
5848-
{
5849-
auto path = test::get_data_file("alltypes_plain.parquet", /*is_good=*/true);
5850-
PARQUET_ASSIGN_OR_THROW(auto source, ::arrow::io::MemoryMappedFile::Open(
5851-
path, ::arrow::io::FileMode::READ));
5852-
PARQUET_ASSIGN_OR_THROW(auto buffer, source->Read(kMaxFileSize));
5853-
auto s = internal::FuzzReader(buffer->data(), buffer->size());
5854-
ASSERT_OK(s);
5855-
}
5867+
5868+
check_good_file("alltypes_plain.parquet");
5869+
check_good_file("data_index_bloom_encoding_stats.parquet");
5870+
check_good_file("data_index_bloom_encoding_with_length.parquet");
5871+
#ifdef PARQUET_REQUIRE_ENCRYPTION
5872+
// Encrypted files in the testing repo should be ok, except those
5873+
// that require external key material or an explicitly-supplied AAD.
5874+
check_good_file("uniform_encryption.parquet.encrypted");
5875+
check_good_file("encrypt_columns_and_footer_aad.parquet.encrypted");
5876+
check_good_file("encrypt_columns_and_footer.parquet.encrypted");
5877+
check_good_file("encrypt_columns_plaintext_footer.parquet.encrypted");
5878+
#else
5879+
check_good_file("uniform_encryption.parquet.encrypted", /*expect_error=*/true);
5880+
#endif
58565881
}
58575882

58585883
// Test writing table with a closed writer, should not segfault (GH-37969).

cpp/src/parquet/arrow/fuzz.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717

1818
#include "arrow/status.h"
1919
#include "arrow/util/fuzz_internal.h"
20-
#include "parquet/arrow/reader.h"
20+
#include "parquet/arrow/fuzz_internal.h"
2121

2222
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
23-
auto status = parquet::arrow::internal::FuzzReader(data, static_cast<int64_t>(size));
23+
auto status = parquet::fuzzing::internal::FuzzReader(data, static_cast<int64_t>(size));
2424
arrow::internal::LogFuzzStatus(status, data, static_cast<int64_t>(size));
2525
return 0;
2626
}

0 commit comments

Comments
 (0)