Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/src/arrow/util/fuzz_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ void LogFuzzStatus(const Status& st, const uint8_t* data, int64_t size) {
return value;
}();

if (kVerbosity >= 1) {
if (!st.ok() && kVerbosity >= 1) {
ARROW_LOG(WARNING) << "Fuzzing input with size=" << size
<< " failed: " << st.ToString();
} else if (st.IsOutOfMemory()) {
Expand Down
1 change: 1 addition & 0 deletions cpp/src/parquet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ endif()
# Library config

set(PARQUET_SRCS
arrow/fuzz_internal.cc
arrow/path_internal.cc
arrow/reader.cc
arrow/reader_internal.cc
Expand Down
45 changes: 35 additions & 10 deletions cpp/src/parquet/arrow/arrow_reader_writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/config.h" // for ARROW_CSV definition
#include "arrow/util/config.h" // for ARROW_CSV and PARQUET_REQUIRE_ENCRYPTION
#include "arrow/util/decimal.h"
#include "arrow/util/future.h"
#include "arrow/util/key_value_metadata.h"
Expand All @@ -65,6 +65,7 @@
#include "parquet/api/reader.h"
#include "parquet/api/writer.h"

#include "parquet/arrow/fuzz_internal.h"
#include "parquet/arrow/reader.h"
#include "parquet/arrow/reader_internal.h"
#include "parquet/arrow/schema.h"
Expand Down Expand Up @@ -5830,29 +5831,53 @@ TEST(TestArrowReadWrite, MultithreadedWrite) {
}

// Feeds checked-in Parquet sample files through the fuzzing entry point and
// checks that known-bad files are rejected and known-good files are accepted.
TEST(TestArrowReadWrite, FuzzReader) {
  using ::parquet::fuzzing::internal::FuzzReader;

  // Number of bytes requested from each sample file (1 MiB).
  constexpr size_t kMaxFileSize = 1024 * 1024 * 1;

  // Opens the named sample file, reads up to kMaxFileSize bytes from it and
  // returns whatever status FuzzReader reports for that buffer.
  // `is_good` selects which sample directory get_data_file() resolves against.
  auto fuzz_file = [&](const std::string& file_name, bool is_good) {
    auto path = test::get_data_file(file_name, is_good);
    PARQUET_ASSIGN_OR_THROW(auto source, ::arrow::io::MemoryMappedFile::Open(
                                             path, ::arrow::io::FileMode::READ));
    PARQUET_ASSIGN_OR_THROW(auto buffer, source->Read(kMaxFileSize));
    return FuzzReader(buffer->data(), buffer->size());
  };

  // A "bad" sample must always make the fuzz reader report an error.
  auto check_bad_file = [&](const std::string& file_name) {
    SCOPED_TRACE(file_name);
    auto s = fuzz_file(file_name, /*is_good=*/false);
    ASSERT_NOT_OK(s);
  };

  // A "good" sample must be read successfully, unless the caller states that
  // this build configuration is expected to reject it.
  auto check_good_file = [&](const std::string& file_name, bool expect_error = false) {
    SCOPED_TRACE(file_name);
    auto s = fuzz_file(file_name, /*is_good=*/true);
    if (expect_error) {
      ASSERT_NOT_OK(s);
    } else {
      ASSERT_OK(s);
    }
  };

  check_bad_file("PARQUET-1481.parquet");
  check_bad_file("ARROW-GH-41317.parquet");
  check_bad_file("ARROW-GH-41321.parquet");
  check_bad_file("ARROW-RS-GH-6229-LEVELS.parquet");
  check_bad_file("ARROW-RS-GH-6229-DICTHEADER.parquet");

  check_good_file("alltypes_plain.parquet");
  check_good_file("data_index_bloom_encoding_stats.parquet");
  check_good_file("data_index_bloom_encoding_with_length.parquet");
#ifdef PARQUET_REQUIRE_ENCRYPTION
  // Encrypted files in the testing repo should be ok, except those
  // that require external key material or an explicitly-supplied AAD.
  check_good_file("uniform_encryption.parquet.encrypted");
  check_good_file("encrypt_columns_and_footer_aad.parquet.encrypted");
  check_good_file("encrypt_columns_and_footer.parquet.encrypted");
  check_good_file("encrypt_columns_plaintext_footer.parquet.encrypted");
#else
  // Without encryption support compiled in, an encrypted file must be
  // reported as an error.
  check_good_file("uniform_encryption.parquet.encrypted", /*expect_error=*/true);
#endif
}

// Test writing table with a closed writer, should not segfault (GH-37969).
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/parquet/arrow/fuzz.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

#include "arrow/status.h"
#include "arrow/util/fuzz_internal.h"
#include "parquet/arrow/reader.h"
#include "parquet/arrow/fuzz_internal.h"

// libFuzzer entry point: runs the Parquet fuzz reader over one input buffer
// and passes the resulting status to the shared fuzz-status logger.
//
// `data` / `size` describe the raw bytes produced by the fuzzing engine.
// Always returns 0, whatever status the reader reports.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  // The reader and logger take a signed length, so convert once up front.
  const auto length = static_cast<int64_t>(size);
  auto status = parquet::fuzzing::internal::FuzzReader(data, length);
  arrow::internal::LogFuzzStatus(status, data, length);
  return 0;
}
Loading
Loading