|
51 | 51 | #include "arrow/type_fwd.h" |
52 | 52 | #include "arrow/type_traits.h" |
53 | 53 | #include "arrow/util/checked_cast.h" |
54 | | -#include "arrow/util/config.h" // for ARROW_CSV definition |
| 54 | +#include "arrow/util/config.h" // for ARROW_CSV and PARQUET_REQUIRE_ENCRYPTION |
55 | 55 | #include "arrow/util/decimal.h" |
56 | 56 | #include "arrow/util/future.h" |
57 | 57 | #include "arrow/util/key_value_metadata.h" |
|
65 | 65 | #include "parquet/api/reader.h" |
66 | 66 | #include "parquet/api/writer.h" |
67 | 67 |
|
| 68 | +#include "parquet/arrow/fuzz_internal.h" |
68 | 69 | #include "parquet/arrow/reader.h" |
69 | 70 | #include "parquet/arrow/reader_internal.h" |
70 | 71 | #include "parquet/arrow/schema.h" |
@@ -5830,29 +5831,53 @@ TEST(TestArrowReadWrite, MultithreadedWrite) { |
5830 | 5831 | } |
5831 | 5832 |
|
5832 | 5833 | TEST(TestArrowReadWrite, FuzzReader) { |
| 5834 | + using ::parquet::fuzzing::internal::FuzzReader; |
| 5835 | + |
5833 | 5836 | constexpr size_t kMaxFileSize = 1024 * 1024 * 1; |
| 5837 | + |
5834 | 5838 | auto check_bad_file = [&](const std::string& file_name) { |
5835 | 5839 | SCOPED_TRACE(file_name); |
5836 | 5840 | auto path = test::get_data_file(file_name, /*is_good=*/false); |
5837 | 5841 | PARQUET_ASSIGN_OR_THROW(auto source, ::arrow::io::MemoryMappedFile::Open( |
5838 | 5842 | path, ::arrow::io::FileMode::READ)); |
5839 | 5843 | PARQUET_ASSIGN_OR_THROW(auto buffer, source->Read(kMaxFileSize)); |
5840 | | - auto s = internal::FuzzReader(buffer->data(), buffer->size()); |
| 5844 | + auto s = FuzzReader(buffer->data(), buffer->size()); |
5841 | 5845 | ASSERT_NOT_OK(s); |
5842 | 5846 | }; |
| 5847 | + |
| 5848 | + auto check_good_file = [&](const std::string& file_name, bool expect_error = false) { |
| 5849 | + SCOPED_TRACE(file_name); |
| 5850 | + auto path = test::get_data_file(file_name, /*is_good=*/true); |
| 5851 | + PARQUET_ASSIGN_OR_THROW(auto source, ::arrow::io::MemoryMappedFile::Open( |
| 5852 | + path, ::arrow::io::FileMode::READ)); |
| 5853 | + PARQUET_ASSIGN_OR_THROW(auto buffer, source->Read(kMaxFileSize)); |
| 5854 | + auto s = FuzzReader(buffer->data(), buffer->size()); |
| 5855 | + if (expect_error) { |
| 5856 | + ASSERT_NOT_OK(s); |
| 5857 | + } else { |
| 5858 | + ASSERT_OK(s); |
| 5859 | + } |
| 5860 | + }; |
| 5861 | + |
5843 | 5862 | check_bad_file("PARQUET-1481.parquet"); |
5844 | 5863 | check_bad_file("ARROW-GH-41317.parquet"); |
5845 | 5864 | check_bad_file("ARROW-GH-41321.parquet"); |
5846 | 5865 | check_bad_file("ARROW-RS-GH-6229-LEVELS.parquet"); |
5847 | 5866 | check_bad_file("ARROW-RS-GH-6229-DICTHEADER.parquet"); |
5848 | | - { |
5849 | | - auto path = test::get_data_file("alltypes_plain.parquet", /*is_good=*/true); |
5850 | | - PARQUET_ASSIGN_OR_THROW(auto source, ::arrow::io::MemoryMappedFile::Open( |
5851 | | - path, ::arrow::io::FileMode::READ)); |
5852 | | - PARQUET_ASSIGN_OR_THROW(auto buffer, source->Read(kMaxFileSize)); |
5853 | | - auto s = internal::FuzzReader(buffer->data(), buffer->size()); |
5854 | | - ASSERT_OK(s); |
5855 | | - } |
| 5867 | + |
| 5868 | + check_good_file("alltypes_plain.parquet"); |
| 5869 | + check_good_file("data_index_bloom_encoding_stats.parquet"); |
| 5870 | + check_good_file("data_index_bloom_encoding_with_length.parquet"); |
| 5871 | +#ifdef PARQUET_REQUIRE_ENCRYPTION |
| 5872 | + // Encrypted files in the testing repo should be ok, except those |
| 5873 | + // that require external key material or an explicitly-supplied AAD. |
| 5874 | + check_good_file("uniform_encryption.parquet.encrypted"); |
| 5875 | + check_good_file("encrypt_columns_and_footer_aad.parquet.encrypted"); |
| 5876 | + check_good_file("encrypt_columns_and_footer.parquet.encrypted"); |
| 5877 | + check_good_file("encrypt_columns_plaintext_footer.parquet.encrypted"); |
| 5878 | +#else |
| 5879 | + check_good_file("uniform_encryption.parquet.encrypted", /*expect_error=*/true); |
| 5880 | +#endif |
5856 | 5881 | } |
5857 | 5882 |
|
5858 | 5883 | // Test writing table with a closed writer, should not segfault (GH-37969). |
|
0 commit comments