|
4 | 4 | #include <parquet/encoding.h> |
5 | 5 | #include <parquet/schema.h> |
6 | 6 | #include <arrow/util/rle_encoding.h> |
| 7 | +#include <arrow/util/crc32.h> |
7 | 8 | #include <lz4.h> |
8 | 9 | #include <Poco/JSON/JSON.h> |
9 | 10 | #include <Poco/JSON/Object.h> |
@@ -815,8 +816,12 @@ void writeColumnImpl( |
815 | 816 | d.__set_encoding(use_dictionary ? parq::Encoding::RLE_DICTIONARY : encoding); |
816 | 817 | d.__set_definition_level_encoding(parq::Encoding::RLE); |
817 | 818 | d.__set_repetition_level_encoding(parq::Encoding::RLE); |
818 | | - /// We could also put checksum in `header.crc`, but apparently no one uses it: |
819 | | - /// https://issues.apache.org/jira/browse/PARQUET-594 |
| 819 | + |
| 820 | + if (options.write_checksums) |
| 821 | + { |
| 822 | + uint32_t crc = arrow::internal::crc32(0, compressed.data(), compressed.size()); |
| 823 | + header.__set_crc(crc); |
| 824 | + } |
820 | 825 |
|
821 | 826 | parq::Statistics page_stats = page_statistics.get(options); |
822 | 827 | bool has_null_count = s.max_def == 1 && s.max_rep == 0; |
@@ -878,6 +883,12 @@ void writeColumnImpl( |
878 | 883 | header.dictionary_page_header.__set_num_values(dict_encoder->num_entries()); |
879 | 884 | header.dictionary_page_header.__set_encoding(parq::Encoding::PLAIN); |
880 | 885 |
|
| 886 | + if (options.write_checksums) |
| 887 | + { |
| 888 | + uint32_t crc = arrow::internal::crc32(0, compressed.data(), compressed.size()); |
| 889 | + header.__set_crc(crc); |
| 890 | + } |
| 891 | + |
881 | 892 | writePage(header, compressed, s, /*add_to_offset_index*/ false, /*first_row_index*/ 0, out); |
882 | 893 |
|
883 | 894 | for (auto & p : dict_encoded_pages) |
|
0 commit comments