@@ -37,32 +37,28 @@ namespace {
3737
3838// / \brief Header structure for ALP compression blocks
3939// /
40- // / Contains metadata required to decompress the data.
40+ // / Contains metadata required to decompress the data. Note that compressed_size
41+ // / and num_elements are NOT stored in this block header; the caller takes them from
42+ // / the page header and passes them to Decode() as comp_size and num_elements.
4143// /
4244// / Serialization format (version 1):
4345// /
4446// / +---------------------------------------------------+
45- // / | CompressionBlockHeader (40 bytes) |
47+ // / | CompressionBlockHeader (24 bytes) |
4648// / +---------------------------------------------------+
4749// / | Offset | Field | Size |
4850// / +---------+---------------------+-------------------+
4951// / | 0 | version | 8 bytes (uint64) |
50- // / | 8 | compressed_size | 8 bytes (uint64) |
51- // / | 16 | num_elements | 8 bytes (uint64) |
52- // / | 24 | vector_size | 8 bytes (uint64) |
53- // / | 32 | compression_mode | 4 bytes (enum) |
54- // / | 36 | bit_pack_layout | 4 bytes (enum) |
52+ // / | 8 | vector_size | 8 bytes (uint64) |
53+ // / | 16 | compression_mode | 4 bytes (enum) |
54+ // / | 20 | bit_pack_layout | 4 bytes (enum) |
5555// / +---------------------------------------------------+
5656// /
5757// / \note version must remain the first field to allow reading the rest
5858// / of the header based on version number.
5959struct CompressionBlockHeader {
6060 // / Version number. Must remain the first field for version-based parsing.
6161 uint64_t version = 0 ;
62- // / Size of the compressed data in bytes (includes header).
63- uint64_t compressed_size = 0 ;
64- // / Number of elements in the compressed data.
65- uint64_t num_elements = 0 ;
6662 // / Vector size used for compression.
6763 // / Must be AlpConstants::kAlpVectorSize for decompression.
6864 uint64_t vector_size = 0 ;
@@ -78,8 +74,8 @@ struct CompressionBlockHeader {
7874 static size_t GetSizeForVersion (uint64_t v) {
7975 size_t size;
8076 if (v == 1 ) {
81- size = sizeof (version) + sizeof (compressed_size ) + sizeof (num_elements ) +
82- sizeof (vector_size) + sizeof (compression_mode) + sizeof ( bit_pack_layout);
77+ size = sizeof (version) + sizeof (vector_size ) + sizeof (compression_mode ) +
78+ sizeof (bit_pack_layout);
8379 } else {
8480 ARROW_CHECK (false ) << " unknown_version: " << v;
8581 }
@@ -151,33 +147,24 @@ void AlpWrapper<T>::Encode(const T* decomp, size_t decomp_size, char* comp,
151147
152148 CompressionBlockHeader header{};
153149 header.version = version;
154- header.compressed_size =
155- ::arrow::util::alp::CompressionBlockHeader::GetSizeForVersion (version) +
156- compression_progress.num_compressed_bytes_produced;
157- header.num_elements = decomp_size / sizeof (T);
158150 header.vector_size = AlpConstants::kAlpVectorSize ;
159151 header.compression_mode = AlpMode::kAlp ;
160152 header.bit_pack_layout = AlpBitPackLayout::kNormal ;
161153
162154 std::memcpy (encoded_header, &header,
163155 ::arrow::util::alp::CompressionBlockHeader::GetSizeForVersion (version));
164- *comp_size = header.compressed_size ;
156+ *comp_size = ::arrow::util::alp::CompressionBlockHeader::GetSizeForVersion (version) +
157+ compression_progress.num_compressed_bytes_produced ;
165158}
166159
167160template <typename T>
168161template <typename TargetType>
169- void AlpWrapper<T>::Decode(TargetType* decomp, size_t * decomp_size , const char * comp,
162+ void AlpWrapper<T>::Decode(TargetType* decomp, uint64_t num_elements , const char * comp,
170163 size_t comp_size) {
171164 const CompressionBlockHeader header = LoadHeader (comp, comp_size);
172165 ARROW_CHECK (header.vector_size == AlpConstants::kAlpVectorSize )
173166 << " unsupported_vector_size: " << header.vector_size ;
174167
175- if (header.num_elements * sizeof (TargetType) > *decomp_size) {
176- *decomp_size = 0 ;
177- return ;
178- }
179-
180- const uint64_t elements_to_decode = header.num_elements ;
181168 const char * compression_body =
182169 comp + ::arrow::util::alp::CompressionBlockHeader::GetSizeForVersion (header.version );
183170 const uint64_t compression_body_size =
@@ -186,34 +173,17 @@ void AlpWrapper<T>::Decode(TargetType* decomp, size_t* decomp_size, const char*
186173
187174 ARROW_CHECK (header.compression_mode == AlpMode::kAlp ) << " alp_decode_unsupported_mode" ;
188175
189- uint64_t elements_decoded =
190- DecodeAlp (decomp, elements_to_decode, compression_body, compression_body_size,
191- header.bit_pack_layout )
192- .num_decompressed_elements_produced ;
193- *decomp_size = elements_decoded * sizeof (TargetType);
176+ DecodeAlp<TargetType>(decomp, num_elements, compression_body, compression_body_size,
177+ header.bit_pack_layout );
194178}
195179
196- template void AlpWrapper<float >::Decode(float * decomp, size_t * decomp_size ,
180+ template void AlpWrapper<float >::Decode(float * decomp, uint64_t num_elements ,
197181 const char * comp, size_t comp_size);
198- template void AlpWrapper<float >::Decode(double * decomp, size_t * decomp_size ,
182+ template void AlpWrapper<float >::Decode(double * decomp, uint64_t num_elements ,
199183 const char * comp, size_t comp_size);
200- template void AlpWrapper<double >::Decode(double * decomp, size_t * decomp_size ,
184+ template void AlpWrapper<double >::Decode(double * decomp, uint64_t num_elements ,
201185 const char * comp, size_t comp_size);
202186
203- template <typename T>
204- template <typename TargetType>
205- uint64_t AlpWrapper<T>::GetDecompressedSize(const char * comp, uint64_t comp_size) {
206- const CompressionBlockHeader header = LoadHeader (comp, comp_size);
207- return header.num_elements * sizeof (TargetType);
208- }
209-
210- template uint64_t AlpWrapper<float >::GetDecompressedSize<float >(const char * comp,
211- uint64_t comp_size);
212- template uint64_t AlpWrapper<float >::GetDecompressedSize<double >(const char * comp,
213- uint64_t comp_size);
214- template uint64_t AlpWrapper<double >::GetDecompressedSize<double >(const char * comp,
215- uint64_t comp_size);
216-
217187template <typename T>
218188uint64_t AlpWrapper<T>::GetMaxCompressedSize(uint64_t decomp_size) {
219189 ARROW_CHECK (decomp_size % sizeof (T) == 0 )
0 commit comments