@@ -754,6 +754,12 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType> {
754754 int64_t valid_bits_offset,
755755 typename EncodingTraits<ByteArrayType>::Accumulator* out,
756756 int * out_values_decoded) {
757+ // We're going to decode up to `num_values - null_count` PLAIN values,
758+ // and each value has a 4-byte length header that doesn't count for the
759+ // Arrow binary data length.
760+ int64_t estimated_data_length =
761+ std::max<int64_t >(0 , len_ - 4 * (num_values - null_count));
762+
757763 auto visit_binary_helper = [&](auto * helper) {
758764 int values_decoded = 0 ;
759765
@@ -772,11 +778,12 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType> {
772778 " Invalid or truncated PLAIN-encoded BYTE_ARRAY data" );
773779 }
774780 RETURN_NOT_OK (
775- helper->AppendValue (data_ + 4 , value_len,
776- /* estimated_remaining_data_length=*/ len_));
781+ helper->AppendValue (data_ + 4 , value_len, estimated_data_length));
777782 auto increment = value_len + 4 ;
778783 data_ += increment;
779784 len_ -= increment;
785+ estimated_data_length -= value_len;
786+ DCHECK_GE (estimated_data_length, 0 );
780787 }
781788 values_decoded += static_cast <int >(run_length);
782789 return Status::OK ();
@@ -790,8 +797,8 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType> {
790797 return Status::OK ();
791798 };
792799
793- return DispatchArrowBinaryHelper<ByteArrayType>(out, num_values, len_,
794- visit_binary_helper);
800+ return DispatchArrowBinaryHelper<ByteArrayType>(
801+ out, num_values, estimated_data_length, visit_binary_helper);
795802 }
796803
797804 template <typename BuilderType>
0 commit comments