|
16 | 16 | // under the License. |
17 | 17 | use crate::decoder::OffsetSizeBytes; |
18 | 18 | use crate::utils::{ |
19 | | - first_byte_from_slice, slice_from_slice, try_binary_search_range_by, validate_fallible_iterator, |
| 19 | + first_byte_from_slice, overflow_error, slice_from_slice, try_binary_search_range_by, |
| 20 | + validate_fallible_iterator, |
20 | 21 | }; |
21 | 22 | use crate::variant::{Variant, VariantMetadata}; |
22 | 23 |
|
23 | 24 | use arrow_schema::ArrowError; |
24 | 25 |
|
 | 26 | +// Size in bytes of the value header that precedes the `num_elements` field |
| 27 | +const NUM_HEADER_BYTES: usize = 1; |
| 28 | + |
25 | 29 | /// Header structure for [`VariantObject`] |
26 | 30 | #[derive(Clone, Debug, PartialEq)] |
27 | 31 | pub(crate) struct VariantObjectHeader { |
@@ -72,36 +76,43 @@ impl<'m, 'v> VariantObject<'m, 'v> { |
72 | 76 | let header_byte = first_byte_from_slice(value)?; |
73 | 77 | let header = VariantObjectHeader::try_new(header_byte)?; |
74 | 78 |
|
75 | | - // Determine num_elements size based on is_large flag |
| 79 | + // Determine num_elements size based on is_large flag and fetch the value |
76 | 80 | let num_elements_size = if header.is_large { |
77 | 81 | OffsetSizeBytes::Four |
78 | 82 | } else { |
79 | 83 | OffsetSizeBytes::One |
80 | 84 | }; |
| 85 | + let num_elements = num_elements_size.unpack_usize(value, NUM_HEADER_BYTES, 0)?; |
| 86 | + |
| 87 | + // Calculate byte offsets for different sections with overflow protection |
| 88 | + let field_ids_start_byte = NUM_HEADER_BYTES |
| 89 | + .checked_add(num_elements_size as usize) |
| 90 | + .ok_or_else(|| overflow_error("offset of variant object field ids"))?; |
81 | 91 |
|
82 | | - // Parse num_elements |
83 | | - let num_elements = num_elements_size.unpack_usize(value, 1, 0)?; |
| 92 | + let field_offsets_start_byte = num_elements |
| 93 | + .checked_mul(header.field_id_size as usize) |
| 94 | + .and_then(|n| n.checked_add(field_ids_start_byte)) |
| 95 | + .ok_or_else(|| overflow_error("offset of variant object field offsets"))?; |
84 | 96 |
|
85 | | - // Calculate byte offsets for different sections |
86 | | - let field_ids_start_byte = 1 + num_elements_size as usize; |
87 | | - let field_offsets_start_byte = |
88 | | - field_ids_start_byte + num_elements * header.field_id_size as usize; |
89 | | - let values_start_byte = |
90 | | - field_offsets_start_byte + (num_elements + 1) * header.field_offset_size as usize; |
| 97 | + let values_start_byte = num_elements |
| 98 | + .checked_add(1) |
| 99 | + .and_then(|n| n.checked_mul(header.field_offset_size as usize)) |
| 100 | + .and_then(|n| n.checked_add(field_offsets_start_byte)) |
| 101 | + .ok_or_else(|| overflow_error("offset of variant object field values"))?; |
91 | 102 |
|
92 | 103 | // Spec says: "The last field_offset points to the byte after the end of the last value" |
93 | 104 | // |
94 | 105 | // Use the last offset as a bounds check. The iterator check below doesn't use it -- offsets |
95 | 106 | // are not monotonic -- so we have to check separately here. |
96 | | - let last_field_offset = |
97 | | - header |
98 | | - .field_offset_size |
99 | | - .unpack_usize(value, field_offsets_start_byte, num_elements)?; |
100 | | - if values_start_byte + last_field_offset > value.len() { |
| 107 | + let end_offset = header |
| 108 | + .field_offset_size |
| 109 | + .unpack_usize(value, field_offsets_start_byte, num_elements)? |
| 110 | + .checked_add(values_start_byte) |
| 111 | + .ok_or_else(|| overflow_error("end of variant object field values"))?; |
| 112 | + if end_offset > value.len() { |
101 | 113 | return Err(ArrowError::InvalidArgumentError(format!( |
102 | | - "Last field offset value {} at offset {} is outside the value slice of length {}", |
103 | | - last_field_offset, |
104 | | - values_start_byte, |
| 114 | + "Last field offset value {} is outside the value slice of length {}", |
| 115 | + end_offset, |
105 | 116 | value.len() |
106 | 117 | ))); |
107 | 118 | } |
@@ -140,7 +151,11 @@ impl<'m, 'v> VariantObject<'m, 'v> { |
140 | 151 | self.field_offsets_start_byte, |
141 | 152 | i, |
142 | 153 | )?; |
143 | | - let value_bytes = slice_from_slice(self.value, self.values_start_byte + start_offset..)?; |
| 154 | + let value_start = self |
| 155 | + .values_start_byte |
| 156 | + .checked_add(start_offset) |
| 157 | + .ok_or_else(|| overflow_error("offset of variant object field"))?; |
| 158 | + let value_bytes = slice_from_slice(self.value, value_start..)?; |
144 | 159 | Variant::try_new_with_metadata(self.metadata, value_bytes) |
145 | 160 | } |
146 | 161 |
|
|
0 commit comments