1515// specific language governing permissions and limitations
1616// under the License.
1717use arrow_schema:: ArrowError ;
18+ use chrono:: { DateTime , Duration , NaiveDate , NaiveDateTime , Utc } ;
1819use std:: array:: TryFromSliceError ;
1920
20- use crate :: utils:: { array_from_slice, first_byte_from_slice , string_from_slice} ;
21+ use crate :: utils:: { array_from_slice, slice_from_slice , string_from_slice} ;
2122
2223#[ derive( Debug , Clone , Copy ) ]
2324pub enum VariantBasicType {
@@ -33,7 +34,18 @@ pub enum VariantPrimitiveType {
3334 BooleanTrue = 1 ,
3435 BooleanFalse = 2 ,
3536 Int8 = 3 ,
36- // TODO: Add types for the rest of primitives, once API is agreed upon
37+ Int16 = 4 ,
38+ Int32 = 5 ,
39+ Int64 = 6 ,
40+ Double = 7 ,
41+ Decimal4 = 8 ,
42+ Decimal8 = 9 ,
43+ Decimal16 = 10 ,
44+ Date = 11 ,
45+ TimestampMicros = 12 ,
46+ TimestampNtzMicros = 13 ,
47+ Float = 14 ,
48+ Binary = 15 ,
3749 String = 16 ,
3850}
3951
@@ -64,7 +76,18 @@ impl TryFrom<u8> for VariantPrimitiveType {
6476 1 => Ok ( VariantPrimitiveType :: BooleanTrue ) ,
6577 2 => Ok ( VariantPrimitiveType :: BooleanFalse ) ,
6678 3 => Ok ( VariantPrimitiveType :: Int8 ) ,
67- // TODO: Add types for the rest, once API is agreed upon
79+ 4 => Ok ( VariantPrimitiveType :: Int16 ) ,
80+ 5 => Ok ( VariantPrimitiveType :: Int32 ) ,
81+ 6 => Ok ( VariantPrimitiveType :: Int64 ) ,
82+ 7 => Ok ( VariantPrimitiveType :: Double ) ,
83+ 8 => Ok ( VariantPrimitiveType :: Decimal4 ) ,
84+ 9 => Ok ( VariantPrimitiveType :: Decimal8 ) ,
85+ 10 => Ok ( VariantPrimitiveType :: Decimal16 ) ,
86+ 11 => Ok ( VariantPrimitiveType :: Date ) ,
87+ 12 => Ok ( VariantPrimitiveType :: TimestampMicros ) ,
88+ 13 => Ok ( VariantPrimitiveType :: TimestampNtzMicros ) ,
89+ 14 => Ok ( VariantPrimitiveType :: Float ) ,
90+ 15 => Ok ( VariantPrimitiveType :: Binary ) ,
6891 16 => Ok ( VariantPrimitiveType :: String ) ,
6992 _ => Err ( ArrowError :: InvalidArgumentError ( format ! (
7093 "unknown primitive type: {}" ,
@@ -73,10 +96,10 @@ impl TryFrom<u8> for VariantPrimitiveType {
7396 }
7497 }
7598}
76- /// Extract the primitive type from a Variant value-header byte
77- pub ( crate ) fn get_primitive_type ( header : u8 ) -> Result < VariantPrimitiveType , ArrowError > {
99+ /// Extract the primitive type from a Variant value-metadata byte
100+ pub ( crate ) fn get_primitive_type ( metadata : u8 ) -> Result < VariantPrimitiveType , ArrowError > {
78101 // last 6 bits contain the primitive-type, see spec
79- VariantPrimitiveType :: try_from ( header >> 2 )
102+ VariantPrimitiveType :: try_from ( metadata >> 2 )
80103}
81104
82105/// To be used in `map_err` when unpacking an integer from a slice of bytes.
@@ -85,23 +108,103 @@ fn map_try_from_slice_error(e: TryFromSliceError) -> ArrowError {
85108}
86109
87110/// Decodes an Int8 from the value section of a variant.
88- pub ( crate ) fn decode_int8 ( value : & [ u8 ] ) -> Result < i8 , ArrowError > {
89- let value = i8:: from_le_bytes ( array_from_slice ( value, 1 ) ?) ;
111+ pub ( crate ) fn decode_int8 ( data : & [ u8 ] ) -> Result < i8 , ArrowError > {
112+ Ok ( i8:: from_le_bytes ( array_from_slice ( data, 0 ) ?) )
113+ }
114+
115+ /// Decodes an Int16 from the value section of a variant.
116+ pub ( crate ) fn decode_int16 ( data : & [ u8 ] ) -> Result < i16 , ArrowError > {
117+ Ok ( i16:: from_le_bytes ( array_from_slice ( data, 0 ) ?) )
118+ }
119+
120+ /// Decodes an Int32 from the value section of a variant.
121+ pub ( crate ) fn decode_int32 ( data : & [ u8 ] ) -> Result < i32 , ArrowError > {
122+ Ok ( i32:: from_le_bytes ( array_from_slice ( data, 0 ) ?) )
123+ }
124+
125+ /// Decodes an Int64 from the value section of a variant.
126+ pub ( crate ) fn decode_int64 ( data : & [ u8 ] ) -> Result < i64 , ArrowError > {
127+ Ok ( i64:: from_le_bytes ( array_from_slice ( data, 0 ) ?) )
128+ }
129+
130+ /// Decodes a Decimal4 from the value section of a variant.
131+ pub ( crate ) fn decode_decimal4 ( data : & [ u8 ] ) -> Result < ( i32 , u8 ) , ArrowError > {
132+ let scale = u8:: from_le_bytes ( array_from_slice ( data, 0 ) ?) ;
133+ let integer = i32:: from_le_bytes ( array_from_slice ( data, 1 ) ?) ;
134+ Ok ( ( integer, scale) )
135+ }
136+
137+ /// Decodes a Decimal8 from the value section of a variant.
138+ pub ( crate ) fn decode_decimal8 ( data : & [ u8 ] ) -> Result < ( i64 , u8 ) , ArrowError > {
139+ let scale = u8:: from_le_bytes ( array_from_slice ( data, 0 ) ?) ;
140+ let integer = i64:: from_le_bytes ( array_from_slice ( data, 1 ) ?) ;
141+ Ok ( ( integer, scale) )
142+ }
143+
144+ /// Decodes a Decimal16 from the value section of a variant.
145+ pub ( crate ) fn decode_decimal16 ( data : & [ u8 ] ) -> Result < ( i128 , u8 ) , ArrowError > {
146+ let scale = u8:: from_le_bytes ( array_from_slice ( data, 0 ) ?) ;
147+ let integer = i128:: from_le_bytes ( array_from_slice ( data, 1 ) ?) ;
148+ Ok ( ( integer, scale) )
149+ }
150+
151+ /// Decodes a Float from the value section of a variant.
152+ pub ( crate ) fn decode_float ( data : & [ u8 ] ) -> Result < f32 , ArrowError > {
153+ Ok ( f32:: from_le_bytes ( array_from_slice ( data, 0 ) ?) )
154+ }
155+
156+ /// Decodes a Double from the value section of a variant.
157+ pub ( crate ) fn decode_double ( data : & [ u8 ] ) -> Result < f64 , ArrowError > {
158+ Ok ( f64:: from_le_bytes ( array_from_slice ( data, 0 ) ?) )
159+ }
160+
161+ /// Decodes a Date from the value section of a variant.
162+ pub ( crate ) fn decode_date ( data : & [ u8 ] ) -> Result < NaiveDate , ArrowError > {
163+ let days_since_epoch = i32:: from_le_bytes ( array_from_slice ( data, 0 ) ?) ;
164+ let value = DateTime :: UNIX_EPOCH + Duration :: days ( i64:: from ( days_since_epoch) ) ;
165+ Ok ( value. date_naive ( ) )
166+ }
167+
168+ /// Decodes a TimestampMicros from the value section of a variant.
169+ pub ( crate ) fn decode_timestamp_micros ( data : & [ u8 ] ) -> Result < DateTime < Utc > , ArrowError > {
170+ let micros_since_epoch = i64:: from_le_bytes ( array_from_slice ( data, 0 ) ?) ;
171+ DateTime :: from_timestamp_micros ( micros_since_epoch) . ok_or_else ( || {
172+ ArrowError :: CastError ( format ! (
173+ "Could not cast `{micros_since_epoch}` microseconds into a DateTime<Utc>"
174+ ) )
175+ } )
176+ }
177+
178+ /// Decodes a TimestampNtzMicros from the value section of a variant.
179+ pub ( crate ) fn decode_timestampntz_micros ( data : & [ u8 ] ) -> Result < NaiveDateTime , ArrowError > {
180+ let micros_since_epoch = i64:: from_le_bytes ( array_from_slice ( data, 0 ) ?) ;
181+ DateTime :: from_timestamp_micros ( micros_since_epoch)
182+ . ok_or_else ( || {
183+ ArrowError :: CastError ( format ! (
184+ "Could not cast `{micros_since_epoch}` microseconds into a NaiveDateTime"
185+ ) )
186+ } )
187+ . map ( |v| v. naive_utc ( ) )
188+ }
189+
190+ /// Decodes a Binary from the value section of a variant.
191+ pub ( crate ) fn decode_binary ( data : & [ u8 ] ) -> Result < & [ u8 ] , ArrowError > {
192+ let len = u32:: from_le_bytes ( array_from_slice ( data, 0 ) ?) as usize ;
193+ let value = slice_from_slice ( data, 4 ..4 + len) ?;
90194 Ok ( value)
91195}
92196
93197/// Decodes a long string from the value section of a variant.
94- pub ( crate ) fn decode_long_string ( value : & [ u8 ] ) -> Result < & str , ArrowError > {
95- let len = u32:: from_le_bytes ( array_from_slice ( value , 1 ) ?) as usize ;
96- let string = string_from_slice ( value , 5 .. 5 + len) ?;
198+ pub ( crate ) fn decode_long_string ( data : & [ u8 ] ) -> Result < & str , ArrowError > {
199+ let len = u32:: from_le_bytes ( array_from_slice ( data , 0 ) ?) as usize ;
200+ let string = string_from_slice ( data , 4 .. 4 + len) ?;
97201 Ok ( string)
98202}
99203
100204/// Decodes a short string from the value section of a variant.
101- pub ( crate ) fn decode_short_string ( value : & [ u8 ] ) -> Result < & str , ArrowError > {
102- let len = ( first_byte_from_slice ( value) ? >> 2 ) as usize ;
103-
104- let string = string_from_slice ( value, 1 ..1 + len) ?;
205+ pub ( crate ) fn decode_short_string ( metadata : u8 , data : & [ u8 ] ) -> Result < & str , ArrowError > {
206+ let len = ( metadata >> 2 ) as usize ;
207+ let string = string_from_slice ( data, 0 ..len) ?;
105208 Ok ( string)
106209}
107210
@@ -111,47 +214,152 @@ mod tests {
111214
112215 #[ test]
113216 fn test_i8 ( ) -> Result < ( ) , ArrowError > {
114- let value = [
115- 3 << 2 , // Primitive type for i8
116- 42 ,
117- ] ;
118- let result = decode_int8 ( & value) ?;
217+ let data = [ 0x2a ] ;
218+ let result = decode_int8 ( & data) ?;
119219 assert_eq ! ( result, 42 ) ;
120220 Ok ( ( ) )
121221 }
122222
123223 #[ test]
124- fn test_short_string ( ) -> Result < ( ) , ArrowError > {
125- let value = [
126- 1 | 5 << 2 , // Basic type for short string | length of short string
127- b'H' ,
128- b'e' ,
129- b'l' ,
130- b'l' ,
131- b'o' ,
132- b'o' ,
224+ fn test_i16 ( ) -> Result < ( ) , ArrowError > {
225+ let data = [ 0xd2 , 0x04 ] ;
226+ let result = decode_int16 ( & data) ?;
227+ assert_eq ! ( result, 1234 ) ;
228+ Ok ( ( ) )
229+ }
230+
231+ #[ test]
232+ fn test_i32 ( ) -> Result < ( ) , ArrowError > {
233+ let data = [ 0x40 , 0xe2 , 0x01 , 0x00 ] ;
234+ let result = decode_int32 ( & data) ?;
235+ assert_eq ! ( result, 123456 ) ;
236+ Ok ( ( ) )
237+ }
238+
239+ #[ test]
240+ fn test_i64 ( ) -> Result < ( ) , ArrowError > {
241+ let data = [ 0x15 , 0x81 , 0xe9 , 0x7d , 0xf4 , 0x10 , 0x22 , 0x11 ] ;
242+ let result = decode_int64 ( & data) ?;
243+ assert_eq ! ( result, 1234567890123456789 ) ;
244+ Ok ( ( ) )
245+ }
246+
247+ #[ test]
248+ fn test_decimal4 ( ) -> Result < ( ) , ArrowError > {
249+ let data = [
250+ 0x02 , // Scale
251+ 0xd2 , 0x04 , 0x00 , 0x00 , // Integer
133252 ] ;
134- let result = decode_short_string ( & value) ?;
253+ let result = decode_decimal4 ( & data) ?;
254+ assert_eq ! ( result, ( 1234 , 2 ) ) ;
255+ Ok ( ( ) )
256+ }
257+
258+ #[ test]
259+ fn test_decimal8 ( ) -> Result < ( ) , ArrowError > {
260+ let data = [
261+ 0x02 , // Scale
262+ 0xd2 , 0x02 , 0x96 , 0x49 , 0x00 , 0x00 , 0x00 , 0x00 , // Integer
263+ ] ;
264+ let result = decode_decimal8 ( & data) ?;
265+ assert_eq ! ( result, ( 1234567890 , 2 ) ) ;
266+ Ok ( ( ) )
267+ }
268+
269+ #[ test]
270+ fn test_decimal16 ( ) -> Result < ( ) , ArrowError > {
271+ let data = [
272+ 0x02 , // Scale
273+ 0xd2 , 0xb6 , 0x23 , 0xc0 , 0xf4 , 0x10 , 0x22 , 0x11 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
274+ 0x00 , 0x00 , // Integer
275+ ] ;
276+ let result = decode_decimal16 ( & data) ?;
277+ assert_eq ! ( result, ( 1234567891234567890 , 2 ) ) ;
278+ Ok ( ( ) )
279+ }
280+
281+ #[ test]
282+ fn test_float ( ) -> Result < ( ) , ArrowError > {
283+ let data = [ 0x06 , 0x2c , 0x93 , 0x4e ] ;
284+ let result = decode_float ( & data) ?;
285+ assert_eq ! ( result, 1234567890.1234 ) ;
286+ Ok ( ( ) )
287+ }
288+
289+ #[ test]
290+ fn test_double ( ) -> Result < ( ) , ArrowError > {
291+ let data = [ 0xc9 , 0xe5 , 0x87 , 0xb4 , 0x80 , 0x65 , 0xd2 , 0x41 ] ;
292+ let result = decode_double ( & data) ?;
293+ assert_eq ! ( result, 1234567890.1234 ) ;
294+ Ok ( ( ) )
295+ }
296+
297+ #[ test]
298+ fn test_date ( ) -> Result < ( ) , ArrowError > {
299+ let data = [ 0xe2 , 0x4e , 0x0 , 0x0 ] ;
300+ let result = decode_date ( & data) ?;
301+ assert_eq ! ( result, NaiveDate :: from_ymd_opt( 2025 , 4 , 16 ) . unwrap( ) ) ;
302+ Ok ( ( ) )
303+ }
304+
305+ #[ test]
306+ fn test_timestamp_micros ( ) -> Result < ( ) , ArrowError > {
307+ let data = [ 0xe0 , 0x52 , 0x97 , 0xdd , 0xe7 , 0x32 , 0x06 , 0x00 ] ;
308+ let result = decode_timestamp_micros ( & data) ?;
309+ assert_eq ! (
310+ result,
311+ NaiveDate :: from_ymd_opt( 2025 , 4 , 16 )
312+ . unwrap( )
313+ . and_hms_milli_opt( 16 , 34 , 56 , 780 )
314+ . unwrap( )
315+ . and_utc( )
316+ ) ;
317+ Ok ( ( ) )
318+ }
319+
320+ #[ test]
321+ fn test_timestampntz_micros ( ) -> Result < ( ) , ArrowError > {
322+ let data = [ 0xe0 , 0x52 , 0x97 , 0xdd , 0xe7 , 0x32 , 0x06 , 0x00 ] ;
323+ let result = decode_timestampntz_micros ( & data) ?;
324+ assert_eq ! (
325+ result,
326+ NaiveDate :: from_ymd_opt( 2025 , 4 , 16 )
327+ . unwrap( )
328+ . and_hms_milli_opt( 16 , 34 , 56 , 780 )
329+ . unwrap( )
330+ ) ;
331+ Ok ( ( ) )
332+ }
333+
334+ #[ test]
335+ fn test_binary ( ) -> Result < ( ) , ArrowError > {
336+ let data = [
337+ 0x09 , 0 , 0 , 0 , // Length of binary data, 4-byte little-endian
338+ 0x03 , 0x13 , 0x37 , 0xde , 0xad , 0xbe , 0xef , 0xca , 0xfe ,
339+ ] ;
340+ let result = decode_binary ( & data) ?;
341+ assert_eq ! (
342+ result,
343+ [ 0x03 , 0x13 , 0x37 , 0xde , 0xad , 0xbe , 0xef , 0xca , 0xfe ]
344+ ) ;
345+ Ok ( ( ) )
346+ }
347+
348+ #[ test]
349+ fn test_short_string ( ) -> Result < ( ) , ArrowError > {
350+ let data = [ b'H' , b'e' , b'l' , b'l' , b'o' , b'o' ] ;
351+ let result = decode_short_string ( 1 | 5 << 2 , & data) ?;
135352 assert_eq ! ( result, "Hello" ) ;
136353 Ok ( ( ) )
137354 }
138355
139356 #[ test]
140357 fn test_string ( ) -> Result < ( ) , ArrowError > {
141- let value = [
142- 16 << 2 , // Basic type for short string | length of short string
143- 5 ,
144- 0 ,
145- 0 ,
146- 0 , // Length of string
147- b'H' ,
148- b'e' ,
149- b'l' ,
150- b'l' ,
151- b'o' ,
152- b'o' ,
358+ let data = [
359+ 0x05 , 0 , 0 , 0 , // Length of string, 4-byte little-endian
360+ b'H' , b'e' , b'l' , b'l' , b'o' , b'o' ,
153361 ] ;
154- let result = decode_long_string ( & value ) ?;
362+ let result = decode_long_string ( & data ) ?;
155363 assert_eq ! ( result, "Hello" ) ;
156364 Ok ( ( ) )
157365 }
0 commit comments