@@ -167,9 +167,10 @@ impl VariantParser {
167167 pub fn parse_short_string_header ( header_byte : u8 ) -> Result < ShortStringHeader , ArrowError > {
168168 let length = ( header_byte >> 2 ) as usize ;
169169
170- if length > 13 {
170+ // Short strings can be up to 64 bytes (6-bit value: 0-63)
171+ if length > 63 {
171172 return Err ( ArrowError :: InvalidArgumentError ( format ! (
172- "Short string length {} exceeds maximum of 13 " ,
173+ "Short string length {} exceeds maximum of 63 " ,
173174 length
174175 ) ) ) ;
175176 }
@@ -307,22 +308,23 @@ impl VariantParser {
307308 }
308309
309310 /// Get the data length for a primitive type
310- pub fn get_primitive_data_length ( primitive_type : & PrimitiveType ) -> usize {
311+ /// Returns Some(len) for fixed-length types, None for variable-length types
312+ pub fn get_primitive_data_length ( primitive_type : & PrimitiveType ) -> Option < usize > {
311313 match primitive_type {
312- PrimitiveType :: Null | PrimitiveType :: True | PrimitiveType :: False => 0 ,
313- PrimitiveType :: Int8 => 1 ,
314- PrimitiveType :: Int16 => 2 ,
314+ PrimitiveType :: Null | PrimitiveType :: True | PrimitiveType :: False => Some ( 0 ) ,
315+ PrimitiveType :: Int8 => Some ( 1 ) ,
316+ PrimitiveType :: Int16 => Some ( 2 ) ,
315317 PrimitiveType :: Int32
316318 | PrimitiveType :: Float
317319 | PrimitiveType :: Decimal4
318- | PrimitiveType :: Date => 4 ,
320+ | PrimitiveType :: Date => Some ( 4 ) ,
319321 PrimitiveType :: Int64
320322 | PrimitiveType :: Double
321323 | PrimitiveType :: Decimal8
322324 | PrimitiveType :: TimestampNtz
323- | PrimitiveType :: TimestampLtz => 8 ,
324- PrimitiveType :: Decimal16 => 16 ,
325- PrimitiveType :: Binary | PrimitiveType :: String => 0 , // Variable length, need to read from data
325+ | PrimitiveType :: TimestampLtz => Some ( 8 ) ,
326+ PrimitiveType :: Decimal16 => Some ( 16 ) ,
327+ PrimitiveType :: Binary | PrimitiveType :: String => None , // Variable length, need to read from data
326328 }
327329 }
328330
@@ -357,43 +359,41 @@ impl VariantParser {
357359 let primitive_type = Self :: parse_primitive_header ( value_bytes[ 0 ] ) ?;
358360 let data_length = Self :: get_primitive_data_length ( & primitive_type) ;
359361
360- if data_length == 0 {
361- // Handle variable length types and null/boolean
362- match primitive_type {
363- PrimitiveType :: Null | PrimitiveType :: True | PrimitiveType :: False => Ok ( & [ ] ) ,
364- PrimitiveType :: Binary | PrimitiveType :: String => {
365- // These require reading length from the data
366- if value_bytes. len ( ) < 5 {
367- return Err ( ArrowError :: InvalidArgumentError (
368- "Not enough bytes for variable length primitive" . to_string ( ) ,
369- ) ) ;
370- }
371- let length = u32:: from_le_bytes ( [
372- value_bytes[ 1 ] ,
373- value_bytes[ 2 ] ,
374- value_bytes[ 3 ] ,
375- value_bytes[ 4 ] ,
376- ] ) as usize ;
377- if value_bytes. len ( ) < 5 + length {
378- return Err ( ArrowError :: InvalidArgumentError (
379- "Variable length primitive data exceeds available bytes" . to_string ( ) ,
380- ) ) ;
381- }
382- Ok ( & value_bytes[ 5 ..5 + length] )
362+ match data_length {
363+ Some ( 0 ) => {
364+ // Fixed-length 0-byte types (null/true/false)
365+ Ok ( & [ ] )
366+ }
367+ Some ( len) => {
368+ // Fixed-length types with len bytes
369+ if value_bytes. len ( ) < 1 + len {
370+ return Err ( ArrowError :: InvalidArgumentError ( format ! (
371+ "Fixed length primitive data length {} exceeds available bytes" ,
372+ len
373+ ) ) ) ;
383374 }
384- _ => Err ( ArrowError :: InvalidArgumentError ( format ! (
385- "Unhandled primitive type: {:?}" ,
386- primitive_type
387- ) ) ) ,
375+ Ok ( & value_bytes[ 1 ..1 + len] )
388376 }
389- } else {
390- if value_bytes. len ( ) < 1 + data_length {
391- return Err ( ArrowError :: InvalidArgumentError ( format ! (
392- "Primitive data length {} exceeds available bytes" ,
393- data_length
394- ) ) ) ;
377+ None => {
378+ // Variable-length types (binary/string) - read length from data
379+ if value_bytes. len ( ) < 5 {
380+ return Err ( ArrowError :: InvalidArgumentError (
381+ "Not enough bytes for variable length primitive" . to_string ( ) ,
382+ ) ) ;
383+ }
384+ let length = u32:: from_le_bytes ( [
385+ value_bytes[ 1 ] ,
386+ value_bytes[ 2 ] ,
387+ value_bytes[ 3 ] ,
388+ value_bytes[ 4 ] ,
389+ ] ) as usize ;
390+ if value_bytes. len ( ) < 5 + length {
391+ return Err ( ArrowError :: InvalidArgumentError (
392+ "Variable length primitive data exceeds available bytes" . to_string ( ) ,
393+ ) ) ;
394+ }
395+ Ok ( & value_bytes[ 5 ..5 + length] )
395396 }
396- Ok ( & value_bytes[ 1 ..1 + data_length] )
397397 }
398398 }
399399
@@ -500,14 +500,17 @@ mod tests {
500500 ShortStringHeader { length: 5 }
501501 ) ;
502502
503- // Test 13 -length short string (maximum)
503+ // Test 63 -length short string (maximum for 6-bit value )
504504 assert_eq ! (
505- VariantParser :: parse_short_string_header( 0b00110101 ) . unwrap( ) ,
506- ShortStringHeader { length: 13 }
505+ VariantParser :: parse_short_string_header( 0b11111101 ) . unwrap( ) ,
506+ ShortStringHeader { length: 63 }
507507 ) ;
508508
509- // Test invalid length > 13
510- assert ! ( VariantParser :: parse_short_string_header( 0b00111001 ) . is_err( ) ) ;
509+ // Test that all values 0-63 are valid
510+ for length in 0 ..=63 {
511+ let header_byte = ( length << 2 ) | 1 ; // short string type
512+ assert ! ( VariantParser :: parse_short_string_header( header_byte as u8 ) . is_ok( ) ) ;
513+ }
511514 }
512515
513516 #[ test]
@@ -564,50 +567,55 @@ mod tests {
564567
565568 #[ test]
566569 fn test_get_primitive_data_length ( ) {
570+ // Test fixed-length 0-byte types
567571 assert_eq ! (
568572 VariantParser :: get_primitive_data_length( & PrimitiveType :: Null ) ,
569- 0
573+ Some ( 0 )
570574 ) ;
571575 assert_eq ! (
572576 VariantParser :: get_primitive_data_length( & PrimitiveType :: True ) ,
573- 0
577+ Some ( 0 )
574578 ) ;
575579 assert_eq ! (
576580 VariantParser :: get_primitive_data_length( & PrimitiveType :: False ) ,
577- 0
581+ Some ( 0 )
578582 ) ;
583+
584+ // Test fixed-length types with specific byte counts
579585 assert_eq ! (
580586 VariantParser :: get_primitive_data_length( & PrimitiveType :: Int8 ) ,
581- 1
587+ Some ( 1 )
582588 ) ;
583589 assert_eq ! (
584590 VariantParser :: get_primitive_data_length( & PrimitiveType :: Int16 ) ,
585- 2
591+ Some ( 2 )
586592 ) ;
587593 assert_eq ! (
588594 VariantParser :: get_primitive_data_length( & PrimitiveType :: Int32 ) ,
589- 4
595+ Some ( 4 )
590596 ) ;
591597 assert_eq ! (
592598 VariantParser :: get_primitive_data_length( & PrimitiveType :: Int64 ) ,
593- 8
599+ Some ( 8 )
594600 ) ;
595601 assert_eq ! (
596602 VariantParser :: get_primitive_data_length( & PrimitiveType :: Double ) ,
597- 8
603+ Some ( 8 )
598604 ) ;
599605 assert_eq ! (
600606 VariantParser :: get_primitive_data_length( & PrimitiveType :: Decimal16 ) ,
601- 16
607+ Some ( 16 )
602608 ) ;
609+
610+ // Test variable-length types (should return None)
603611 assert_eq ! (
604612 VariantParser :: get_primitive_data_length( & PrimitiveType :: Binary ) ,
605- 0
606- ) ; // Variable length
613+ None
614+ ) ;
607615 assert_eq ! (
608616 VariantParser :: get_primitive_data_length( & PrimitiveType :: String ) ,
609- 0
610- ) ; // Variable length
617+ None
618+ ) ;
611619 }
612620
613621 #[ test]
0 commit comments