@@ -174,6 +174,50 @@ impl Field {
174
174
}
175
175
}
176
176
177
+ pub fn parquet_type ( & self ) -> proc_macro2:: TokenStream {
178
+ // TODO: Support group types
179
+ // TODO: Add length if dealing with fixedlenbinary
180
+
181
+ let field_name = & self . ident . to_string ( ) ;
182
+ let physical_type = match self . ty . physical_type ( ) {
183
+ parquet:: basic:: Type :: BOOLEAN => quote ! {
184
+ parquet:: basic:: Type :: BOOLEAN
185
+ } ,
186
+ parquet:: basic:: Type :: INT32 => quote ! {
187
+ parquet:: basic:: Type :: INT32
188
+ } ,
189
+ parquet:: basic:: Type :: INT64 => quote ! {
190
+ parquet:: basic:: Type :: INT64
191
+ } ,
192
+ parquet:: basic:: Type :: INT96 => quote ! {
193
+ parquet:: basic:: Type :: INT96
194
+ } ,
195
+ parquet:: basic:: Type :: FLOAT => quote ! {
196
+ parquet:: basic:: Type :: FLOAT
197
+ } ,
198
+ parquet:: basic:: Type :: DOUBLE => quote ! {
199
+ parquet:: basic:: Type :: DOUBLE
200
+ } ,
201
+ parquet:: basic:: Type :: BYTE_ARRAY => quote ! {
202
+ parquet:: basic:: Type :: BYTE_ARRAY
203
+ } ,
204
+ parquet:: basic:: Type :: FIXED_LEN_BYTE_ARRAY => quote ! {
205
+ parquet:: basic:: Type :: FIXED_LEN_BYTE_ARRAY
206
+ } ,
207
+ } ;
208
+ let logical_type = self . ty . logical_type ( ) ;
209
+ let repetition = self . ty . repetition ( ) ;
210
+ quote ! {
211
+ fields. push( ParquetType :: primitive_type_builder( #field_name, #physical_type)
212
+ . with_logical_type( #logical_type)
213
+ . with_repetition( #repetition)
214
+ . build( )
215
+ . unwrap( )
216
+ . into( )
217
+ ) ;
218
+ }
219
+ }
220
+
177
221
fn option_into_vals ( & self ) -> proc_macro2:: TokenStream {
178
222
let field_name = & self . ident ;
179
223
let is_a_byte_buf = self . is_a_byte_buf ;
@@ -201,7 +245,12 @@ impl Field {
201
245
} else if is_a_byte_buf {
202
246
quote ! { Some ( ( & inner[ ..] ) . into( ) ) }
203
247
} else {
204
- quote ! { Some ( inner) }
248
+ // Type might need converting to a physical type
249
+ match self . ty . physical_type ( ) {
250
+ parquet:: basic:: Type :: INT32 => quote ! { Some ( inner as i32 ) } ,
251
+ parquet:: basic:: Type :: INT64 => quote ! { Some ( inner as i64 ) } ,
252
+ _ => quote ! { Some ( inner) } ,
253
+ }
205
254
} ;
206
255
207
256
quote ! {
@@ -232,7 +281,12 @@ impl Field {
232
281
} else if is_a_byte_buf {
233
282
quote ! { ( & rec. #field_name[ ..] ) . into( ) }
234
283
} else {
235
- quote ! { rec. #field_name }
284
+ // Type might need converting to a physical type
285
+ match self . ty . physical_type ( ) {
286
+ parquet:: basic:: Type :: INT32 => quote ! { rec. #field_name as i32 } ,
287
+ parquet:: basic:: Type :: INT64 => quote ! { rec. #field_name as i64 } ,
288
+ _ => quote ! { rec. #field_name } ,
289
+ }
236
290
} ;
237
291
238
292
quote ! {
@@ -403,14 +457,98 @@ impl Type {
403
457
"bool" => BasicType :: BOOLEAN ,
404
458
"u8" | "u16" | "u32" => BasicType :: INT32 ,
405
459
"i8" | "i16" | "i32" | "NaiveDate" => BasicType :: INT32 ,
406
- "u64" | "i64" | "usize" | "NaiveDateTime" => BasicType :: INT64 ,
460
+ "u64" | "i64" | "NaiveDateTime" => BasicType :: INT64 ,
461
+ "usize" | "isize" => {
462
+ if usize:: BITS == 64 {
463
+ BasicType :: INT64
464
+ } else {
465
+ BasicType :: INT32
466
+ }
467
+ }
407
468
"f32" => BasicType :: FLOAT ,
408
469
"f64" => BasicType :: DOUBLE ,
409
470
"String" | "str" | "Uuid" => BasicType :: BYTE_ARRAY ,
410
471
f => unimplemented ! ( "{} currently is not supported" , f) ,
411
472
}
412
473
}
413
474
475
+ fn logical_type ( & self ) -> proc_macro2:: TokenStream {
476
+ let last_part = self . last_part ( ) ;
477
+ let leaf_type = self . leaf_type_recursive ( ) ;
478
+
479
+ match leaf_type {
480
+ Type :: Array ( ref first_type) => {
481
+ if let Type :: TypePath ( _) = * * first_type {
482
+ if last_part == "u8" {
483
+ return quote ! { None } ;
484
+ }
485
+ }
486
+ }
487
+ Type :: Vec ( ref first_type) => {
488
+ if let Type :: TypePath ( _) = * * first_type {
489
+ if last_part == "u8" {
490
+ return quote ! { None } ;
491
+ }
492
+ }
493
+ }
494
+ _ => ( ) ,
495
+ }
496
+
497
+ match last_part. trim ( ) {
498
+ "bool" => quote ! { None } ,
499
+ "u8" => quote ! { Some ( LogicalType :: INTEGER ( IntType {
500
+ bit_width: 8 ,
501
+ is_signed: false ,
502
+ } ) ) } ,
503
+ "u16" => quote ! { Some ( LogicalType :: INTEGER ( IntType {
504
+ bit_width: 16 ,
505
+ is_signed: false ,
506
+ } ) ) } ,
507
+ "u32" => quote ! { Some ( LogicalType :: INTEGER ( IntType {
508
+ bit_width: 32 ,
509
+ is_signed: false ,
510
+ } ) ) } ,
511
+ "u64" => quote ! { Some ( LogicalType :: INTEGER ( IntType {
512
+ bit_width: 64 ,
513
+ is_signed: false ,
514
+ } ) ) } ,
515
+ "i8" => quote ! { Some ( LogicalType :: INTEGER ( IntType {
516
+ bit_width: 8 ,
517
+ is_signed: true ,
518
+ } ) ) } ,
519
+ "i16" => quote ! { Some ( LogicalType :: INTEGER ( IntType {
520
+ bit_width: 16 ,
521
+ is_signed: true ,
522
+ } ) ) } ,
523
+ "i32" | "i64" => quote ! { None } ,
524
+ "usize" => {
525
+ quote ! { Some ( LogicalType :: INTEGER ( IntType {
526
+ bit_width: usize :: BITS as i8 ,
527
+ is_signed: false
528
+ } ) ) }
529
+ }
530
+ "isize" => {
531
+ quote ! { Some ( LogicalType :: INTEGER ( IntType {
532
+ bit_width: usize :: BITS as i8 ,
533
+ is_signed: true
534
+ } ) ) }
535
+ }
536
+ "NaiveDate" => quote ! { Some ( LogicalType :: DATE ( Default :: default ( ) ) ) } ,
537
+ "f32" | "f64" => quote ! { None } ,
538
+ "String" | "str" => quote ! { Some ( LogicalType :: STRING ( Default :: default ( ) ) ) } ,
539
+ "Uuid" => quote ! { Some ( LogicalType :: UUID ( Default :: default ( ) ) ) } ,
540
+ f => unimplemented ! ( "{} currently is not supported" , f) ,
541
+ }
542
+ }
543
+
544
+ fn repetition ( & self ) -> proc_macro2:: TokenStream {
545
+ match & self {
546
+ Type :: Option ( _) => quote ! { Repetition :: OPTIONAL } ,
547
+ Type :: Reference ( _, ty) => ty. repetition ( ) ,
548
+ _ => quote ! { Repetition :: REQUIRED } ,
549
+ }
550
+ }
551
+
414
552
/// Convert a parsed rust field AST in to a more easy to manipulate
415
553
/// parquet_derive::Field
416
554
fn from ( f : & syn:: Field ) -> Self {
@@ -505,7 +643,7 @@ mod test {
505
643
assert_eq ! ( snippet,
506
644
( quote!{
507
645
{
508
- let vals : Vec < _ > = records . iter ( ) . map ( | rec | rec . counter ) . collect ( ) ;
646
+ let vals : Vec < _ > = records . iter ( ) . map ( | rec | rec . counter as i64 ) . collect ( ) ;
509
647
510
648
if let parquet:: column:: writer:: ColumnWriter :: Int64ColumnWriter ( ref mut typed ) = column_writer {
511
649
typed . write_batch ( & vals [ .. ] , None , None ) ?;
@@ -585,7 +723,7 @@ mod test {
585
723
586
724
let vals: Vec <_> = records. iter( ) . filter_map( |rec| {
587
725
if let Some ( inner ) = rec . optional_dumb_int {
588
- Some ( inner )
726
+ Some ( inner as i32 )
589
727
} else {
590
728
None
591
729
}
@@ -636,12 +774,13 @@ mod test {
636
774
struct ABasicStruct {
637
775
yes_no: bool ,
638
776
name: String ,
777
+ length: usize
639
778
}
640
779
} ;
641
780
642
781
let fields = extract_fields ( snippet) ;
643
782
let processed: Vec < _ > = fields. iter ( ) . map ( |field| Field :: from ( field) ) . collect ( ) ;
644
- assert_eq ! ( processed. len( ) , 2 ) ;
783
+ assert_eq ! ( processed. len( ) , 3 ) ;
645
784
646
785
assert_eq ! (
647
786
processed,
@@ -657,6 +796,12 @@ mod test {
657
796
ty: Type :: TypePath ( syn:: parse_quote!( String ) ) ,
658
797
is_a_byte_buf: true ,
659
798
third_party_type: None ,
799
+ } ,
800
+ Field {
801
+ ident: syn:: Ident :: new( "length" , proc_macro2:: Span :: call_site( ) ) ,
802
+ ty: Type :: TypePath ( syn:: parse_quote!( usize ) ) ,
803
+ is_a_byte_buf: false ,
804
+ third_party_type: None ,
660
805
}
661
806
]
662
807
)
0 commit comments