@@ -19,6 +19,30 @@ use etl::types::{
19
19
} ;
20
20
use std:: sync:: Arc ;
21
21
22
+ /// Extract numeric precision from Postgres atttypmod
23
+ /// Based on: https://stackoverflow.com/questions/72725508/how-to-calculate-numeric-precision-and-other-vals-from-atttypmod
24
+ fn extract_numeric_precision ( atttypmod : i32 ) -> u8 {
25
+ if atttypmod == -1 {
26
+ // No limit specified, use maximum precision
27
+ 38
28
+ } else {
29
+ let precision = ( ( atttypmod - 4 ) >> 16 ) & 65535 ;
30
+ std:: cmp:: min ( precision as u8 , 38 ) // Cap at Arrow's max precision
31
+ }
32
+ }
33
+
34
+ /// Extract numeric scale from Postgres atttypmod
35
+ /// Based on: https://stackoverflow.com/questions/72725508/how-to-calculate-numeric-precision-and-other-vals-from-atttypmod
36
+ fn extract_numeric_scale ( atttypmod : i32 ) -> i8 {
37
+ if atttypmod == -1 {
38
+ // No limit specified, use reasonable default scale
39
+ 18
40
+ } else {
41
+ let scale = ( atttypmod - 4 ) & 65535 ;
42
+ std:: cmp:: min ( scale as i8 , 38 ) // Cap at reasonable scale
43
+ }
44
+ }
45
+
22
46
/// Converts TableRows to Arrow RecordBatch for Delta Lake writes
23
47
pub struct TableRowEncoder ;
24
48
@@ -57,7 +81,8 @@ impl TableRowEncoder {
57
81
. column_schemas
58
82
. iter ( )
59
83
. map ( |col_schema| {
60
- let data_type = Self :: postgres_type_to_arrow_type ( & col_schema. typ ) ;
84
+ let data_type =
85
+ Self :: postgres_type_to_arrow_type ( & col_schema. typ , col_schema. modifier ) ;
61
86
ArrowField :: new ( & col_schema. name , data_type, col_schema. nullable )
62
87
} )
63
88
. collect ( ) ;
@@ -66,7 +91,7 @@ impl TableRowEncoder {
66
91
}
67
92
68
93
/// Map Postgres types to appropriate Arrow types
69
- pub ( crate ) fn postgres_type_to_arrow_type ( pg_type : & PGType ) -> ArrowDataType {
94
+ pub ( crate ) fn postgres_type_to_arrow_type ( pg_type : & PGType , modifier : i32 ) -> ArrowDataType {
70
95
match * pg_type {
71
96
// Boolean types
72
97
PGType :: BOOL => ArrowDataType :: Boolean ,
@@ -131,15 +156,20 @@ impl TableRowEncoder {
131
156
ArrowDataType :: Float64 ,
132
157
true ,
133
158
) ) ) ,
134
-
135
- // Decimal types - use high precision for NUMERIC
136
- PGType :: NUMERIC => ArrowDataType :: Decimal128 ( 38 , 18 ) , // Max precision, reasonable scale
137
- PGType :: NUMERIC_ARRAY => ArrowDataType :: List ( Arc :: new ( ArrowField :: new (
138
- "item" ,
139
- ArrowDataType :: Decimal128 ( 38 , 18 ) ,
140
- true ,
141
- ) ) ) ,
142
-
159
+ PGType :: NUMERIC => {
160
+ let precision = extract_numeric_precision ( modifier) ;
161
+ let scale = extract_numeric_scale ( modifier) ;
162
+ ArrowDataType :: Decimal128 ( precision, scale)
163
+ }
164
+ PGType :: NUMERIC_ARRAY => {
165
+ let precision = extract_numeric_precision ( modifier) ;
166
+ let scale = extract_numeric_scale ( modifier) ;
167
+ ArrowDataType :: List ( Arc :: new ( ArrowField :: new (
168
+ "item" ,
169
+ ArrowDataType :: Decimal128 ( precision, scale) ,
170
+ true ,
171
+ ) ) )
172
+ }
143
173
// Date/Time types
144
174
PGType :: DATE => ArrowDataType :: Date32 ,
145
175
PGType :: DATE_ARRAY => ArrowDataType :: List ( Arc :: new ( ArrowField :: new (
@@ -623,39 +653,38 @@ impl TableRowEncoder {
623
653
/// Convert cells to decimal128 array
624
654
fn convert_to_decimal128_array (
625
655
cells : Vec < & PGCell > ,
626
- _precision : u8 ,
627
- _scale : i8 ,
656
+ precision : u8 ,
657
+ scale : i8 ,
628
658
) -> Result < ArrayRef , ArrowError > {
629
659
let values: Vec < Option < i128 > > = cells
630
660
. iter ( )
631
661
. map ( |cell| match cell {
632
662
PGCell :: Null => None ,
633
663
PGCell :: Numeric ( n) => {
634
- // Convert PgNumeric to decimal128
635
664
// This is a simplified conversion - ideally we'd preserve the exact decimal representation
636
665
if let Ok ( string_val) = n. to_string ( ) . parse :: < f64 > ( ) {
637
666
// Scale up by the scale factor and convert to i128
638
- let scaled = ( string_val * 10_f64 . powi ( _scale as i32 ) ) as i128 ;
667
+ let scaled = ( string_val * 10_f64 . powi ( scale as i32 ) ) as i128 ;
639
668
Some ( scaled)
640
669
} else {
641
670
None
642
671
}
643
672
}
644
- PGCell :: I16 ( i) => Some ( * i as i128 * 10_i128 . pow ( _scale as u32 ) ) ,
645
- PGCell :: I32 ( i) => Some ( * i as i128 * 10_i128 . pow ( _scale as u32 ) ) ,
646
- PGCell :: I64 ( i) => Some ( * i as i128 * 10_i128 . pow ( _scale as u32 ) ) ,
647
- PGCell :: U32 ( i) => Some ( * i as i128 * 10_i128 . pow ( _scale as u32 ) ) ,
673
+ PGCell :: I16 ( i) => Some ( * i as i128 * 10_i128 . pow ( scale as u32 ) ) ,
674
+ PGCell :: I32 ( i) => Some ( * i as i128 * 10_i128 . pow ( scale as u32 ) ) ,
675
+ PGCell :: I64 ( i) => Some ( * i as i128 * 10_i128 . pow ( scale as u32 ) ) ,
676
+ PGCell :: U32 ( i) => Some ( * i as i128 * 10_i128 . pow ( scale as u32 ) ) ,
648
677
PGCell :: F32 ( f) => {
649
- let scaled = ( * f as f64 * 10_f64 . powi ( _scale as i32 ) ) as i128 ;
678
+ let scaled = ( * f as f64 * 10_f64 . powi ( scale as i32 ) ) as i128 ;
650
679
Some ( scaled)
651
680
}
652
681
PGCell :: F64 ( f) => {
653
- let scaled = ( f * 10_f64 . powi ( _scale as i32 ) ) as i128 ;
682
+ let scaled = ( f * 10_f64 . powi ( scale as i32 ) ) as i128 ;
654
683
Some ( scaled)
655
684
}
656
685
PGCell :: String ( s) => {
657
686
if let Ok ( val) = s. parse :: < f64 > ( ) {
658
- let scaled = ( val * 10_f64 . powi ( _scale as i32 ) ) as i128 ;
687
+ let scaled = ( val * 10_f64 . powi ( scale as i32 ) ) as i128 ;
659
688
Some ( scaled)
660
689
} else {
661
690
None
@@ -664,7 +693,11 @@ impl TableRowEncoder {
664
693
_ => None ,
665
694
} )
666
695
. collect ( ) ;
667
- Ok ( Arc :: new ( Decimal128Array :: from ( values) ) )
696
+
697
+ let decimal_type = ArrowDataType :: Decimal128 ( precision, scale) ;
698
+ Ok ( Arc :: new (
699
+ Decimal128Array :: from ( values) . with_data_type ( decimal_type) ,
700
+ ) )
668
701
}
669
702
670
703
/// Convert cells to list array for array types
@@ -713,7 +746,7 @@ impl TableRowEncoder {
713
746
/// Convert a Postgres type to Delta DataType using delta-kernel's conversion traits
714
747
#[ allow( dead_code) ]
715
748
pub ( crate ) fn postgres_type_to_delta ( typ : & PGType ) -> Result < DeltaDataType , ArrowError > {
716
- let arrow_type = TableRowEncoder :: postgres_type_to_arrow_type ( typ) ;
749
+ let arrow_type = TableRowEncoder :: postgres_type_to_arrow_type ( typ, - 1 ) ;
717
750
DeltaDataType :: try_from_arrow ( & arrow_type)
718
751
}
719
752
@@ -723,7 +756,7 @@ pub(crate) fn postgres_to_delta_schema(schema: &PGTableSchema) -> DeltaResult<De
723
756
. column_schemas
724
757
. iter ( )
725
758
. map ( |col| {
726
- let arrow_type = TableRowEncoder :: postgres_type_to_arrow_type ( & col. typ ) ;
759
+ let arrow_type = TableRowEncoder :: postgres_type_to_arrow_type ( & col. typ , col . modifier ) ;
727
760
let delta_data_type = DeltaDataType :: try_from_arrow ( & arrow_type)
728
761
. map_err ( |e| deltalake:: DeltaTableError :: Generic ( e. to_string ( ) ) ) ?;
729
762
Ok ( DeltaStructField :: new (
@@ -740,6 +773,7 @@ pub(crate) fn postgres_to_delta_schema(schema: &PGTableSchema) -> DeltaResult<De
740
773
#[ cfg( test) ]
741
774
mod tests {
742
775
use super :: * ;
776
+ use delta_kernel:: schema:: { DecimalType , PrimitiveType } ;
743
777
744
778
#[ test]
745
779
fn test_scalar_mappings ( ) {
@@ -780,11 +814,10 @@ mod tests {
780
814
postgres_type_to_delta( & PGType :: BYTEA ) . unwrap( ) ,
781
815
DeltaDataType :: BINARY
782
816
) ) ;
783
- // Test NUMERIC mapping - delta-kernel should handle the conversion
784
- let numeric_result = postgres_type_to_delta ( & PGType :: NUMERIC ) . unwrap ( ) ;
785
- // The actual result depends on delta-kernel's conversion implementation
786
- // For now, just verify the conversion succeeds
787
- println ! ( "NUMERIC maps to: {:?}" , numeric_result) ;
817
+ assert ! ( matches!(
818
+ postgres_type_to_delta( & PGType :: NUMERIC ) . unwrap( ) ,
819
+ DeltaDataType :: Primitive ( PrimitiveType :: Decimal ( DecimalType { .. } ) )
820
+ ) ) ;
788
821
}
789
822
790
823
#[ test]
@@ -881,7 +914,7 @@ mod tests {
881
914
) ;
882
915
883
916
// Test that we can convert back to Arrow
884
- let arrow_type = TableRowEncoder :: postgres_type_to_arrow_type ( & pg_type) ;
917
+ let arrow_type = TableRowEncoder :: postgres_type_to_arrow_type ( & pg_type, - 1 ) ;
885
918
let roundtrip_delta = DeltaDataType :: try_from_arrow ( & arrow_type) ;
886
919
assert ! (
887
920
roundtrip_delta. is_ok( ) ,
@@ -920,45 +953,79 @@ mod tests {
920
953
assert_eq ! ( batch. num_columns( ) , 12 ) ; // All test columns
921
954
}
922
955
956
+ #[ test]
957
+ fn test_decimal_precision_scale_extraction ( ) {
958
+ // Test specific atttypmod values from the Stack Overflow example
959
+ // https://stackoverflow.com/questions/72725508/how-to-calculate-numeric-precision-and-other-vals-from-atttypmod
960
+
961
+ // NUMERIC(5,2) -> atttypmod = 327686
962
+ assert_eq ! ( extract_numeric_precision( 327686 ) , 5 ) ;
963
+ assert_eq ! ( extract_numeric_scale( 327686 ) , 2 ) ;
964
+
965
+ // NUMERIC(5,1) -> atttypmod = 327685
966
+ assert_eq ! ( extract_numeric_precision( 327685 ) , 5 ) ;
967
+ assert_eq ! ( extract_numeric_scale( 327685 ) , 1 ) ;
968
+
969
+ // NUMERIC(6,3) -> atttypmod = 393223
970
+ assert_eq ! ( extract_numeric_precision( 393223 ) , 6 ) ;
971
+ assert_eq ! ( extract_numeric_scale( 393223 ) , 3 ) ;
972
+
973
+ // NUMERIC(4,4) -> atttypmod = 262152
974
+ assert_eq ! ( extract_numeric_precision( 262152 ) , 4 ) ;
975
+ assert_eq ! ( extract_numeric_scale( 262152 ) , 4 ) ;
976
+
977
+ // Test -1 (no limit)
978
+ assert_eq ! ( extract_numeric_precision( -1 ) , 38 ) ; // Max precision
979
+ assert_eq ! ( extract_numeric_scale( -1 ) , 18 ) ; // Default scale
980
+
981
+ let arrow_type = TableRowEncoder :: postgres_type_to_arrow_type ( & PGType :: NUMERIC , 327686 ) ;
982
+ if let ArrowDataType :: Decimal128 ( precision, scale) = arrow_type {
983
+ assert_eq ! ( precision, 5 ) ;
984
+ assert_eq ! ( scale, 2 ) ;
985
+ } else {
986
+ panic ! ( "Expected Decimal128 type, got: {:?}" , arrow_type) ;
987
+ }
988
+ }
989
+
923
990
#[ test]
924
991
fn test_postgres_type_to_arrow_type_mapping ( ) {
925
992
// Test basic types
926
993
assert_eq ! (
927
- TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: BOOL ) ,
994
+ TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: BOOL , - 1 ) ,
928
995
ArrowDataType :: Boolean
929
996
) ;
930
997
assert_eq ! (
931
- TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: INT4 ) ,
998
+ TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: INT4 , - 1 ) ,
932
999
ArrowDataType :: Int32
933
1000
) ;
934
1001
assert_eq ! (
935
- TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: INT8 ) ,
1002
+ TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: INT8 , - 1 ) ,
936
1003
ArrowDataType :: Int64
937
1004
) ;
938
1005
assert_eq ! (
939
- TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: FLOAT8 ) ,
1006
+ TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: FLOAT8 , - 1 ) ,
940
1007
ArrowDataType :: Float64
941
1008
) ;
942
1009
assert_eq ! (
943
- TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: TEXT ) ,
1010
+ TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: TEXT , - 1 ) ,
944
1011
ArrowDataType :: Utf8
945
1012
) ;
946
1013
assert_eq ! (
947
- TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: DATE ) ,
1014
+ TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: DATE , - 1 ) ,
948
1015
ArrowDataType :: Date32
949
1016
) ;
950
1017
assert_eq ! (
951
- TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: TIME ) ,
1018
+ TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: TIME , - 1 ) ,
952
1019
ArrowDataType :: Timestamp ( TimeUnit :: Microsecond , None )
953
1020
) ;
954
1021
assert_eq ! (
955
- TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: BYTEA ) ,
1022
+ TableRowEncoder :: postgres_type_to_arrow_type( & PGType :: BYTEA , - 1 ) ,
956
1023
ArrowDataType :: Binary
957
1024
) ;
958
1025
959
1026
// Test array types
960
1027
if let ArrowDataType :: List ( field) =
961
- TableRowEncoder :: postgres_type_to_arrow_type ( & PGType :: INT4_ARRAY )
1028
+ TableRowEncoder :: postgres_type_to_arrow_type ( & PGType :: INT4_ARRAY , - 1 )
962
1029
{
963
1030
assert_eq ! ( * field. data_type( ) , ArrowDataType :: Int32 ) ;
964
1031
} else {
0 commit comments