11import json
2+ import re
23from typing import List , Optional
34
45from open_data_contract_standard .model import OpenDataContractStandard , SchemaObject , SchemaProperty
@@ -119,6 +120,34 @@ def _get_custom_property_value(prop: SchemaProperty, key: str) -> Optional[str]:
119120 return None
120121
121122
123+ def _parse_decimal_precision_scale (physical_type : str ) -> tuple [Optional [int ], Optional [int ]]:
124+ """Parse precision and scale from physicalType like 'decimal(10,2)' or 'numeric(18,4)'."""
125+ match = re .match (r"(?:decimal|numeric)\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)" , physical_type , re .IGNORECASE )
126+ if match :
127+ return int (match .group (1 )), int (match .group (2 ))
128+ return None , None
129+
130+
def _get_decimal_type(prop: SchemaProperty) -> types.DecimalType:
    """Build the Spark DecimalType for a schema property.

    Resolution order:

    1. ``precision`` / ``scale`` entries in the property's customProperties.
       If only one of the two is present, the other takes the value of the
       corresponding attribute on a default-constructed ``DecimalType()``.
    2. Precision and scale parsed out of ``prop.physicalType``
       (e.g. ``decimal(10,2)``); a missing scale becomes 0.
    3. A default-constructed ``DecimalType()``.
    """
    custom_precision = _get_custom_property_value(prop, "precision")
    custom_scale = _get_custom_property_value(prop, "scale")

    if custom_precision is None and custom_scale is None:
        # No custom properties at all: try the physicalType, then defaults.
        physical = prop.physicalType
        if physical:
            parsed_precision, parsed_scale = _parse_decimal_precision_scale(physical)
            if parsed_precision is not None:
                if parsed_scale is None:
                    parsed_scale = 0
                return types.DecimalType(precision=parsed_precision, scale=parsed_scale)
        return types.DecimalType()

    # At least one custom property is set; an absent or empty value falls back
    # to the matching attribute of a default DecimalType.
    if custom_precision:
        resolved_precision = int(custom_precision)
    else:
        resolved_precision = types.DecimalType().precision

    if custom_scale:
        resolved_scale = int(custom_scale)
    else:
        resolved_scale = types.DecimalType().scale

    return types.DecimalType(precision=resolved_precision, scale=resolved_scale)
149+
150+
122151def _logical_type_to_spark_type (logical_type : str ) -> types .DataType :
123152 """Convert a logical type string to a Spark DataType."""
124153 if logical_type is None :
@@ -216,10 +245,8 @@ def to_spark_data_type(prop: SchemaProperty) -> types.DataType:
216245 if physical_type :
217246 if physical_type in ["string" , "varchar" , "text" , "char" , "nvarchar" ]:
218247 return types .StringType ()
219- if physical_type in ["decimal" , "numeric" ]:
220- precision = _get_logical_type_option (prop , "precision" ) or 38
221- scale = _get_logical_type_option (prop , "scale" ) or 0
222- return types .DecimalType (precision = precision , scale = scale )
248+ if physical_type in ["decimal" , "numeric" ] or physical_type .startswith (("decimal(" , "numeric(" )):
249+ return _get_decimal_type (prop )
223250 if physical_type in ["integer" , "int" , "int32" ]:
224251 return types .IntegerType ()
225252 if physical_type in ["long" , "bigint" , "int64" ]:
@@ -244,9 +271,7 @@ def to_spark_data_type(prop: SchemaProperty) -> types.DataType:
244271 case "string" :
245272 return types .StringType ()
246273 case "number" :
247- precision = _get_logical_type_option (prop , "precision" ) or 38
248- scale = _get_logical_type_option (prop , "scale" ) or 0
249- return types .DecimalType (precision = precision , scale = scale )
274+ return _get_decimal_type (prop )
250275 case "integer" :
251276 return types .LongType ()
252277 case "boolean" :
0 commit comments