
Commit 4b46eb5

jochenchrist and claude committed
fix: spark exporter decimal precision/scale from customProperties or physicalType (#996)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 2c5beec commit 4b46eb5

File tree

CHANGELOG.md
datacontract/export/spark_exporter.py

2 files changed: +34 -7 lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -7,7 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+### Fixed
 
+- Spark exporter now supports decimal precision/scale via `customProperties` or parsing from `physicalType` (e.g., `decimal(10,2)`) (#996)
 
 ## [0.11.3] - 2026-01-10
 
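For illustration only (not part of the commit): a minimal sketch of the new behavior, assuming the exporter module is importable as `datacontract.export.spark_exporter` and that `SchemaProperty` accepts these keyword fields.

    from open_data_contract_standard.model import SchemaProperty
    from datacontract.export.spark_exporter import to_spark_data_type

    # physicalType can now carry precision/scale inline; the exporter parses it
    prop = SchemaProperty(name="amount", logicalType="number", physicalType="decimal(10,2)")
    print(to_spark_data_type(prop))  # expected: DecimalType(10,2)
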
datacontract/export/spark_exporter.py

Lines changed: 32 additions & 7 deletions
@@ -1,4 +1,5 @@
 import json
+import re
 from typing import List, Optional
 
 from open_data_contract_standard.model import OpenDataContractStandard, SchemaObject, SchemaProperty
@@ -119,6 +120,34 @@ def _get_custom_property_value(prop: SchemaProperty, key: str) -> Optional[str]:
     return None
 
 
+def _parse_decimal_precision_scale(physical_type: str) -> tuple[Optional[int], Optional[int]]:
+    """Parse precision and scale from physicalType like 'decimal(10,2)' or 'numeric(18,4)'."""
+    match = re.match(r"(?:decimal|numeric)\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)", physical_type, re.IGNORECASE)
+    if match:
+        return int(match.group(1)), int(match.group(2))
+    return None, None
+
+
+def _get_decimal_type(prop: SchemaProperty) -> types.DecimalType:
+    """Get DecimalType: first from customProperties, then parse from physicalType, else Spark defaults."""
+    # First check customProperties
+    precision_str = _get_custom_property_value(prop, "precision")
+    scale_str = _get_custom_property_value(prop, "scale")
+    if precision_str is not None or scale_str is not None:
+        precision = int(precision_str) if precision_str else types.DecimalType().precision
+        scale = int(scale_str) if scale_str else types.DecimalType().scale
+        return types.DecimalType(precision=precision, scale=scale)
+
+    # Fallback: parse from physicalType
+    if prop.physicalType:
+        precision, scale = _parse_decimal_precision_scale(prop.physicalType)
+        if precision is not None:
+            return types.DecimalType(precision=precision, scale=scale if scale is not None else 0)
+
+    # Use Spark defaults
+    return types.DecimalType()
+
+
 def _logical_type_to_spark_type(logical_type: str) -> types.DataType:
     """Convert a logical type string to a Spark DataType."""
     if logical_type is None:
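
Not part of the diff, just a quick sketch of what the new helper returns for a few representative inputs (assuming the module path above):

    from datacontract.export.spark_exporter import _parse_decimal_precision_scale

    print(_parse_decimal_precision_scale("decimal(10,2)"))      # (10, 2)
    print(_parse_decimal_precision_scale("NUMERIC( 18 , 4 )"))  # (18, 4), matching is case-insensitive and whitespace-tolerant
    print(_parse_decimal_precision_scale("decimal"))            # (None, None), no precision/scale given
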
@@ -216,10 +245,8 @@ def to_spark_data_type(prop: SchemaProperty) -> types.DataType:
     if physical_type:
         if physical_type in ["string", "varchar", "text", "char", "nvarchar"]:
             return types.StringType()
-        if physical_type in ["decimal", "numeric"]:
-            precision = _get_logical_type_option(prop, "precision") or 38
-            scale = _get_logical_type_option(prop, "scale") or 0
-            return types.DecimalType(precision=precision, scale=scale)
+        if physical_type in ["decimal", "numeric"] or physical_type.startswith(("decimal(", "numeric(")):
+            return _get_decimal_type(prop)
         if physical_type in ["integer", "int", "int32"]:
             return types.IntegerType()
         if physical_type in ["long", "bigint", "int64"]:
@@ -244,9 +271,7 @@ def to_spark_data_type(prop: SchemaProperty) -> types.DataType:
         case "string":
             return types.StringType()
         case "number":
-            precision = _get_logical_type_option(prop, "precision") or 38
-            scale = _get_logical_type_option(prop, "scale") or 0
-            return types.DecimalType(precision=precision, scale=scale)
+            return _get_decimal_type(prop)
         case "integer":
             return types.LongType()
         case "boolean":
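
A hedged sketch of the resulting precedence (not part of the commit): `customProperties` win over `physicalType`, and with neither present the exporter now returns pyspark's default `DecimalType(10,0)` instead of the previous 38/0 fallback. The list-of-dicts form for `customProperties` below is an assumption about how the ODCS model coerces entries; adjust to however your contract defines them.

    from open_data_contract_standard.model import SchemaProperty
    from datacontract.export.spark_exporter import to_spark_data_type

    # customProperties take precedence over the physicalType string
    explicit = SchemaProperty(
        name="price",
        logicalType="number",
        physicalType="decimal(10,2)",
        customProperties=[{"property": "precision", "value": "18"}, {"property": "scale", "value": "4"}],
    )
    print(to_spark_data_type(explicit))  # expected: DecimalType(18,4)

    # neither customProperties nor a parsable physicalType: Spark defaults apply
    bare = SchemaProperty(name="qty", logicalType="number", physicalType="decimal")
    print(to_spark_data_type(bare))  # expected: DecimalType(10,0)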

0 commit comments
