Commit 183f27c (parent: b254146)

SNOW-2887937: fix DataFrameWriter losing decimal precision info when writing back to a Snowflake table (#4024)

4 files changed: +182, -3 lines
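For context, here is a minimal sketch of the round trip this commit fixes, distilled from the new tests below. It assumes an existing Snowpark `session`; the table name is a placeholder, and `_is_snowpark_connect_compatible_mode` is an internal flag, patched here the same way the tests patch it.

```python
from unittest import mock

from snowflake.snowpark import context
from snowflake.snowpark.types import DecimalType, StructField, StructType

# `session` is an existing snowflake.snowpark.Session.
with mock.patch.object(context, "_is_snowpark_connect_compatible_mode", True):
    schema = StructType([StructField("amount", DecimalType(19, 0), True)])
    df = session.create_dataframe([], schema)

    df.write.save_as_table("precision_demo", mode="overwrite", table_type="temp")

    # Before this fix, the generated DDL dropped the precision metadata and the
    # column round-tripped as NUMBER(38, 0); with it, NUMBER(19, 0) survives.
    round_tripped = session.table("precision_demo").schema.fields[0].datatype
    print(round_tripped._precision)  # 19
```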

src/snowflake/snowpark/_internal/type_utils.py

Lines changed: 3 additions & 0 deletions
@@ -316,6 +316,9 @@ def convert_sf_to_sp_type(
 
 
 def convert_sp_to_sf_type(datatype: DataType, nullable_override=None) -> str:
+    if context._is_snowpark_connect_compatible_mode:
+        if isinstance(datatype, _IntegralType) and datatype._precision is not None:
+            return f"NUMBER({datatype._precision}, 0)"
     if isinstance(datatype, DecimalType):
        return f"NUMBER({datatype.precision}, {datatype.scale})"
     if isinstance(datatype, IntegerType):
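A quick sketch of what the new branch emits. `convert_sp_to_sf_type` is internal, so this is illustrative only, and the flag is set directly here where the tests patch it instead.

```python
from snowflake.snowpark import context
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
from snowflake.snowpark.types import LongType

context._is_snowpark_connect_compatible_mode = True  # internal flag

# An integral type carrying precision metadata now keeps it in the DDL type...
print(convert_sp_to_sf_type(LongType(_precision=19)))  # NUMBER(19, 0)

# ...while one without metadata falls through to the pre-existing branches.
print(convert_sp_to_sf_type(LongType()))  # plain integral DDL type, as before
```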

src/snowflake/snowpark/context.py

Lines changed: 5 additions & 0 deletions
@@ -43,6 +43,11 @@
 # Global flag for fix 2360274. When enabled schema queries will use NULL as a place holder for any values inside structured objects
 _enable_fix_2360274 = False
 
+# Internal-only dictionary storing the default precision of integral types; if a type
+# does not appear in the dictionary, its default precision is None.
+# Example: _integral_type_default_precision = {IntegerType: 9} makes IntegerType's default _precision 9.
+_integral_type_default_precision = {}
+
 
 def configure_development_features(
     *,
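The dictionary's contract, as the new tests exercise it: a type listed in the map picks up that default precision at construction time, and anything absent stays `None`. A minimal sketch, patching the internal map the way the tests do:

```python
from unittest import mock

from snowflake.snowpark import context
from snowflake.snowpark.types import IntegerType, LongType, ShortType

with mock.patch.object(
    context, "_integral_type_default_precision", {IntegerType: 10, LongType: 19}
):
    assert IntegerType()._precision == 10  # seeded default
    assert LongType()._precision == 19     # seeded default
    assert ShortType()._precision is None  # not in the map, so no default
```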

src/snowflake/snowpark/types.py

Lines changed: 3 additions & 1 deletion
@@ -371,7 +371,9 @@ def _fill_ast(self, ast: proto.DataType) -> None:
 # Numeric types
 class _IntegralType(_NumericType):
     def __init__(self, **kwargs) -> None:
-        self._precision = kwargs.pop("_precision", None)
+        self._precision = kwargs.pop(
+            "_precision", context._integral_type_default_precision.get(type(self), None)
+        )
 
         if kwargs != {}:
             raise TypeError(
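Note that an explicit `_precision` keyword still wins over the context default, because `kwargs.pop` only falls back to the dictionary lookup when the key is absent. A small sketch:

```python
from unittest import mock

from snowflake.snowpark import context
from snowflake.snowpark.types import LongType

with mock.patch.object(context, "_integral_type_default_precision", {LongType: 19}):
    assert LongType()._precision == 19             # falls back to the context default
    assert LongType(_precision=5)._precision == 5  # explicit kwarg takes precedence
```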

tests/integ/test_datatypes.py

Lines changed: 171 additions & 2 deletions
@@ -5,10 +5,11 @@
 import os
 import tempfile
 from decimal import Decimal
+from unittest import mock
 
 import pytest
 
-from snowflake.snowpark import DataFrame, Row
+from snowflake.snowpark import DataFrame, Row, context
 from snowflake.snowpark.functions import lit
 from snowflake.snowpark.types import (
     BooleanType,
@@ -19,6 +20,8 @@
     StringType,
     StructField,
     StructType,
+    IntegerType,
+    ShortType,
 )
 from tests.utils import Utils
 
@@ -437,7 +440,6 @@ def test_numeric_type_store_precision_and_scale(session, massive_number, precisi
     # does not have precision information, thus set to default 38.
     df.write.save_as_table(table_name, mode="overwrite", table_type="temp")
     result = session.sql(f"select * from {table_name}")
-    session.sql(f"describe table {table_name}").show()
     datatype = result.schema.fields[0].datatype
     assert isinstance(datatype, LongType)
     assert datatype._precision == 38
@@ -502,3 +504,170 @@ def write_csv(data):
 def test_illegal_argument_intergraltype():
     with pytest.raises(TypeError, match="takes 0 argument but 1 were given"):
         LongType(b=10)
+
+
+@pytest.mark.skipif(
+    "config.getoption('local_testing_mode', default=False)",
+    reason="session.sql not supported by local testing mode",
+)
+@pytest.mark.parametrize("precision", [38, 19, 5, 3])
+def test_write_to_sf_with_correct_precision(session, precision):
+    table_name = Utils.random_table_name()
+
+    with mock.patch.object(context, "_is_snowpark_connect_compatible_mode", True):
+        df = session.create_dataframe(
+            [],
+            StructType([StructField("large_value", DecimalType(precision, 0), True)]),
+        )
+        datatype = df.schema.fields[0].datatype
+        assert datatype._precision == precision
+
+        df.write.save_as_table(table_name, mode="overwrite", table_type="temp")
+        result = session.sql(f"select * from {table_name}")
+        datatype = result.schema.fields[0].datatype
+        assert datatype._precision == precision
+
+
+@pytest.mark.parametrize(
+    "mock_default_precision",
+    [
+        {IntegerType: 5, LongType: 4},
+        {LongType: 19, IntegerType: 10},
+    ],
+)
+def test_integral_type_default_precision(mock_default_precision):
+    with mock.patch(
+        "snowflake.snowpark.context._integral_type_default_precision",
+        mock_default_precision,
+    ):
+        integer_type = IntegerType()
+        assert integer_type._precision == mock_default_precision[IntegerType]
+
+        long_type = LongType()
+        assert long_type._precision == mock_default_precision[LongType]
+
+        short_type = ShortType()
+        assert short_type._precision is None
+
+
+@pytest.mark.skipif(
+    "config.getoption('local_testing_mode', default=False)",
+    reason="session.sql not supported by local testing mode",
+)
+@pytest.mark.parametrize(
+    "mock_default_precision",
+    [
+        {IntegerType: 5, LongType: 4},
+        {LongType: 19, IntegerType: 10},
+    ],
+)
+def test_end_to_end_default_precision(session, mock_default_precision):
+    table_name = Utils.random_table_name()
+
+    with mock.patch.object(
+        context, "_is_snowpark_connect_compatible_mode", True
+    ), mock.patch.object(
+        context, "_integral_type_default_precision", mock_default_precision
+    ):
+
+        schema = StructType(
+            [
+                StructField("D38", DecimalType(38, 0), True),
+                StructField("D19", DecimalType(19, 0), True),
+                StructField("D5", DecimalType(5, 0), True),
+                StructField("D3", DecimalType(3, 0), True),
+                StructField("integer_value", IntegerType(), True),
+                StructField("long_value", LongType(), True),
+            ]
+        )
+
+        df = session.create_dataframe(
+            [],
+            schema,
+        )
+        assert df.schema.fields[0].datatype._precision == 38
+        assert df.schema.fields[1].datatype._precision == 19
+        assert df.schema.fields[2].datatype._precision == 5
+        assert df.schema.fields[3].datatype._precision == 3
+        assert (
+            df.schema.fields[4].datatype._precision
+            == mock_default_precision[IntegerType]
+        )
+        assert (
+            df.schema.fields[5].datatype._precision == mock_default_precision[LongType]
+        )
+
+        df.write.save_as_table(table_name, mode="overwrite", table_type="temp")
+        result = session.sql(f"select * from {table_name}")
+        assert result.schema.fields[0].datatype._precision == 38
+        assert result.schema.fields[1].datatype._precision == 19
+        assert result.schema.fields[2].datatype._precision == 5
+        assert result.schema.fields[3].datatype._precision == 3
+        assert (
+            result.schema.fields[4].datatype._precision
+            == mock_default_precision[IntegerType]
+        )
+        assert (
+            result.schema.fields[5].datatype._precision
+            == mock_default_precision[LongType]
+        )
+
+
+@pytest.mark.skipif(
+    "config.getoption('local_testing_mode', default=False)",
+    reason="relaxed_types not supported by local testing mode",
+)
+@pytest.mark.parametrize("massive_number", ["9" * 38, "5" * 19, "7" * 5])
+def test_default_precision_read_file(session, massive_number):
+    mock_default_precision = {LongType: 19, IntegerType: 10}
+    with mock.patch.object(
+        context, "_is_snowpark_connect_compatible_mode", True
+    ), mock.patch.object(
+        context, "_integral_type_default_precision", mock_default_precision
+    ):
+        stage_name = Utils.random_stage_name()
+        header = ("BIG_NUM",)
+        test_data = [(massive_number,)]
+
+        def write_csv(data):
+            with tempfile.NamedTemporaryFile(
+                mode="w+",
+                delete=False,
+                suffix=".csv",
+                newline="",
+            ) as file:
+                writer = csv.writer(file)
+                writer.writerow(header)
+                for row in data:
+                    writer.writerow(row)
+                return file.name
+
+        file_path = write_csv(test_data)
+
+        try:
+            Utils.create_stage(session, stage_name, is_temporary=True)
+            result = session.file.put(
+                file_path, f"@{stage_name}", auto_compress=False, overwrite=True
+            )
+
+            # Infer schema from only the short file
+            constrained_reader = session.read.options(
+                {
+                    "INFER_SCHEMA": True,
+                    "INFER_SCHEMA_OPTIONS": {"FILES": [result[0].target]},
+                    "PARSE_HEADER": True,
+                    # Only load the short file
+                    "PATTERN": f".*{result[0].target}",
+                }
+            )
+
+            # df1 uses constrained types
+            df1 = constrained_reader.csv(f"@{stage_name}/")
+            datatype = df1.schema.fields[0].datatype
+            assert isinstance(datatype, LongType)
+            assert datatype._precision == len(massive_number)
+
+        finally:
+            Utils.drop_stage(session, stage_name)
+            if os.path.exists(file_path):
+                os.remove(file_path)
