
Commit 918498e

Merge branch 'refs/heads/main' into feature/aherrera/SNOW-2432059-StringAndBinary-part1

# Conflicts:
#	CHANGELOG.md

2 parents: 2a5afac + b4fd165

29 files changed: +956 −242 lines

CHANGELOG.md

Lines changed: 9 additions & 2 deletions

```diff
@@ -58,7 +58,7 @@
   - `st_geometryfromwkt`
   - `try_to_geography`
   - `try_to_geometry`
-
+
 - String and Binary functions:
   - `base64_decode_binary`
   - `compress`
@@ -70,7 +70,8 @@
   - `sha1_binary`
   - `sha2_binary`
   - `soundex_p123`
-
+
+- Added a parameter to enable and disable automatic column name aliasing for the `interval_day_time_from_parts` and `interval_year_month_from_parts` functions.
 
 #### Bug Fixes
 
@@ -79,15 +80,21 @@
 - Fixed a bug where writing Snowpark pandas dataframes on the pandas backend with a column multiindex to Snowflake with `to_snowflake` would raise `KeyError`.
 - Fixed a bug where `DataFrameReader.dbapi` (PuPr) was not compatible with oracledb 3.4.0.
 
+#### Improvements
+
+- The default maximum length for inferred StringType columns during schema inference in `DataFrameReader.dbapi` is now increased from 16MB to 128MB in parquet-file-based ingestion.
+
 #### Dependency Updates
 
 - Updated dependency of `snowflake-connector-python>=3.17,<5.0.0`.
 
 ### Snowpark pandas API Updates
 
 #### New Features
+
 - Added support for the `dtypes` parameter of `pd.get_dummies`
 - Added support for `nunique` in `df.pivot_table`, `df.agg` and other places where aggregate functions can be used.
+- Added support for `DataFrame.interpolate` and `Series.interpolate` with the "linear", "ffill"/"pad", and "backfill"/"bfill" methods. These use the SQL `INTERPOLATE_LINEAR`, `INTERPOLATE_FFILL`, and `INTERPOLATE_BFILL` functions (PuPr).
 
 #### Improvements
```
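To make the Snowpark pandas changelog entries concrete, here is a minimal sketch of the new `nunique` aggregation support; the session setup and sample data are illustrative assumptions, not part of this commit:

```python
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401  # registers the Snowpark pandas backend
from snowflake.snowpark import Session

Session.builder.getOrCreate()  # assumes Snowflake connection parameters are configured

df = pd.DataFrame({"a": [1, 1, 2], "b": ["x", "y", "y"], "c": [1.0, 2.0, 2.0]})

# Per the entry above, `nunique` is now accepted wherever aggregate functions are.
print(df.agg("nunique"))                                         # distinct count per column
print(df.pivot_table(index="a", values="c", aggfunc="nunique"))  # distinct count per group
```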
docs/source/modin/supported/dataframe_supported.rst

Lines changed: 5 additions & 1 deletion

```diff
@@ -227,7 +227,11 @@ Methods
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``insert``                  | Y                               |                                  |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
-| ``interpolate``             | N                               |                                  |                                                    |
+| ``interpolate``             | P                               |                                  | ``N`` if ``axis == 1``, ``limit`` is set,          |
+|                             |                                 |                                  | ``limit_area`` is "outside", or ``method`` is not  |
+|                             |                                 |                                  | "linear", "bfill", "backfill", "ffill", or "pad".  |
+|                             |                                 |                                  | ``limit_area="inside"`` is supported only when     |
+|                             |                                 |                                  | ``method`` is ``linear``.                          |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``isetitem``                | N                               |                                  |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
```

docs/source/modin/supported/series_supported.rst

Lines changed: 5 additions & 1 deletion

```diff
@@ -243,7 +243,11 @@ Methods
 | ``info``                    | D                               |                                  | Different Index types are used in pandas but not  |
 |                             |                                 |                                  | in Snowpark pandas                                 |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
-| ``interpolate``             | N                               |                                  |                                                    |
+| ``interpolate``             | P                               |                                  | ``N`` if ``limit`` is set,                         |
+|                             |                                 |                                  | ``limit_area`` is "outside", or ``method`` is not  |
+|                             |                                 |                                  | "linear", "bfill", "backfill", "ffill", or "pad".  |
+|                             |                                 |                                  | ``limit_area="inside"`` is supported only when     |
+|                             |                                 |                                  | ``method`` is ``linear``.                          |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``isin``                    | Y                               |                                  | Snowpark pandas deviates with respect to handling |
 |                             |                                 |                                  | NA values                                          |
```
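To illustrate the ``P`` (partial) entries above, a sketch of supported and unsupported `interpolate` calls. The printed values follow standard pandas semantics, and the `NotImplementedError` for unsupported arguments is the usual Snowpark pandas convention; both are assumptions here rather than output captured from this commit:

```python
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401  # registers the Snowpark pandas backend
from snowflake.snowpark import Session

Session.builder.getOrCreate()  # assumes Snowflake connection parameters are configured

s = pd.Series([1.0, None, 3.0, None])

print(s.interpolate())                # "linear" (default): 1.0, 2.0, 3.0, 3.0
print(s.interpolate(method="ffill"))  # forward fill:       1.0, 1.0, 3.0, 3.0

try:
    s.interpolate(limit=1)  # ``limit`` is marked unsupported in the tables above
except NotImplementedError as exc:
    print(exc)
```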

src/snowflake/snowpark/_internal/data_source/drivers/base_driver.py

Lines changed: 25 additions & 6 deletions

```diff
@@ -11,6 +11,7 @@
     Connection,
     Cursor,
 )
+from snowflake.snowpark._internal.server_connection import MAX_STRING_SIZE
 from snowflake.snowpark._internal.utils import (
     get_sorted_key_for_version,
     measure_time,
@@ -27,6 +28,7 @@
     BinaryType,
     DateType,
     BooleanType,
+    StringType,
 )
 import snowflake.snowpark
 import logging
@@ -103,7 +105,16 @@ def infer_schema_from_description(
         query_input_alias: str,
     ) -> StructType:
         self.get_raw_schema(table_or_query, cursor, is_query, query_input_alias)
-        return self.to_snow_type(self.raw_schema)
+        generated_schema = self.to_snow_type(self.raw_schema)
+        # Snowflake will default string length to 128MB in the BCR bundle that will be enabled in 2026-01:
+        # https://docs.snowflake.com/en/release-notes/bcr-bundles/2025_07_bundle
+        # We make the change ahead of time to:
+        # 1. align the string length with UDTF-based ingestion
+        # 2. avoid the BCR impact on the dbapi feature
+        for field in generated_schema.fields:
+            if isinstance(field.datatype, StringType) and field.datatype.length is None:
+                field.datatype.length = MAX_STRING_SIZE
+        return generated_schema
 
     def infer_schema_from_description_with_error_control(
         self, table_or_query: str, is_query: bool, query_input_alias: str
@@ -184,7 +195,10 @@ def udtf_ingestion(
         select * from {partition_table}, table({udtf_name}({PARTITION_TABLE_COLUMN_NAME}))
         """
         res = session.sql(call_udtf_sql, _emit_ast=_emit_ast)
-        return self.to_result_snowpark_df_udtf(res, schema, _emit_ast=_emit_ast)
+        return BaseDriver.keep_nullable_attributes(
+            self.to_result_snowpark_df_udtf(res, schema, _emit_ast=_emit_ast),
+            schema,
+        )
 
     def udtf_class_builder(
         self,
@@ -284,6 +298,14 @@ def to_result_snowpark_df(
     ) -> "DataFrame":
         return session.table(table_name, _emit_ast=_emit_ast)
 
+    @staticmethod
+    def keep_nullable_attributes(
+        selected_df: "DataFrame", schema: StructType
+    ) -> "DataFrame":
+        for attr, source_field in zip(selected_df._plan.attributes, schema.fields):
+            attr.nullable = source_field.nullable
+        return selected_df
+
     @staticmethod
     def to_result_snowpark_df_udtf(
         res_df: "DataFrame",
@@ -294,10 +316,7 @@ def to_result_snowpark_df_udtf(
             res_df[field.name].cast(field.datatype).alias(field.name)
             for field in schema.fields
         ]
-        selected_df = res_df.select(cols, _emit_ast=_emit_ast)
-        for attr, source_field in zip(selected_df._plan.attributes, schema.fields):
-            attr.nullable = source_field.nullable
-        return selected_df
+        return res_df.select(cols, _emit_ast=_emit_ast)
 
     def get_server_cursor_if_supported(self, conn: "Connection") -> "Cursor":
         """
```

src/snowflake/snowpark/_internal/server_connection.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -86,6 +86,7 @@
 PARAM_INTERNAL_APPLICATION_NAME = "internal_application_name"
 PARAM_INTERNAL_APPLICATION_VERSION = "internal_application_version"
 DEFAULT_STRING_SIZE = 16777216
+MAX_STRING_SIZE = 134217728
 
 
 def _build_target_path(stage_location: str, dest_prefix: str = "") -> str:
```
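For reference, the two constants above are exactly 16 MiB and 128 MiB:

```python
# Sanity check of the sizes above.
assert 16777216 == 16 * 1024 * 1024    # DEFAULT_STRING_SIZE, 16 MiB
assert 134217728 == 128 * 1024 * 1024  # MAX_STRING_SIZE, 128 MiB
```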

src/snowflake/snowpark/dataframe_reader.py

Lines changed: 18 additions & 12 deletions

```diff
@@ -1707,18 +1707,24 @@ def dbapi(
         Reads data from a database table or query into a DataFrame using a DBAPI connection,
         with support for optional partitioning, parallel processing, and query customization.
 
-        There are multiple methods to partition data and accelerate ingestion.
-        These methods can be combined to achieve optimal performance:
-
-        1.Use column, lower_bound, upper_bound and num_partitions at the same time when you need to split large tables into smaller partitions for parallel processing.
-        These must all be specified together, otherwise error will be raised.
-        2.Set max_workers to a proper positive integer.
-        This defines the maximum number of processes and threads used for parallel execution.
-        3.Adjusting fetch_size can optimize performance by reducing the number of round trips to the database.
-        4.Use predicates to defining WHERE conditions for partitions,
-        predicates will be ignored if column is specified to generate partition.
-        5.Set custom_schema to avoid snowpark infer schema, custom_schema must have a matched
-        column name with table in external data source.
+        Usage Notes:
+            - Ingestion performance tuning:
+                - **Partitioning**: Use ``column``, ``lower_bound``, ``upper_bound``, and ``num_partitions``
+                  together to split large tables into smaller partitions for parallel processing.
+                  All four parameters must be specified together, otherwise an error will be raised.
+                - **Parallel execution**: Set ``max_workers`` to control the maximum number of processes
+                  and threads used for parallel execution.
+                - **Fetch optimization**: Adjust ``fetch_size`` to optimize performance by reducing
+                  the number of round trips to the database.
+                - **Partition filtering**: Use ``predicates`` to define WHERE conditions for partitions.
+                  Note that ``predicates`` will be ignored if ``column`` is specified for partitioning.
+                - **Schema specification**: Set ``custom_schema`` to skip schema inference. The custom schema
+                  must have matching column names with the table in the external data source.
+            - Execution timing and error handling:
+                - **UDTF Ingestion**: Uses lazy evaluation. Errors are reported as ``SnowparkSQLException``
+                  during DataFrame actions (e.g., ``DataFrame.collect()``).
+                - **Local Ingestion**: Uses eager execution. Errors are reported immediately as
+                  ``SnowparkDataFrameReaderException`` when this method is called.
 
         Args:
             create_connection: A callable that returns a DB-API compatible database connection.
```
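Following those usage notes, a minimal sketch of a partitioned read. The driver, database, table, and bounds are illustrative assumptions, and passing the source as ``table=`` is likewise an assumption; only the tuning parameters are quoted from the docstring above:

```python
import sqlite3  # any DB-API 2.0 compliant driver; sqlite3 is used purely for illustration

from snowflake.snowpark import Session


def create_connection():
    # Hypothetical local database; substitute your driver's connect() call.
    return sqlite3.connect("example.db")


session = Session.builder.getOrCreate()  # assumes Snowflake connection parameters are configured

# column, lower_bound, upper_bound, and num_partitions must be supplied together.
df = session.read.dbapi(
    create_connection,
    table="ORDERS",         # hypothetical source table
    column="ORDER_ID",      # column used to compute partition ranges
    lower_bound=0,
    upper_bound=1_000_000,
    num_partitions=8,
    max_workers=4,          # cap on processes/threads for parallel ingestion
    fetch_size=10_000,      # rows per round trip to the source database
)
df.collect()  # with UDTF ingestion, errors surface here as SnowparkSQLException
```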

src/snowflake/snowpark/functions.py

Lines changed: 30 additions & 14 deletions

```diff
@@ -11031,6 +11031,7 @@ def make_interval(
 def interval_year_month_from_parts(
     years: Optional[ColumnOrName] = None,
     months: Optional[ColumnOrName] = None,
+    _alias_column_name: Optional[bool] = True,
     _emit_ast: bool = True,
 ) -> Column:
     """
@@ -11042,6 +11043,7 @@ def interval_year_month_from_parts(
     Args:
         years: The number of years, positive or negative
        months: The number of months, positive or negative
+        _alias_column_name: If true, alias the column name to a cleaner value
 
     Returns:
         A Column representing a year-month interval
@@ -11091,15 +11093,21 @@ def interval_year_month_from_parts(
     )
     interval_string = concat(sign_prefix, normalized_years, lit("-"), normalized_months)
 
-    def get_col_name(col):
-        if isinstance(col._expr1, Literal):
-            return str(col._expr1.value)
-        else:
-            return col._expression.name
+    res = cast(interval_string, "INTERVAL YEAR TO MONTH")
+    if _alias_column_name:
+        # Aliasing the column when this function is used inside a CASE WHEN
+        # expression throws an error, so we only alias when necessary.
+
+        def get_col_name(col):
+            if isinstance(col._expr1, Literal):
+                return str(col._expr1.value)
+            else:
+                return col._expression.name
+
+        alias_name = f"interval_year_month_from_parts({get_col_name(years_col)}, {get_col_name(months_col)})"
 
-    alias_name = f"interval_year_month_from_parts({get_col_name(years_col)}, {get_col_name(months_col)})"
+        res = res.alias(alias_name)
 
-    res = cast(interval_string, "INTERVAL YEAR TO MONTH").alias(alias_name)
     res._ast = ast
     return res
 
@@ -11114,6 +11122,7 @@ def interval_day_time_from_parts(
     hours: Optional[ColumnOrName] = None,
     mins: Optional[ColumnOrName] = None,
     secs: Optional[ColumnOrName] = None,
+    _alias_column_name: Optional[bool] = True,
     _emit_ast: bool = True,
 ) -> Column:
     """
@@ -11127,6 +11136,7 @@ def interval_day_time_from_parts(
         hours: The number of hours, positive or negative
         mins: The number of minutes, positive or negative
         secs: The number of seconds, positive or negative
+        _alias_column_name: If true, alias the column name to a cleaner value
 
     Returns:
         A Column representing a day-time interval
@@ -11238,15 +11248,21 @@ def interval_day_time_from_parts(
         secs_formatted,
     )
 
-    def get_col_name(col):
-        if isinstance(col._expr1, Literal):
-            return str(col._expr1.value)
-        else:
-            return str(col._expr1)
+    res = cast(interval_value, "INTERVAL DAY TO SECOND")
+    if _alias_column_name:
+        # Aliasing the column when this function is used inside a CASE WHEN
+        # expression throws an error, so we only alias when necessary.
+
+        def get_col_name(col):
+            if isinstance(col._expr1, Literal):
+                return str(col._expr1.value)
+            else:
+                return str(col._expr1)
+
+        alias_name = f"interval_day_time_from_parts({get_col_name(days_col)}, {get_col_name(hours_col)}, {get_col_name(mins_col)}, {get_col_name(secs_col)})"
 
-    alias_name = f"interval_day_time_from_parts({get_col_name(days_col)}, {get_col_name(hours_col)}, {get_col_name(mins_col)}, {get_col_name(secs_col)})"
+        res = res.alias(alias_name)
 
-    res = cast(interval_value, "INTERVAL DAY TO SECOND").alias(alias_name)
     res._ast = ast
     return res
```

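To see the effect of the new flag from the changelog entry, a small sketch (the session and sample data are assumptions; note `_alias_column_name` is underscore-prefixed, i.e. a private parameter):

```python
from snowflake.snowpark import Session
from snowflake.snowpark.functions import interval_year_month_from_parts, lit

session = Session.builder.getOrCreate()  # assumes Snowflake connection parameters are configured
df = session.create_dataframe([[2, 3]], schema=["y", "m"])

# Default behavior: the result column gets the readable alias
# interval_year_month_from_parts(..., ...).
df.select(interval_year_month_from_parts(df["y"], df["m"])).show()

# In contexts where an alias is illegal (e.g. inside a CASE WHEN branch),
# pass _alias_column_name=False to leave the cast expression unaliased.
unaliased = interval_year_month_from_parts(lit(2), lit(3), _alias_column_name=False)
df.select(unaliased).show()
```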