
Commit b45a4f2

Merge branch 'main' into yuwang-custom-data-source
2 parents 74e663b + d7caffe commit b45a4f2

278 files changed: +12,592 additions, −1,869 deletions

Lines changed: 2 additions & 1 deletion
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash

-gpg --quiet --batch --yes --decrypt --passphrase="$PARAMETER_PASSWORD" --output tests/parameters.py .github/workflows/parameters/parameters_${CLOUD_PROVIDER}.py.gpg
+gpg --quiet --batch --yes --decrypt --passphrase="$PARAMETER_PASSWORD" .github/workflows/parameters/rsa_keys/rsa_key_${CLOUD_PROVIDER}.p8.gpg >> tests/rsa_key_${CLOUD_PROVIDER}.p8
+gpg --quiet --batch --yes --decrypt --passphrase="$PARAMETER_PASSWORD" .github/workflows/parameters/parameters_${CLOUD_PROVIDER}.py.gpg >> tests/parameters.py
 gpg --quiet --batch --yes --decrypt --passphrase="$PARAMETER_PASSWORD" .github/workflows/parameters/parameters_dbapi.py.gpg >> tests/parameters.py

.github/workflows/create-test-branch-from-release.yml

Lines changed: 2 additions & 23 deletions
@@ -1,22 +1,11 @@
 # This workflow automatically creates a test branch from a release tag
 # For example, when release v1.40.0 is published, it creates test-v1.40.0 branch
-# Can also be triggered manually to create a test branch from any existing tag

 name: Create Test Branch from Release

 on:
   release:
     types: [published]
-  workflow_dispatch:
-    inputs:
-      tag_name:
-        description: 'Tag name to create test branch from (e.g., v1.40.0)'
-        required: true
-        type: string
-      test_branch_name:
-        description: 'Test branch name (optional, defaults to test-<tag_name>)'
-        required: false
-        type: string

 permissions:
   contents: write
@@ -29,18 +18,8 @@ jobs:
       - name: Extract tag name
         id: extract_tag
         run: |
-          # Determine tag name based on trigger type
-          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
-            TAG_NAME=${{ inputs.tag_name }}
-            if [ -n "${{ inputs.test_branch_name }}" ]; then
-              TEST_BRANCH_NAME=${{ inputs.test_branch_name }}
-            else
-              TEST_BRANCH_NAME="test-${TAG_NAME}"
-            fi
-          else
-            TAG_NAME=${{ github.event.release.tag_name }}
-            TEST_BRANCH_NAME="test-${TAG_NAME}"
-          fi
+          TAG_NAME=${{ github.event.release.tag_name }}
+          TEST_BRANCH_NAME="test-${TAG_NAME}"

           echo "tag_name=${TAG_NAME}" >> $GITHUB_OUTPUT
           echo "test_branch_name=${TEST_BRANCH_NAME}" >> $GITHUB_OUTPUT
44 Bytes — Binary file not shown.
50 Bytes — Binary file not shown.

.github/workflows/parameters/parameters_gcp.py.gpg

Lines changed: 4 additions & 3 deletions
(Diff of GPG-encrypted content omitted; the ciphertext is not human-readable.)
2.54 KB — Binary file not shown.
2.54 KB — Binary file not shown.
2.53 KB — Binary file not shown.

CHANGELOG.md

Lines changed: 206 additions & 1 deletion
@@ -1,6 +1,161 @@
 # Release History

-## 1.41.0 (YYYY-MM-DD)
+## 1.43.0 (YYYY-MM-DD)
+
+### Snowpark Python API Updates
+
+#### New Features
+
+- Added support for `Session.client_telemetry`.
+- Added support for `Session.udf_profiler`.
+- Added support for `functions.ai_translate`.
+- Added support for the following functions in `functions.py`:
+  - String and Binary functions:
+    - `base64_decode_binary`
+    - `compress`
+    - `decompress_binary`
+    - `decompress_string`
+    - `md5_binary`
+    - `md5_number_lower64`
+    - `md5_number_upper64`
+    - `sha1_binary`
+    - `sha2_binary`
+    - `soundex_p123`
+    - `strtok`
+    - `try_base64_decode_binary`
+    - `try_base64_decode_string`
+    - `try_hex_decode_binary`
+    - `try_hex_decode_string`
+    - `unicode`
+    - `uuid_string`
+  - Conditional expressions:
+    - `booland_agg`
+    - `boolxor_agg`
+    - `regr_valy`
+    - `zeroifnull`
+  - Numeric expressions:
+    - `cot`
+    - `mod`
+    - `pi`
+    - `square`
+    - `width_bucket`
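The helpers above presumably map to the Snowflake SQL functions of the same name. A minimal sketch of calling a few of them, assuming they are exported from `snowflake.snowpark.functions` as the entry implies and that `session` is an already-created `Session`:

```python
# Sketch only: uuid_string, square, and pi are taken from the list above;
# `session` is assumed to be an existing, configured snowflake.snowpark.Session.
from snowflake.snowpark.functions import col, pi, square, uuid_string

df = session.create_dataframe([(3.0,), (4.0,)], schema=["x"])
df.select(
    uuid_string().alias("row_id"),    # random UUID per row
    square(col("x")).alias("x_sq"),   # 9.0, 16.0
    pi().alias("pi"),                 # 3.14159...
).show()
```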
+
+#### Improvements
+
+- Enhanced `DataFrame.sort()` to support `ORDER BY ALL` when no columns are specified.
+- The Catalog API now uses SQL commands instead of SnowAPI calls, which makes the new implementation more reliable.
+
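A hedged sketch of the `DataFrame.sort()` change noted above: with no columns, the generated query is described as ordering by every column (`ORDER BY ALL`). An existing `session` is again assumed:

```python
# Sketch: per the changelog entry, sort() with no arguments now orders by all columns.
df = session.create_dataframe([(2, "b"), (1, "a")], schema=["id", "val"])
df.sort().show()                        # behaves like ORDER BY ALL, i.e. sorted by (id, val)
df.sort("id", ascending=False).show()   # the explicit form is unchanged
```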
+#### Dependency Updates
+
+- The Catalog API no longer uses types declared in `snowflake.core`, so that dependency has been removed.
+
+### Snowpark pandas API Updates
+
+#### New Features
+
+- Added support for `DataFrame.groupby.rolling()`.
+- Added support for mapping `np.percentile` with DataFrame and Series inputs to `Series.quantile`.
+- Added support for setting the `random_state` parameter to an integer when calling `DataFrame.sample` or `Series.sample`.
+
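A short sketch of two of the Snowpark pandas additions above (integer `random_state` and `np.percentile` dispatch). The imports follow the usual Snowpark pandas setup, and an active Snowpark session is assumed:

```python
# Sketch: seeded sampling and np.percentile routing, per the entries above.
import numpy as np
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401 -- registers the Snowflake backend

df = pd.DataFrame({"a": range(10)})
print(df.sample(n=3, random_state=42))  # reproducible sample
print(np.percentile(df["a"], 95))       # routed to Series.quantile under the hood
```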
+#### Improvements
+
+- Enhanced autoswitching functionality from Snowflake to native pandas for methods with unsupported argument combinations:
+  - `shift()` with `suffix` or non-integer `periods` parameters
+  - `sort_index()` with `axis=1` or `key` parameters
+  - `sort_values()` with `axis=1`
+  - `melt()` with `col_level` parameter
+  - `apply()` with `result_type` parameter for DataFrame
+  - `pivot_table()` with `sort=True`, non-string `index` list, non-string `columns` list, non-string `values` list, or `aggfunc` dict with non-string values
+  - `fillna()` with `downcast` parameter or using `limit` together with `value`
+  - `dropna()` with `axis=1`
+  - `asfreq()` with `how` parameter, `fill_value` parameter, `normalize=True`, or `freq` parameter being week, month, quarter, or year
+  - `groupby()` with `axis=1`, `by!=None and level!=None`, or `by` containing any labels that are not pandas-hashable
+  - `groupby_fillna()` with `downcast` parameter
+  - `groupby_first()` with `min_count>1`
+  - `groupby_last()` with `min_count>1`
+  - `groupby_shift()` with `freq` parameter
+- Slightly improved the performance of `agg`, `nunique`, `describe`, and related methods on 1-column DataFrame and Series objects.
+
+#### Bug Fixes
+
+- Fixed a bug in `DataFrameGroupBy.agg` when `func` is a list of tuples used to set the names of the output columns.
+- Fixed a bug where converting a modin datetime index with a timezone to a numpy array with `np.asarray` would cause a `TypeError`.
+- Fixed a bug where `Series.isin` with a Series argument matched index labels instead of row positions.
+
+#### Improvements
+
+- Added support for the following in faster pandas:
+  - `groupby.apply`
+  - `groupby.nunique`
+  - `groupby.size`
+  - `concat`
+  - `copy`
+  - `str.isdigit`
+  - `str.islower`
+  - `str.isupper`
+  - `str.istitle`
+  - `str.lower`
+  - `str.upper`
+  - `str.title`
+  - `str.match`
+  - `str.capitalize`
+  - `str.__getitem__`
+  - `str.center`
+  - `str.count`
+  - `str.get`
+  - `str.pad`
+  - `str.len`
+  - `str.ljust`
+  - `str.rjust`
+  - `str.split`
+  - `str.replace`
+  - `str.strip`
+  - `str.lstrip`
+  - `str.rstrip`
+  - `str.translate`
+  - `dt.tz_localize`
+  - `dt.tz_convert`
+  - `dt.ceil`
+  - `dt.round`
+  - `dt.floor`
+  - `dt.normalize`
+  - `dt.month_name`
+  - `dt.day_name`
+  - `dt.strftime`
+  - `rolling.min`
+  - `rolling.max`
+  - `rolling.count`
+  - `rolling.sum`
+  - `rolling.mean`
+  - `rolling.std`
+  - `rolling.var`
+  - `rolling.sem`
+  - `rolling.corr`
+  - `expanding.min`
+  - `expanding.max`
+  - `expanding.count`
+  - `expanding.sum`
+  - `expanding.mean`
+  - `expanding.std`
+  - `expanding.var`
+  - `expanding.sem`
+  - `cumsum`
+  - `cummin`
+  - `cummax`
+- Made faster pandas disabled by default (opt-in instead of opt-out).
+- Improved performance of `drop_duplicates` by avoiding joins when `keep!=False` in faster pandas.
+
+## 1.42.0 (2025-10-28)
+
+### Snowpark Python API Updates
+
+#### New Features
+
+- The Snowpark Python DB-API reader is now generally available. Access this feature with `DataFrameReader.dbapi()` to read data from a database table or query into a DataFrame using a DBAPI connection.
+
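A minimal sketch of the now-GA DB-API reader. The SQLite file, table name, and `connection_parameters` are placeholders, and `dbapi()` is assumed to take a zero-argument connection factory plus a `table` (or `query`) to read:

```python
# Sketch: read an external table into a Snowpark DataFrame over a DB-API connection.
import sqlite3
from snowflake.snowpark import Session

def create_connection():
    return sqlite3.connect("source.db")  # any DB-API 2.0 connection object

session = Session.builder.configs(connection_parameters).create()  # placeholder config dict
df = session.read.dbapi(create_connection, table="SOURCE_TABLE")   # or query="SELECT ..."
df.show()
```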
+## 1.41.0 (2025-10-23)

 ### Snowpark Python API Updates

@@ -49,21 +204,44 @@
   - `st_y`
   - `st_ymax`
   - `st_ymin`
+  - `st_geogfromgeohash`
+  - `st_geogpointfromgeohash`
+  - `st_geographyfromwkb`
+  - `st_geographyfromwkt`
+  - `st_geometryfromwkb`
+  - `st_geometryfromwkt`
+  - `try_to_geography`
+  - `try_to_geometry`
+
+#### Improvements
+
+- Added a parameter to enable or disable automatic column name aliasing for the `interval_day_time_from_parts` and `interval_year_month_from_parts` functions.

 #### Bug Fixes

 - Fixed a bug where `DataFrameReader.xml` failed to parse XML files with undeclared namespaces when `ignoreNamespace` is `True`.
 - Added a fix for floating point precision discrepancies in `interval_day_time_from_parts`.
 - Fixed a bug where writing Snowpark pandas dataframes on the pandas backend with a column multiindex to Snowflake with `to_snowflake` would raise `KeyError`.
 - Fixed a bug where `DataFrameReader.dbapi` (PuPr) was not compatible with oracledb 3.4.0.
+- Fixed a bug where `modin` would unintentionally be imported during session initialization in some scenarios.
+- Fixed a bug where `session.udf|udtf|udaf|sproc.register` failed when an extra session argument was passed. These methods do not expect a session argument; remove it if provided.
+
+#### Improvements
+
+- The default maximum length for inferred StringType columns during schema inference in `DataFrameReader.dbapi` has been increased from 16 MB to 128 MB for parquet-file-based ingestion.

 #### Dependency Updates

 - Updated dependency of `snowflake-connector-python>=3.17,<5.0.0`.

 ### Snowpark pandas API Updates

+#### New Features
+
+- Added support for the `dtypes` parameter of `pd.get_dummies`.
+- Added support for `nunique` in `df.pivot_table`, `df.agg`, and other places where aggregate functions can be used.
+- Added support for `DataFrame.interpolate` and `Series.interpolate` with the "linear", "ffill"/"pad", and "backfill"/"bfill" methods. These use the SQL `INTERPOLATE_LINEAR`, `INTERPOLATE_FFILL`, and `INTERPOLATE_BFILL` functions (PuPr).
+
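A small sketch of the interpolate support just listed, using the method names from that entry (an active Snowpark session is assumed):

```python
# Sketch: the three method families named in the entry above.
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401

s = pd.Series([1.0, None, None, 4.0])
print(s.interpolate(method="linear"))  # 1.0, 2.0, 3.0, 4.0 (INTERPOLATE_LINEAR)
print(s.interpolate(method="ffill"))   # forward fill (INTERPOLATE_FFILL)
print(s.interpolate(method="bfill"))   # backward fill (INTERPOLATE_BFILL)
```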
 #### Improvements

 - Improved performance of `Series.to_snowflake` and `pd.to_snowflake(series)` for large data by uploading data via a parquet file. You can control the dataset size at which Snowpark pandas switches to parquet with the variable `modin.config.PandasToSnowflakeParquetThresholdBytes`.
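The parquet switch-over point mentioned above is an ordinary modin config value; a hedged sketch of lowering it (the 8 MiB figure is arbitrary):

```python
# Sketch: tune when Series.to_snowflake / pd.to_snowflake switch to a parquet upload.
import modin.config as cfg

cfg.PandasToSnowflakeParquetThresholdBytes.put(8 * 1024 * 1024)  # switch at ~8 MiB
```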
@@ -105,7 +283,34 @@
   - `dt.days_in_month`
   - `dt.daysinmonth`
   - `sort_values`
+  - `loc` (setting columns)
   - `to_datetime`
+  - `rename`
+  - `drop`
+  - `invert`
+  - `duplicated`
+  - `iloc`
+  - `head`
+  - `columns` (e.g., `df.columns = ["A", "B"]`)
+  - `agg`
+  - `min`
+  - `max`
+  - `count`
+  - `sum`
+  - `mean`
+  - `median`
+  - `std`
+  - `var`
+  - `groupby.agg`
+  - `groupby.min`
+  - `groupby.max`
+  - `groupby.count`
+  - `groupby.sum`
+  - `groupby.mean`
+  - `groupby.median`
+  - `groupby.std`
+  - `groupby.var`
+  - `drop_duplicates`
 - Reuse row count from the relaxed query compiler in `get_axis_len`.

 #### Bug Fixes

docs/source/modin/hybrid_execution.rst

Lines changed: 12 additions & 4 deletions
@@ -1,5 +1,5 @@
 ===========================================
-Hybrid Execution (Public Preview)
+Hybrid Execution
 ===========================================

 Snowpark pandas supports workloads on mixed underlying execution engines and will automatically
@@ -37,8 +37,8 @@ read_snowflake, value_counts, tail, var, std, sum, sem, max, min, mean, agg, agg
 Examples
 ========

-Enabling Hybrid Execution
-~~~~~~~~~~~~~~~~~~~~~~~~~
+Disabling or Enabling Hybrid Execution
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 .. code-block:: python

@@ -140,4 +140,12 @@ Debugging Hybrid Execution

 `pd.explain_switch()` provides information on how execution engine decisions
 are made. This method prints a simplified version of the command unless `simple=False` is
-passed as an argument.
+passed as an argument.
+
+Performance Considerations
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+Hybrid mode will generally perform well with small datasets and traditional notebook
+workloads, but merge-heavy workloads using a star schema can result in moving data too
+often, particularly when tables in the star schema straddle the transfer-cost boundary.
+Since the Snowflake Warehouse is designed for these SQL-like workloads, turning off hybrid
+mode may be desirable.
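A hedged sketch of turning hybrid execution off for such merge-heavy workloads; `AutoSwitchBackend` is assumed to be the relevant modin config toggle (verify the exact switch in your version's documentation):

```python
# Sketch: pin execution to the Snowflake backend and inspect past switch decisions.
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401
from modin.config import AutoSwitchBackend

AutoSwitchBackend.disable()  # assumption: stops automatic engine switching
# ... run the star-schema joins entirely in Snowflake ...
pd.explain_switch()          # shows how engine decisions were made (see above)
```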
