Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
b4e8885
added more decorators
sfc-gh-jenzhang Oct 16, 2025
3f953fe
sort_values axis
sfc-gh-jenzhang Oct 16, 2025
e762110
added tests
sfc-gh-jenzhang Oct 16, 2025
5e454b0
Merge branch 'main' of https://github.com/snowflakedb/snowpark-python…
sfc-gh-jenzhang Oct 16, 2025
d2e69e8
Merge branch 'main' of https://github.com/snowflakedb/snowpark-python…
sfc-gh-jenzhang Oct 16, 2025
4f09559
added more tests
sfc-gh-jenzhang Oct 16, 2025
0f84e94
made api cls name a list
sfc-gh-jenzhang Oct 16, 2025
c82f927
added more tests
sfc-gh-jenzhang Oct 16, 2025
57adc40
fixed error message
sfc-gh-jenzhang Oct 16, 2025
9ab0886
removed apply series
sfc-gh-jenzhang Oct 16, 2025
c67a75a
fixed test file
sfc-gh-jenzhang Oct 16, 2025
f2b35f4
Merge branch 'main' into not-implemented-args
sfc-gh-jenzhang Oct 16, 2025
ffa678a
modified changelog
sfc-gh-jenzhang Oct 16, 2025
e7fe898
Merge branch 'main' of https://github.com/snowflakedb/snowpark-python…
sfc-gh-jenzhang Oct 16, 2025
50da76c
Merge branch 'not-implemented-args' of https://github.com/snowflakedb…
sfc-gh-jenzhang Oct 16, 2025
b6cb173
revert a change
sfc-gh-jenzhang Oct 16, 2025
a93a7d6
Update src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_c…
sfc-gh-jenzhang Oct 17, 2025
cbee6a9
changed index to columns
sfc-gh-jenzhang Oct 17, 2025
124379c
fixed comments
sfc-gh-jenzhang Oct 17, 2025
fddef76
removed redundant code
sfc-gh-jenzhang Oct 17, 2025
830bcd2
Update src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_c…
sfc-gh-jenzhang Oct 17, 2025
c7d483d
Update snowflake_query_compiler.py
sfc-gh-jenzhang Oct 20, 2025
0bd5609
added comment
sfc-gh-jenzhang Oct 20, 2025
7c92e5a
Merge branch 'not-implemented-args' of https://github.com/snowflakedb…
sfc-gh-jenzhang Oct 20, 2025
aae870a
Merge branch 'main' into not-implemented-args
sfc-gh-jenzhang Oct 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,16 @@
- `skew()` with `axis=1` or `numeric_only=False` parameters
- `round()` with `decimals` parameter as a Series
- `corr()` with `method!=pearson` parameter
- `shift()` with `suffix` or non-integer `periods` parameters
- `sort_index()` with `axis=1` or `key` parameters
- `sort_values()` with `axis=1`
- `melt()` with `col_level` parameter
- `apply()` with `result_type` parameter for DataFrame
- `pivot_table()` with `sort=False`, non-string `index` list, non-string `columns` list, non-string `values` list, or `aggfunc` dict with non-string values
- `fillna()` with `downcast` parameter or using `limit` together with `value`
- `dropna()` with `axis=1`


- Set `cte_optimization_enabled` to True for all Snowpark pandas sessions.
- Add support for the following in faster pandas:
- `isin`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,7 @@ def get_unsupported_args_reason(


def register_query_compiler_method_not_implemented(
api_cls_name: Optional[str],
api_cls_names: Union[list[Optional[str]], Optional[str]],
method_name: str,
unsupported_args: Optional["UnsupportedArgsRule"] = None,
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
Expand All @@ -644,22 +644,30 @@ def register_query_compiler_method_not_implemented(
without meaningful benefit.

Args:
api_cls_name: Frontend class name (e.g., "BasePandasDataset", "Series", "DataFrame", "None").
api_cls_names: Frontend class names (e.g. "BasePandasDataset", "Series", "DataFrame", or None). It can be a list if multiple api_cls_names are needed.
method_name: Method name to register.
unsupported_args: UnsupportedArgsRule for args-based auto-switching.
If None, method is treated as completely unimplemented.
"""
reg_key = MethodKey(api_cls_name, method_name)

# register the method in the hybrid switch for unsupported args
if unsupported_args is None:
HYBRID_SWITCH_FOR_UNIMPLEMENTED_METHODS.add(reg_key)
else:
HYBRID_SWITCH_FOR_UNSUPPORTED_ARGS[reg_key] = unsupported_args
if isinstance(api_cls_names, str) or api_cls_names is None:
api_cls_names = [api_cls_names]
assert (
api_cls_names
), "api_cls_names must be a string (e.g., 'DataFrame', 'Series') or a list of strings (e.g., ['DataFrame', 'Series']) or None for top-level functions"

register_function_for_pre_op_switch(
class_name=api_cls_name, backend="Snowflake", method=method_name
)
for api_cls_name in api_cls_names:
reg_key = MethodKey(api_cls_name, method_name)

# register the method in the hybrid switch for unsupported args
if unsupported_args is None:
HYBRID_SWITCH_FOR_UNIMPLEMENTED_METHODS.add(reg_key)
else:
HYBRID_SWITCH_FOR_UNSUPPORTED_ARGS[reg_key] = unsupported_args

register_function_for_pre_op_switch(
class_name=api_cls_name, backend="Snowflake", method=method_name
)

def decorator(query_compiler_method: Callable[..., Any]) -> Callable[..., Any]:
@functools.wraps(query_compiler_method)
Expand Down Expand Up @@ -2591,6 +2599,22 @@ def _shift_index(self, periods: int, freq: Any) -> "SnowflakeQueryCompiler": #
# TODO: SNOW-1023324, implement shifting index only.
ErrorMessage.not_implemented("shifting index values not yet supported.")

@register_query_compiler_method_not_implemented(
"BasePandasDataset",
"shift",
UnsupportedArgsRule(
unsupported_conditions=[
(
lambda args: args.get("suffix") is not None,
"the 'suffix' parameter is not yet supported",
),
(
lambda args: not isinstance(args.get("periods"), int),
"only int 'periods' is currently supported",
),
]
),
)
def shift(
self,
periods: Union[int, Sequence[int]] = 1,
Expand Down Expand Up @@ -4178,6 +4202,19 @@ def first_last_valid_index(
)
return None

@register_query_compiler_method_not_implemented(
"BasePandasDataset",
"sort_index",
UnsupportedArgsRule(
unsupported_conditions=[
("axis", 1),
(
lambda args: args.get("key") is not None,
"the 'key' parameter is not yet supported",
),
]
),
)
def sort_index(
self,
*,
Expand Down Expand Up @@ -4240,7 +4277,7 @@ def sort_index(
1.0 c
dtype: object
"""
if axis in (1, "index"):
if axis == 1:
Comment on lines -4257 to +4294
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why remove index?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

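This is redundant because the frontend layer already normalizes string axis names to integers before calling the query compiler ("index" becomes 0 and "columns" becomes 1), so only `axis == 1` needs to be checked here.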

ErrorMessage.not_implemented(
"sort_index is not supported yet on axis=1 in Snowpark pandas."
)
Expand All @@ -4264,8 +4301,17 @@ def sort_index(
include_indexer=include_indexer,
)

@register_query_compiler_method_not_implemented(
"BasePandasDataset",
"sort_values",
UnsupportedArgsRule(
unsupported_conditions=[
("axis", 1),
]
),
)
def sort_columns_by_row_values(
self, rows: IndexLabel, ascending: bool = True, **kwargs: Any
self, rows: IndexLabel, ascending: bool = True, axis: int = 1, **kwargs: Any
) -> None:
"""
Reorder the columns based on the lexicographic order of the given rows.
Expand Down Expand Up @@ -8658,6 +8704,18 @@ def cummax(
).frame
)

@register_query_compiler_method_not_implemented(
None,
"melt",
UnsupportedArgsRule(
unsupported_conditions=[
(
lambda args: args.get("col_level") is not None,
"col_level argument is not yet supported",
),
]
),
)
def melt(
self,
id_vars: list[str],
Expand Down Expand Up @@ -9705,6 +9763,18 @@ def align(

return left_qc, right_qc

@register_query_compiler_method_not_implemented(
"DataFrame",
"apply",
UnsupportedArgsRule(
unsupported_conditions=[
(
lambda args: args.get("result_type") is not None,
"the 'result_type' parameter is not yet supported",
),
]
),
)
def apply(
self,
func: Union[AggFuncType, UserDefinedFunction],
Expand Down Expand Up @@ -10377,6 +10447,63 @@ def pivot(
sort=True,
)

@register_query_compiler_method_not_implemented(
None,
"pivot_table",
UnsupportedArgsRule(
unsupported_conditions=[
("sort", False),
(
lambda args: (
args.get("index") is not None
and (
not isinstance(args.get("index"), str)
and not all([isinstance(v, str) for v in args.get("index")])
and None not in args.get("index")
)
),
Comment on lines +10545 to +10552
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The condition logic has a potential issue when handling non-string, non-iterable values for index. If index is a scalar value like an integer, the expression all([isinstance(v, str) for v in args.get("index")]) will attempt to iterate over a non-iterable object, causing a TypeError.

Consider restructuring this condition to first check if the value is iterable before attempting to iterate over it, or use a try/except block to handle this case. A safer approach might be:

lambda args: (
    args.get("index") is not None
    and not isinstance(args.get("index"), str)
    and (
        not hasattr(args.get("index"), "__iter__") 
        or not all(isinstance(v, str) for v in args.get("index"))
    )
    and None not in (args.get("index") if hasattr(args.get("index"), "__iter__") else [args.get("index")])
)

The same issue appears in the similar conditions for columns and values parameters.

Suggested change
lambda args: (
args.get("index") is not None
and (
not isinstance(args.get("index"), str)
and not all([isinstance(v, str) for v in args.get("index")])
and None not in args.get("index")
)
),
lambda args: (
args.get("index") is not None
and not isinstance(args.get("index"), str)
and (
not hasattr(args.get("index"), "__iter__")
or not all(isinstance(v, str) for v in args.get("index"))
)
and None not in (
args.get("index") if hasattr(args.get("index"), "__iter__") else [args.get("index")]
)
),

Spotted by Graphite Agent

Fix in Graphite


Is this helpful? React 👍 or 👎 to let us know.

"non-string of list of string index is not yet supported for pivot_table",
),
(
lambda args: (
args.get("columns") is not None
and (
not isinstance(args.get("columns"), str)
and not all(
[isinstance(v, str) for v in args.get("columns")]
)
and None not in args.get("columns")
)
),
"non-string of list of string columns is not yet supported for pivot_table",
),
(
lambda args: (
args.get("values") is not None
and (
not isinstance(args.get("values"), str)
and not all(
[isinstance(v, str) for v in args.get("values")]
)
and None not in args.get("values")
)
),
"non-string of list of string values is not yet supported for pivot_table",
),
(
lambda args: (
isinstance(args.get("aggfunc"), dict)
and any(
not isinstance(af, str)
for af in args.get("aggfunc").values()
)
and args.get("index") is None
),
"dictionary aggfunc with non-string aggregation functions is not yet supported for pivot_table with margins or when index is None",
),
]
),
)
def pivot_table(
self,
index: Any,
Expand Down Expand Up @@ -10452,7 +10579,7 @@ def pivot_table(
index = [index]

# TODO: SNOW-857485 Support for non-str and list of non-str for index/columns/values
if index and (
if index is not None and (
not isinstance(index, str)
and not all([isinstance(v, str) for v in index])
and None not in index
Expand All @@ -10461,7 +10588,7 @@ def pivot_table(
f"Not implemented non-string of list of string {index}."
)

if values and (
if values is not None and (
not isinstance(values, str)
and not all([isinstance(v, str) for v in values])
and None not in values
Expand All @@ -10470,7 +10597,7 @@ def pivot_table(
f"Not implemented non-string of list of string {values}."
)

if columns and (
if columns is not None and (
not isinstance(columns, str)
and not all([isinstance(v, str) for v in columns])
and None not in columns
Expand Down Expand Up @@ -12541,6 +12668,23 @@ def _make_fill_expression_for_column_wise_fillna(
*columns_to_include,
)

@register_query_compiler_method_not_implemented(
["DataFrame", "Series"],
"fillna",
UnsupportedArgsRule(
unsupported_conditions=[
(
lambda kwargs: kwargs.get("value") is not None
and kwargs.get("limit") is not None,
"the 'limit' parameter with 'value' parameter is not yet supported",
),
(
lambda kwargs: kwargs.get("downcast") is not None,
"the 'downcast' parameter is not yet supported",
),
]
),
)
def fillna(
self,
value: Optional[Union[Hashable, Mapping, "pd.DataFrame", "pd.Series"]] = None,
Expand Down Expand Up @@ -12786,6 +12930,15 @@ def fillna_expr(snowflake_quoted_id: str) -> SnowparkColumn:
).frame
)

@register_query_compiler_method_not_implemented(
"DataFrame",
"dropna",
UnsupportedArgsRule(
unsupported_conditions=[
("axis", 1),
]
),
)
def dropna(
self,
axis: int,
Expand Down Expand Up @@ -21239,7 +21392,7 @@ def _stack_helper(
return qc

@register_query_compiler_method_not_implemented(
api_cls_name="DataFrame",
api_cls_names=["DataFrame"],
method_name="corr",
unsupported_args=UnsupportedArgsRule(
unsupported_conditions=[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1623,6 +1623,7 @@ def sort_values(
na_position=na_position,
ignore_index=ignore_index,
key=key,
axis=axis,
)
return self._create_or_update_from_compiler(result, inplace)

Expand Down
Loading
Loading