Skip to content

Commit e52c58b

Browse files
SNOW-2464676: Add support for str.isdigit/islower/isupper/istitle/lower/upper/title in faster pandas (#3961)
1 parent 2fa3163 commit e52c58b

File tree

3 files changed

+129
-0
lines changed

3 files changed

+129
-0
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,14 @@
3636
- `groupby.size`
3737
- `concat`
3838
- `copy`
39+
- `str.isdigit`
40+
- `str.islower`
41+
- `str.isupper`
42+
- `str.istitle`
43+
- `str.lower`
44+
- `str.upper`
45+
- `str.title`
46+
3947
- Make faster pandas disabled by default (opt-in instead of opt-out).
4048

4149
## 1.41.0 (2025-10-23)

src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18893,6 +18893,19 @@ def str_isalpha(self) -> None:
1889318893
ErrorMessage.method_not_implemented_error("isalpha", "Series.str")
1889418894

1889518895
def str_isdigit(self) -> "SnowflakeQueryCompiler":
    """
    Check whether all characters in each string are digits.

    Faster-pandas-aware wrapper around ``_str_isdigit_internal``: the
    internal implementation is applied to this query compiler and, when one
    is attached, to the relaxed query compiler as well.

    Returns
    -------
    SnowflakeQueryCompiler
        The value returned by ``_maybe_set_relaxed_qc`` for the regular
        result and the relaxed result (``None`` when this compiler has no
        relaxed query compiler).
    """
    relaxed_query_compiler = None
    if self._relaxed_query_compiler is not None:
        # Run the same operation on the relaxed compiler so the result can
        # carry a relaxed counterpart too.
        relaxed_query_compiler = (
            self._relaxed_query_compiler._str_isdigit_internal()
        )

    qc = self._str_isdigit_internal()
    return self._maybe_set_relaxed_qc(qc, relaxed_query_compiler)
18907+
18908+
def _str_isdigit_internal(self) -> "SnowflakeQueryCompiler":
1889618909
"""
1889718910
Check whether all characters in each string are digits.
1889818911

@@ -18909,6 +18922,19 @@ def str_isspace(self) -> None:
1890918922
ErrorMessage.method_not_implemented_error("isspace", "Series.str")
1891018923

1891118924
def str_islower(self) -> "SnowflakeQueryCompiler":
    """
    Check whether all characters in each string are lowercase.

    Faster-pandas-aware wrapper around ``_str_islower_internal``: the
    internal implementation is applied to this query compiler and, when one
    is attached, to the relaxed query compiler as well.

    Returns
    -------
    SnowflakeQueryCompiler
        The value returned by ``_maybe_set_relaxed_qc`` for the regular
        result and the relaxed result (``None`` when this compiler has no
        relaxed query compiler).
    """
    relaxed_query_compiler = None
    if self._relaxed_query_compiler is not None:
        # Run the same operation on the relaxed compiler so the result can
        # carry a relaxed counterpart too.
        relaxed_query_compiler = (
            self._relaxed_query_compiler._str_islower_internal()
        )

    qc = self._str_islower_internal()
    return self._maybe_set_relaxed_qc(qc, relaxed_query_compiler)
18936+
18937+
def _str_islower_internal(self) -> "SnowflakeQueryCompiler":
1891218938
"""
1891318939
Check whether all characters in each string are lowercase.
1891418940

@@ -18927,6 +18953,19 @@ def str_islower(self) -> "SnowflakeQueryCompiler":
1892718953
return SnowflakeQueryCompiler(new_internal_frame)
1892818954

1892918955
def str_isupper(self) -> "SnowflakeQueryCompiler":
    """
    Check whether all characters in each string are uppercase.

    Faster-pandas-aware wrapper around ``_str_isupper_internal``: the
    internal implementation is applied to this query compiler and, when one
    is attached, to the relaxed query compiler as well.

    Returns
    -------
    SnowflakeQueryCompiler
        The value returned by ``_maybe_set_relaxed_qc`` for the regular
        result and the relaxed result (``None`` when this compiler has no
        relaxed query compiler).
    """
    relaxed_query_compiler = None
    if self._relaxed_query_compiler is not None:
        # Run the same operation on the relaxed compiler so the result can
        # carry a relaxed counterpart too.
        relaxed_query_compiler = (
            self._relaxed_query_compiler._str_isupper_internal()
        )

    qc = self._str_isupper_internal()
    return self._maybe_set_relaxed_qc(qc, relaxed_query_compiler)
18967+
18968+
def _str_isupper_internal(self) -> "SnowflakeQueryCompiler":
1893018969
"""
1893118970
Check whether all characters in each string are uppercase.
1893218971

@@ -18945,6 +18984,19 @@ def str_isupper(self) -> "SnowflakeQueryCompiler":
1894518984
return SnowflakeQueryCompiler(new_internal_frame)
1894618985

1894718986
def str_istitle(self) -> "SnowflakeQueryCompiler":
    """
    Check whether each string is titlecase.

    Faster-pandas-aware wrapper around ``_str_istitle_internal``: the
    internal implementation is applied to this query compiler and, when one
    is attached, to the relaxed query compiler as well.

    Returns
    -------
    SnowflakeQueryCompiler
        The value returned by ``_maybe_set_relaxed_qc`` for the regular
        result and the relaxed result (``None`` when this compiler has no
        relaxed query compiler).
    """
    relaxed_query_compiler = None
    if self._relaxed_query_compiler is not None:
        # Run the same operation on the relaxed compiler so the result can
        # carry a relaxed counterpart too.
        relaxed_query_compiler = (
            self._relaxed_query_compiler._str_istitle_internal()
        )

    qc = self._str_istitle_internal()
    return self._maybe_set_relaxed_qc(qc, relaxed_query_compiler)
18998+
18999+
def _str_istitle_internal(self) -> "SnowflakeQueryCompiler":
1894819000
"""
1894919001
Check whether each string is titlecase.
1895019002
We do a regex matching as follows
@@ -18973,6 +19025,17 @@ def str_isdecimal(self) -> None:
1897319025
ErrorMessage.method_not_implemented_error("isdecimal", "Series.str")
1897419026

1897519027
def str_lower(self) -> "SnowflakeQueryCompiler":
    """
    Convert strings to lowercase.

    Faster-pandas-aware wrapper around ``_str_lower_internal``: the internal
    implementation is applied to this query compiler and, when one is
    attached, to the relaxed query compiler as well.

    Returns
    -------
    SnowflakeQueryCompiler
        The value returned by ``_maybe_set_relaxed_qc`` for the regular
        result and the relaxed result (``None`` when this compiler has no
        relaxed query compiler).
    """
    relaxed_query_compiler = None
    if self._relaxed_query_compiler is not None:
        # Run the same operation on the relaxed compiler so the result can
        # carry a relaxed counterpart too.
        relaxed_query_compiler = self._relaxed_query_compiler._str_lower_internal()

    qc = self._str_lower_internal()
    return self._maybe_set_relaxed_qc(qc, relaxed_query_compiler)
19037+
19038+
def _str_lower_internal(self) -> "SnowflakeQueryCompiler":
1897619039
"""
1897719040
Convert strings to lowercase.
1897819041

@@ -18986,6 +19049,17 @@ def str_lower(self) -> "SnowflakeQueryCompiler":
1898619049
return SnowflakeQueryCompiler(new_internal_frame)
1898719050

1898819051
def str_upper(self) -> "SnowflakeQueryCompiler":
    """
    Convert strings to uppercase.

    Faster-pandas-aware wrapper around ``_str_upper_internal``: the internal
    implementation is applied to this query compiler and, when one is
    attached, to the relaxed query compiler as well.

    Returns
    -------
    SnowflakeQueryCompiler
        The value returned by ``_maybe_set_relaxed_qc`` for the regular
        result and the relaxed result (``None`` when this compiler has no
        relaxed query compiler).
    """
    relaxed_query_compiler = None
    if self._relaxed_query_compiler is not None:
        # Run the same operation on the relaxed compiler so the result can
        # carry a relaxed counterpart too.
        relaxed_query_compiler = self._relaxed_query_compiler._str_upper_internal()

    qc = self._str_upper_internal()
    return self._maybe_set_relaxed_qc(qc, relaxed_query_compiler)
19061+
19062+
def _str_upper_internal(self) -> "SnowflakeQueryCompiler":
1898919063
"""
1899019064
Convert strings to uppercase.
1899119065

@@ -18999,6 +19073,17 @@ def str_upper(self) -> "SnowflakeQueryCompiler":
1899919073
return SnowflakeQueryCompiler(new_internal_frame)
1900019074

1900119075
def str_title(self) -> "SnowflakeQueryCompiler":
    """
    Titlecase the string.

    Faster-pandas-aware wrapper around ``_str_title_internal``: the internal
    implementation is applied to this query compiler and, when one is
    attached, to the relaxed query compiler as well.

    Returns
    -------
    SnowflakeQueryCompiler
        The value returned by ``_maybe_set_relaxed_qc`` for the regular
        result and the relaxed result (``None`` when this compiler has no
        relaxed query compiler).
    """
    relaxed_query_compiler = None
    if self._relaxed_query_compiler is not None:
        # Run the same operation on the relaxed compiler so the result can
        # carry a relaxed counterpart too.
        relaxed_query_compiler = self._relaxed_query_compiler._str_title_internal()

    qc = self._str_title_internal()
    return self._maybe_set_relaxed_qc(qc, relaxed_query_compiler)
19085+
19086+
def _str_title_internal(self) -> "SnowflakeQueryCompiler":
1900219087
"""
1900319088
Titlecase the string
1900419089

tests/integ/modin/test_faster_pandas.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,42 @@ def test_rename(session):
721721
assert_frame_equal(snow_result, native_result)
722722

723723

724+
@pytest.mark.parametrize(
    "func", ["isdigit", "islower", "istitle", "isupper", "lower", "upper", "title"]
)
@sql_count_checker(query_count=3)
def test_str_no_params(session, func):
    """
    Check zero-argument ``Series.str`` methods with faster pandas enabled.

    For each parametrized ``func`` the test writes a small temp table, reads
    it with ``pd.read_snowflake``, calls ``df["A"].str.<func>()``, asserts
    that both the input and the result carry a relaxed query compiler in
    dummy-row-position mode, and compares the result with native pandas.
    """
    with session_parameter_override(
        session, "dummy_row_pos_optimization_enabled", True
    ):
        # create tables
        table_name = Utils.random_name_for_temp_object(TempObjectType.TABLE)
        session.create_dataframe(
            native_pd.DataFrame([["abc"], ["DEF"], ["cba"]], columns=["A"])
        ).write.save_as_table(table_name, table_type="temp")

        # create snow dataframes
        df = pd.read_snowflake(table_name)
        snow_result = getattr(df["A"].str, func)()

        # verify that the input dataframe has a populated relaxed query compiler
        assert df._query_compiler._relaxed_query_compiler is not None
        assert df._query_compiler._relaxed_query_compiler._dummy_row_pos_mode is True
        # verify that the output dataframe also has a populated relaxed query compiler
        assert snow_result._query_compiler._relaxed_query_compiler is not None
        assert (
            snow_result._query_compiler._relaxed_query_compiler._dummy_row_pos_mode
            is True
        )

        # create pandas dataframes
        native_df = df.to_pandas()
        native_result = getattr(native_df["A"].str, func)()

        # compare results
        assert_series_equal(snow_result, native_result)
758+
759+
724760
@sql_count_checker(query_count=3)
725761
def test_str_contains(session):
726762
with session_parameter_override(

0 commit comments

Comments
 (0)