Skip to content

Commit fd2fb44

Browse files
authored
FIX-#7638: Suppress default to pandas warnings on native pandas backend (#7639)
IO and general-module functions now all share a code path that checks whether the active backend's query compiler should warn on default to pandas. Methods that default to pandas directly in the base.py frontend layer (rather than at the query compiler level) now use this code path as well.

Signed-off-by: Jonathan Shi <jonathan.shi@snowflake.com>
1 parent 6504ed3 commit fd2fb44

File tree

14 files changed

+204
-79
lines changed

14 files changed

+204
-79
lines changed

modin/core/storage_formats/base/query_compiler.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ class BaseQueryCompiler(
206206
_shape_hint: Optional[str]
207207
_should_warn_on_default_to_pandas: bool = True
208208

209-
def _maybe_warn_on_default(self, *, message: str = "", reason: str = "") -> None:
209+
@classmethod
210+
def _maybe_warn_on_default(cls, *, message: str = "", reason: str = "") -> None:
210211
"""
211212
If this class is configured to warn on default to pandas, warn.
212213
@@ -217,7 +218,7 @@ def _maybe_warn_on_default(self, *, message: str = "", reason: str = "") -> None
217218
reason : str, default: ""
218219
Reason for default.
219220
"""
220-
if self._should_warn_on_default_to_pandas:
221+
if cls._should_warn_on_default_to_pandas:
221222
ErrorMessage.default_to_pandas(message=message, reason=reason)
222223

223224
@disable_logging

modin/pandas/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -561,8 +561,8 @@ def _default_to_pandas(self, op, *args, reason: str = None, **kwargs):
561561
Result of operation.
562562
"""
563563
empty_self_str = "" if not self.empty else " for empty DataFrame"
564-
ErrorMessage.default_to_pandas(
565-
"`{}.{}`{}".format(
564+
self._query_compiler._maybe_warn_on_default(
565+
message="`{}.{}`{}".format(
566566
type(self).__name__,
567567
op if isinstance(op, str) else op.__name__,
568568
empty_self_str,

modin/pandas/general.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,9 @@
2828
from modin.core.storage_formats.pandas.query_compiler_caster import (
2929
wrap_free_function_in_argument_caster,
3030
)
31-
from modin.error_message import ErrorMessage
3231
from modin.logging import enable_logging
3332
from modin.pandas.io import to_pandas
34-
from modin.utils import _inherit_docstrings
33+
from modin.utils import _inherit_docstrings, _maybe_warn_on_default
3534

3635
from .base import BasePandasDataset
3736
from .dataframe import DataFrame
@@ -193,7 +192,7 @@ def merge_asof(
193192
raise ValueError(
194193
"can not merge DataFrame with instance of type {}".format(type(right))
195194
)
196-
ErrorMessage.default_to_pandas("`merge_asof`")
195+
left._query_compiler._maybe_warn_on_default(message="`merge_asof`")
197196

198197
# As of Pandas 1.2 these should raise an error; before that it did
199198
# something likely random:
@@ -345,7 +344,7 @@ def cut(
345344
if isinstance(x, DataFrame):
346345
raise ValueError("Input array must be 1 dimensional")
347346
if not isinstance(x, Series):
348-
ErrorMessage.default_to_pandas(
347+
_maybe_warn_on_default(
349348
reason=f"pd.cut is not supported on objects of type {type(x)}"
350349
)
351350
import pandas
@@ -656,7 +655,7 @@ def get_dummies(
656655
+ "github.com/modin-project/modin."
657656
)
658657
if not isinstance(data, DataFrame):
659-
ErrorMessage.default_to_pandas("`get_dummies` on non-DataFrame")
658+
_maybe_warn_on_default("`get_dummies` on non-DataFrame")
660659
if isinstance(data, Series):
661660
data = data._to_pandas()
662661
return DataFrame(
@@ -726,7 +725,7 @@ def crosstab(
726725
"""
727726
Compute a simple cross tabulation of two (or more) factors.
728727
"""
729-
ErrorMessage.default_to_pandas("`crosstab`")
728+
_maybe_warn_on_default("`crosstab`")
730729
pandas_crosstab = pandas.crosstab(
731730
index,
732731
columns,
@@ -769,7 +768,7 @@ def lreshape(data: DataFrame, groups, dropna=True) -> DataFrame:
769768
"""
770769
if not isinstance(data, DataFrame):
771770
raise ValueError("can not lreshape with instance of type {}".format(type(data)))
772-
ErrorMessage.default_to_pandas("`lreshape`")
771+
data._query_compiler._maybe_warn_on_default(message="`lreshape`")
773772
return DataFrame(pandas.lreshape(to_pandas(data), groups, dropna=dropna))
774773

775774

modin/pandas/io.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,13 @@
7171
from modin.core.storage_formats.pandas.query_compiler_caster import (
7272
wrap_free_function_in_argument_caster,
7373
)
74-
from modin.error_message import ErrorMessage
7574
from modin.logging import ClassLogger, enable_logging
7675
from modin.utils import (
7776
SupportsPrivateToNumPy,
7877
SupportsPublicToNumPy,
7978
SupportsPublicToPandas,
8079
_inherit_docstrings,
80+
_maybe_warn_on_default,
8181
classproperty,
8282
expanduser_path_arg,
8383
)
@@ -156,7 +156,7 @@ def read_xml(
156156
storage_options: StorageOptions = None,
157157
dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
158158
) -> DataFrame:
159-
ErrorMessage.default_to_pandas("read_xml")
159+
_maybe_warn_on_default("read_xml")
160160
_, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
161161
return ModinObjects.DataFrame(pandas.read_xml(**kwargs))
162162

@@ -658,7 +658,7 @@ def read_sql(
658658
from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher
659659

660660
if kwargs.get("chunksize") is not None:
661-
ErrorMessage.default_to_pandas("Parameters provided [chunksize]")
661+
_maybe_warn_on_default("Parameters provided [chunksize]")
662662
df_gen = pandas.read_sql(**kwargs)
663663
return (
664664
ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_pandas(df))
@@ -818,7 +818,7 @@ def json_normalize(
818818
"""
819819
Normalize semi-structured JSON data into a flat table.
820820
"""
821-
ErrorMessage.default_to_pandas("json_normalize")
821+
_maybe_warn_on_default("json_normalize")
822822
return ModinObjects.DataFrame(
823823
pandas.json_normalize(
824824
data, record_path, meta, meta_prefix, record_prefix, errors, sep, max_level
@@ -840,7 +840,7 @@ def read_orc(
840840
"""
841841
Load an ORC object from the file path, returning a DataFrame.
842842
"""
843-
ErrorMessage.default_to_pandas("read_orc")
843+
_maybe_warn_on_default("read_orc")
844844
return ModinObjects.DataFrame(
845845
pandas.read_orc(
846846
path,
@@ -886,7 +886,7 @@ def return_handler(*args, **kwargs):
886886
# We don't want to constantly be giving this error message for
887887
# internal methods.
888888
if item[0] != "_":
889-
ErrorMessage.default_to_pandas("`{}`".format(item))
889+
_maybe_warn_on_default("`{}`".format(item))
890890
args = [
891891
(
892892
to_pandas(arg)
@@ -952,7 +952,7 @@ def return_handler(*args, **kwargs):
952952
# We don't want to constantly be giving this error message for
953953
# internal methods.
954954
if item[0] != "_":
955-
ErrorMessage.default_to_pandas("`{}`".format(item))
955+
_maybe_warn_on_default("`{}`".format(item))
956956
args = [
957957
(
958958
to_pandas(arg)

modin/tests/experimental/test_io_exp.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
time_parsing_csv_path,
3030
)
3131
from modin.tests.test_utils import (
32-
warns_that_defaulting_to_pandas,
32+
current_execution_is_native,
3333
warns_that_defaulting_to_pandas_if,
3434
)
3535
from modin.utils import try_cast_to_pandas
@@ -129,7 +129,7 @@ def test_read_csv_empty_frame(self):
129129

130130
def test_read_csv_without_glob(self):
131131
with pytest.raises(FileNotFoundError):
132-
with warns_that_defaulting_to_pandas():
132+
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
133133
pd.read_csv_glob(
134134
"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-",
135135
storage_options={"anon": True},

modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
from modin.tests.pandas.utils import df_equals, test_data
2121
from modin.tests.test_utils import (
2222
df_or_series_using_native_execution,
23-
warns_that_defaulting_to_pandas,
2423
warns_that_defaulting_to_pandas_if,
2524
)
2625

@@ -66,7 +65,9 @@ def test_categorical_from_dataframe():
6665

6766
def test_from_dataframe_with_empty_dataframe():
6867
modin_df = pd.DataFrame({"foo_col": pd.Series([], dtype="int64")})
69-
with warns_that_defaulting_to_pandas():
68+
with warns_that_defaulting_to_pandas_if(
69+
not df_or_series_using_native_execution(modin_df)
70+
):
7071
eval_df_protocol(modin_df)
7172

7273

modin/tests/pandas/dataframe/test_iter.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
)
3636
from modin.tests.test_utils import (
3737
current_execution_is_native,
38-
warns_that_defaulting_to_pandas,
3938
warns_that_defaulting_to_pandas_if,
4039
)
4140

@@ -147,7 +146,7 @@ def test_display_options_for___repr__(max_rows_columns, expand_frame_repr, frame
147146
def test___finalize__():
148147
data = test_data_values[0]
149148
# NOTE: __finalize__() defaults to pandas at the API layer.
150-
with warns_that_defaulting_to_pandas():
149+
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
151150
pd.DataFrame(data).__finalize__(None)
152151

153152

modin/tests/pandas/dataframe/test_udf.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,10 @@
4343
udf_func_keys,
4444
udf_func_values,
4545
)
46-
from modin.tests.test_utils import warns_that_defaulting_to_pandas
46+
from modin.tests.test_utils import (
47+
current_execution_is_native,
48+
warns_that_defaulting_to_pandas_if,
49+
)
4750
from modin.utils import get_current_execution
4851

4952
NPartitions.put(4)
@@ -126,10 +129,10 @@ def test_aggregate_alias():
126129
def test_aggregate_error_checking():
127130
modin_df = pd.DataFrame(test_data["float_nan_data"])
128131

129-
with warns_that_defaulting_to_pandas():
132+
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
130133
modin_df.aggregate({modin_df.columns[0]: "sum", modin_df.columns[1]: "mean"})
131134

132-
with warns_that_defaulting_to_pandas():
135+
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
133136
modin_df.aggregate("arcsin")
134137

135138

modin/tests/pandas/extensions/test_groupby_extensions.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,10 @@
2424
)
2525
from modin.pandas.groupby import DataFrameGroupBy, SeriesGroupBy
2626
from modin.tests.pandas.utils import default_to_pandas_ignore_string, df_equals
27-
from modin.tests.test_utils import warns_that_defaulting_to_pandas
27+
from modin.tests.test_utils import (
28+
current_execution_is_native,
29+
warns_that_defaulting_to_pandas_if,
30+
)
2831

2932

3033
@pytest.mark.parametrize(
@@ -150,10 +153,7 @@ def ngroups(self):
150153
# Check that the accessor doesn't work on the Python_Test backend.
151154
python_test_df = pandas_df.move_to("Python_Test")
152155
groupby = get_groupby(python_test_df)
153-
# groupby.ngroups defaults to pandas at the API layer,
154-
# where it warns that it's doing so, even for dataframes using the
155-
# Pandas backend.
156-
with warns_that_defaulting_to_pandas():
156+
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
157157
assert groupby.ngroups == 3
158158

159159
def test_add_ngroups_setter_and_deleter_for_one_backend(
@@ -179,7 +179,7 @@ def _set_ngroups(self, value):
179179

180180
python_test_groupby = get_groupby(python_test_df)
181181

182-
with warns_that_defaulting_to_pandas():
182+
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
183183
assert python_test_groupby.ngroups == 3
184184

185185
with pytest.raises(AttributeError):
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Licensed to Modin Development Team under one or more contributor license agreements.
2+
# See the NOTICE file distributed with this work for additional information regarding
3+
# copyright ownership. The Modin Development Team licenses this file to you under the
4+
# Apache License, Version 2.0 (the "License"); you may not use this file except in
5+
# compliance with the License. You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software distributed under
10+
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific language
12+
# governing permissions and limitations under the License.
13+
14+
# While other modin backends raise a warning when defaulting to pandas, it does not make sense to
15+
# do so when we're running on the native pandas backend already. These tests ensure such warnings
16+
# are not raised with the pandas backend.
17+
18+
import numpy as np
19+
import pandas
20+
import pytest
21+
22+
import modin.pandas as pd
23+
from modin.config import Backend
24+
from modin.tests.pandas.utils import df_equals
25+
26+
pytestmark = [
27+
pytest.mark.skipif(
28+
Backend.get() != "Pandas",
29+
reason="warnings only suppressed on native pandas backend",
30+
allow_module_level=True,
31+
),
32+
# Turn a matching default-to-pandas warning into an error so the test fails if one is emitted.
33+
pytest.mark.filterwarnings("error:is not supported by NativeOnNative:UserWarning"),
34+
]
35+
36+
37+
def test_crosstab_no_warning():
38+
# Example from pandas docs
39+
# https://pandas.pydata.org/docs/reference/api/pandas.crosstab.html
40+
a = np.array(
41+
["foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar", "foo", "foo", "foo"],
42+
dtype=object,
43+
)
44+
b = np.array(
45+
["one", "one", "one", "two", "one", "one", "one", "two", "two", "two", "one"],
46+
dtype=object,
47+
)
48+
c = np.array(
49+
[
50+
"dull",
51+
"dull",
52+
"shiny",
53+
"dull",
54+
"dull",
55+
"shiny",
56+
"shiny",
57+
"dull",
58+
"shiny",
59+
"shiny",
60+
"shiny",
61+
],
62+
dtype=object,
63+
)
64+
df_equals(
65+
pd.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"]),
66+
pandas.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"]),
67+
)
68+
69+
70+
def test_json_normalize_no_warning():
71+
# Example from pandas docs
72+
# https://pandas.pydata.org/docs/reference/api/pandas.json_normalize.html
73+
data = [
74+
{"id": 1, "name": {"first": "Coleen", "last": "Volk"}},
75+
{"name": {"given": "Mark", "family": "Regner"}},
76+
{"id": 2, "name": "Faye Raker"},
77+
]
78+
df_equals(pd.json_normalize(data), pandas.json_normalize(data))

0 commit comments

Comments
 (0)