Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,8 @@ class BaseQueryCompiler(
_shape_hint: Optional[str]
_should_warn_on_default_to_pandas: bool = True

def _maybe_warn_on_default(self, *, message: str = "", reason: str = "") -> None:
@classmethod
def _maybe_warn_on_default(cls, *, message: str = "", reason: str = "") -> None:
"""
If this class is configured to warn on default to pandas, warn.

Expand All @@ -217,7 +218,7 @@ def _maybe_warn_on_default(self, *, message: str = "", reason: str = "") -> None
reason : str, default: ""
Reason for default.
"""
if self._should_warn_on_default_to_pandas:
if cls._should_warn_on_default_to_pandas:
ErrorMessage.default_to_pandas(message=message, reason=reason)

@disable_logging
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,8 +561,8 @@ def _default_to_pandas(self, op, *args, reason: str = None, **kwargs):
Result of operation.
"""
empty_self_str = "" if not self.empty else " for empty DataFrame"
ErrorMessage.default_to_pandas(
"`{}.{}`{}".format(
self._query_compiler._maybe_warn_on_default(
message="`{}.{}`{}".format(
type(self).__name__,
op if isinstance(op, str) else op.__name__,
empty_self_str,
Expand Down
13 changes: 6 additions & 7 deletions modin/pandas/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,9 @@
from modin.core.storage_formats.pandas.query_compiler_caster import (
wrap_free_function_in_argument_caster,
)
from modin.error_message import ErrorMessage
from modin.logging import enable_logging
from modin.pandas.io import to_pandas
from modin.utils import _inherit_docstrings
from modin.utils import _inherit_docstrings, _maybe_warn_on_default

from .base import BasePandasDataset
from .dataframe import DataFrame
Expand Down Expand Up @@ -193,7 +192,7 @@ def merge_asof(
raise ValueError(
"can not merge DataFrame with instance of type {}".format(type(right))
)
ErrorMessage.default_to_pandas("`merge_asof`")
left._query_compiler._maybe_warn_on_default(message="`merge_asof`")

# As of Pandas 1.2 these should raise an error; before that it did
# something likely random:
Expand Down Expand Up @@ -345,7 +344,7 @@ def cut(
if isinstance(x, DataFrame):
raise ValueError("Input array must be 1 dimensional")
if not isinstance(x, Series):
ErrorMessage.default_to_pandas(
_maybe_warn_on_default(
reason=f"pd.cut is not supported on objects of type {type(x)}"
)
import pandas
Expand Down Expand Up @@ -656,7 +655,7 @@ def get_dummies(
+ "github.com/modin-project/modin."
)
if not isinstance(data, DataFrame):
ErrorMessage.default_to_pandas("`get_dummies` on non-DataFrame")
_maybe_warn_on_default("`get_dummies` on non-DataFrame")
if isinstance(data, Series):
data = data._to_pandas()
return DataFrame(
Expand Down Expand Up @@ -726,7 +725,7 @@ def crosstab(
"""
Compute a simple cross tabulation of two (or more) factors.
"""
ErrorMessage.default_to_pandas("`crosstab`")
_maybe_warn_on_default("`crosstab`")
pandas_crosstab = pandas.crosstab(
index,
columns,
Expand Down Expand Up @@ -769,7 +768,7 @@ def lreshape(data: DataFrame, groups, dropna=True) -> DataFrame:
"""
if not isinstance(data, DataFrame):
raise ValueError("can not lreshape with instance of type {}".format(type(data)))
ErrorMessage.default_to_pandas("`lreshape`")
data._query_compiler._maybe_warn_on_default(message="`lreshape`")
return DataFrame(pandas.lreshape(to_pandas(data), groups, dropna=dropna))


Expand Down
14 changes: 7 additions & 7 deletions modin/pandas/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,13 @@
from modin.core.storage_formats.pandas.query_compiler_caster import (
wrap_free_function_in_argument_caster,
)
from modin.error_message import ErrorMessage
from modin.logging import ClassLogger, enable_logging
from modin.utils import (
SupportsPrivateToNumPy,
SupportsPublicToNumPy,
SupportsPublicToPandas,
_inherit_docstrings,
_maybe_warn_on_default,
classproperty,
expanduser_path_arg,
)
Expand Down Expand Up @@ -156,7 +156,7 @@ def read_xml(
storage_options: StorageOptions = None,
dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
) -> DataFrame:
ErrorMessage.default_to_pandas("read_xml")
_maybe_warn_on_default("read_xml")
_, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
return ModinObjects.DataFrame(pandas.read_xml(**kwargs))

Expand Down Expand Up @@ -658,7 +658,7 @@ def read_sql(
from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

if kwargs.get("chunksize") is not None:
ErrorMessage.default_to_pandas("Parameters provided [chunksize]")
_maybe_warn_on_default("Parameters provided [chunksize]")
df_gen = pandas.read_sql(**kwargs)
return (
ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_pandas(df))
Expand Down Expand Up @@ -818,7 +818,7 @@ def json_normalize(
"""
Normalize semi-structured JSON data into a flat table.
"""
ErrorMessage.default_to_pandas("json_normalize")
_maybe_warn_on_default("json_normalize")
return ModinObjects.DataFrame(
pandas.json_normalize(
data, record_path, meta, meta_prefix, record_prefix, errors, sep, max_level
Expand All @@ -840,7 +840,7 @@ def read_orc(
"""
Load an ORC object from the file path, returning a DataFrame.
"""
ErrorMessage.default_to_pandas("read_orc")
_maybe_warn_on_default("read_orc")
return ModinObjects.DataFrame(
pandas.read_orc(
path,
Expand Down Expand Up @@ -886,7 +886,7 @@ def return_handler(*args, **kwargs):
# We don't want to constantly be giving this error message for
# internal methods.
if item[0] != "_":
ErrorMessage.default_to_pandas("`{}`".format(item))
_maybe_warn_on_default("`{}`".format(item))
args = [
(
to_pandas(arg)
Expand Down Expand Up @@ -952,7 +952,7 @@ def return_handler(*args, **kwargs):
# We don't want to constantly be giving this error message for
# internal methods.
if item[0] != "_":
ErrorMessage.default_to_pandas("`{}`".format(item))
_maybe_warn_on_default("`{}`".format(item))
args = [
(
to_pandas(arg)
Expand Down
4 changes: 2 additions & 2 deletions modin/tests/experimental/test_io_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
time_parsing_csv_path,
)
from modin.tests.test_utils import (
warns_that_defaulting_to_pandas,
current_execution_is_native,
warns_that_defaulting_to_pandas_if,
)
from modin.utils import try_cast_to_pandas
Expand Down Expand Up @@ -129,7 +129,7 @@ def test_read_csv_empty_frame(self):

def test_read_csv_without_glob(self):
with pytest.raises(FileNotFoundError):
with warns_that_defaulting_to_pandas():
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
pd.read_csv_glob(
"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-",
storage_options={"anon": True},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from modin.tests.pandas.utils import df_equals, test_data
from modin.tests.test_utils import (
df_or_series_using_native_execution,
warns_that_defaulting_to_pandas,
warns_that_defaulting_to_pandas_if,
)

Expand Down Expand Up @@ -66,7 +65,9 @@ def test_categorical_from_dataframe():

def test_from_dataframe_with_empty_dataframe():
modin_df = pd.DataFrame({"foo_col": pd.Series([], dtype="int64")})
with warns_that_defaulting_to_pandas():
with warns_that_defaulting_to_pandas_if(
not df_or_series_using_native_execution(modin_df)
):
eval_df_protocol(modin_df)


Expand Down
3 changes: 1 addition & 2 deletions modin/tests/pandas/dataframe/test_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
)
from modin.tests.test_utils import (
current_execution_is_native,
warns_that_defaulting_to_pandas,
warns_that_defaulting_to_pandas_if,
)

Expand Down Expand Up @@ -147,7 +146,7 @@ def test_display_options_for___repr__(max_rows_columns, expand_frame_repr, frame
def test___finalize__():
data = test_data_values[0]
# NOTE: __finalize__() defaults to pandas at the API layer.
with warns_that_defaulting_to_pandas():
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
pd.DataFrame(data).__finalize__(None)


Expand Down
9 changes: 6 additions & 3 deletions modin/tests/pandas/dataframe/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@
udf_func_keys,
udf_func_values,
)
from modin.tests.test_utils import warns_that_defaulting_to_pandas
from modin.tests.test_utils import (
current_execution_is_native,
warns_that_defaulting_to_pandas_if,
)
from modin.utils import get_current_execution

NPartitions.put(4)
Expand Down Expand Up @@ -126,10 +129,10 @@ def test_aggregate_alias():
def test_aggregate_error_checking():
modin_df = pd.DataFrame(test_data["float_nan_data"])

with warns_that_defaulting_to_pandas():
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
modin_df.aggregate({modin_df.columns[0]: "sum", modin_df.columns[1]: "mean"})

with warns_that_defaulting_to_pandas():
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
modin_df.aggregate("arcsin")


Expand Down
12 changes: 6 additions & 6 deletions modin/tests/pandas/extensions/test_groupby_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@
)
from modin.pandas.groupby import DataFrameGroupBy, SeriesGroupBy
from modin.tests.pandas.utils import default_to_pandas_ignore_string, df_equals
from modin.tests.test_utils import warns_that_defaulting_to_pandas
from modin.tests.test_utils import (
current_execution_is_native,
warns_that_defaulting_to_pandas_if,
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -150,10 +153,7 @@ def ngroups(self):
# Check that the accessor doesn't work on the Python_Test backend.
python_test_df = pandas_df.move_to("Python_Test")
groupby = get_groupby(python_test_df)
# groupby.ngroups defaults to pandas at the API layer,
# where it warns that it's doing so, even for dataframes using the
# Pandas backend.
with warns_that_defaulting_to_pandas():
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
assert groupby.ngroups == 3

def test_add_ngroups_setter_and_deleter_for_one_backend(
Expand All @@ -179,7 +179,7 @@ def _set_ngroups(self, value):

python_test_groupby = get_groupby(python_test_df)

with warns_that_defaulting_to_pandas():
with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
assert python_test_groupby.ngroups == 3

with pytest.raises(AttributeError):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# While other modin backends raise a warning when defaulting to pandas, it does not make sense to
# do so when we're running on the native pandas backend already. These tests ensure such warnings
# are not raised with the pandas backend.

import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import Backend
from modin.tests.pandas.utils import df_equals

# Module-wide pytest marks applied to every test in this file.
pytestmark = [
    # Run these tests only when the configured backend is "Pandas".
    # NOTE: ``allow_module_level`` is an argument of ``pytest.skip()``, not of
    # the ``skipif`` mark (which ignores unknown keywords), so it is omitted.
    pytest.mark.skipif(
        Backend.get() != "Pandas",
        reason="warnings only suppressed on native pandas backend",
    ),
    # Error if a default to pandas warning is detected.
    # The message field of a warning filter is a regular expression matched
    # against the *start* of the warning text, so a leading ``.*`` is needed
    # for this mid-sentence fragment to match at all.
    # NOTE(review): the optional "currently " is meant to cover the wording
    # variant "is not currently supported by ..." -- confirm against the text
    # actually emitted by ``ErrorMessage.default_to_pandas``.
    pytest.mark.filterwarnings(
        r"error:.*is not (currently )?supported by NativeOnNative:UserWarning"
    ),
]


def test_crosstab_no_warning():
    """Check that ``pd.crosstab`` matches ``pandas.crosstab`` on the docs example.

    The test body does not assert on warnings itself; any warning handling
    comes from the module-level ``pytestmark`` filters.
    """
    # Example from pandas docs
    # https://pandas.pydata.org/docs/reference/api/pandas.crosstab.html
    row_factor = np.array(
        "foo foo foo foo bar bar bar bar foo foo foo".split(),
        dtype=object,
    )
    outer_col_factor = np.array(
        "one one one two one one one two two two one".split(),
        dtype=object,
    )
    inner_col_factor = np.array(
        "dull dull shiny dull dull shiny shiny dull shiny shiny shiny".split(),
        dtype=object,
    )

    def _tabulate(lib):
        # Build fresh argument lists per call so the two libraries never
        # share a container object.
        return lib.crosstab(
            row_factor,
            [outer_col_factor, inner_col_factor],
            rownames=["a"],
            colnames=["b", "c"],
        )

    # Modin result first, then the pandas reference, as positional arguments.
    df_equals(_tabulate(pd), _tabulate(pandas))


def test_json_normalize_no_warning():
    """Check that ``pd.json_normalize`` matches ``pandas.json_normalize``.

    The test body does not assert on warnings itself; any warning handling
    comes from the module-level ``pytestmark`` filters.
    """
    # Example from pandas docs
    # https://pandas.pydata.org/docs/reference/api/pandas.json_normalize.html
    records = [
        {"id": 1, "name": {"first": "Coleen", "last": "Volk"}},
        {"name": {"given": "Mark", "family": "Regner"}},
        {"id": 2, "name": "Faye Raker"},
    ]
    modin_result = pd.json_normalize(records)
    pandas_result = pandas.json_normalize(records)
    df_equals(modin_result, pandas_result)
Loading
Loading