Skip to content
Merged
14 changes: 13 additions & 1 deletion modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3079,9 +3079,21 @@ def sort_rows_by_column_values(
BaseQueryCompiler
New QueryCompiler that contains result of the sort.
"""
return DataFrameDefault.register(pandas.DataFrame.sort_values)(
# Avoid index/column name collisions by renaming and restoring after sorting
index_renaming = None
if is_scalar(columns):
columns = [columns]
if any(name in columns for name in self.index.names):
index_renaming = self.index.names
self.index = self.index.set_names([None] * len(self.index.names))
new_query_compiler = DataFrameDefault.register(pandas.DataFrame.sort_values)(
self, by=columns, axis=0, ascending=ascending, **kwargs
)
if index_renaming is not None:
new_query_compiler.index = new_query_compiler.index.set_names(
index_renaming
)
return new_query_compiler

# END Abstract map across rows/columns

Expand Down
11 changes: 10 additions & 1 deletion modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3833,7 +3833,16 @@ def value_counts(
by=subset, dropna=dropna, observed=True, sort=False
).size()
if sort:
counted_values.sort_values(ascending=ascending, inplace=True)
if counted_values.name is None:
counted_values.name = 0
by = counted_values.name
result = counted_values._query_compiler.sort_rows_by_column_values(
columns=by,
ascending=ascending,
)
counted_values = self._create_or_update_from_compiler(result)
if isinstance(counted_values, pd.DataFrame):
counted_values = counted_values.squeeze(axis=1)
if normalize:
counted_values = counted_values / counted_values.sum()
# TODO: uncomment when strict compability mode will be implemented:
Expand Down
42 changes: 42 additions & 0 deletions modin/tests/pandas/dataframe/test_join_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,3 +1012,45 @@ def test_compare(align_axis, keep_shape, keep_equal):
modin_result = modin_series2.compare(modin_series1, **kwargs)
pandas_result = pandas_series2.compare(pandas_series1, **kwargs)
assert to_pandas(modin_result).equals(pandas_result)


@pytest.mark.parametrize(
    "params",
    [
        {"ascending": True},
        {"normalize": True},
        pytest.param(
            {"sort": False},
            marks=pytest.mark.xfail(
                reason="Known issue with sort=False in `groupby()` "
                "(https://github.com/modin-project/modin/issues/3571)",
                # The xfail is strict only when the configured engine is one
                # of these three; on any other engine an unexpected pass is
                # tolerated.
                strict=Engine.get() in ("Ray", "Dask", "Unidist"),
            ),
        ),
    ],
)
def test_value_counts(params):
    """Run ``value_counts`` on column ``col1`` for each keyword-argument set.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded verbatim to ``value_counts``. The
        ``sort=False`` case is marked as an expected failure (see the
        ``xfail`` reason attached to that parameter).
    """
    frames = create_test_dfs(
        [[4, 1, 3, 2], [2, 5, 6, 5], [4, 3, 3, 5]],
        columns=["col1", "col2", "col3", "col4"],
    )

    def count_first_column(df):
        # The operation handed to ``eval_general`` for every frame.
        return df["col1"].value_counts(**params)

    eval_general(*frames, count_first_column)


def test_value_counts_with_nulls():
    """Run ``value_counts(dropna=False)`` on column ``0`` of data containing ``None``."""
    # Rows are deliberately ragged and include ``None`` entries.
    rows_with_missing = [[5, 6, None, 7, 7], [None, None, 5, 8]]
    frames = create_test_dfs(rows_with_missing)
    eval_general(*frames, lambda df: df[0].value_counts(dropna=False))


def test_value_counts_with_multiindex():
    """Run ``value_counts`` on column ``0`` of frames built with a named two-level index."""
    outer_labels = ["a", "a", "b", "b"]
    inner_labels = [1, 2, 1, 2]
    row_index = pd.MultiIndex.from_arrays(
        arrays=[outer_labels, inner_labels], names=("l1", "l2")
    )

    # NOTE(review): the data below is a single row of four values while the
    # index carries four labels -- confirm that ``create_test_dfs`` accepts
    # this shape combination as intended.
    frames = create_test_dfs([[1, 2, 2, 4]], index=row_index)
    eval_general(*frames, lambda df: df[0].value_counts())
Loading