Make DataFrame.any_rowwise top-level, rename to _horizontal (#324)

MarcoGorelli · web-flow · commit 27d5fc43cb87 · 2023-11-22T20:10:46.000Z
diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py
@@ -2,7 +2,7 @@
 """Function stubs and API documentation for the DataFrame API standard."""
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Literal
 
 from .column_object import Column
 from .dataframe_object import DataFrame
@@ -300,3 +300,119 @@ def date(year: int, month: int, day: int) -> Scalar:
     ... )
     >>> df.filter(mask)
     """
+
+
+def any_horizontal(*columns: Column, skip_nulls: bool = True) -> Column:
+    """Reduction returns a Column.
+
+    Differs from :meth:`DataFrame.any` in that the reduction happens
+    for each row, rather than for each column.
+
+    All the `columns` must have the same parent DataFrame.
+    The return value has the same parent DataFrame as the input columns.
+
+    Raises
+    ------
+    ValueError
+        If any of the columns is not boolean.
+
+    Examples
+    --------
+    >>> df: DataFrame
+    >>> ns = df.__dataframe_namespace__()
+    >>> mask = ns.any_horizontal(
+    ...     *[df.col(col_name) > 0 for col_name in df.column_names]
+    ... )
+    >>> df = df.filter(mask)
+    """
+    ...
+
+
+def all_horizontal(*columns: Column, skip_nulls: bool = True) -> Column:
+    """Reduction returns a Column.
+
+    Differs from :meth:`DataFrame.all` in that the reduction happens
+    for each row, rather than for each column.
+
+    All the `columns` must have the same parent DataFrame.
+    The return value has the same parent DataFrame as the input columns.
+
+    Raises
+    ------
+    ValueError
+        If any of the columns is not boolean.
+
+    Examples
+    --------
+    >>> df: DataFrame
+    >>> ns = df.__dataframe_namespace__()
+    >>> mask = ns.all_horizontal(
+    ...     *[df.col(col_name) > 0 for col_name in df.column_names]
+    ... )
+    >>> df = df.filter(mask)
+    """
+    ...
+
+
+def sorted_indices(
+    *columns: Column,
+    ascending: Sequence[bool] | bool = True,
+    nulls_position: Literal["first", "last"] = "last",
+) -> Column:
+    """Return row numbers which would sort according to given columns.
+
+    If you need to sort the DataFrame, use :meth:`DataFrame.sort`.
+
+    Parameters
+    ----------
+    *columns : Column
+        Columns to sort by.
+    ascending : Sequence[bool] or bool
+        If `True`, sort by all keys in ascending order.
+        If `False`, sort by all keys in descending order.
+        If a sequence, it must be the same length as `columns`,
+        and determines the direction with which to use each
+        key to sort by.
+    nulls_position : ``{'first', 'last'}``
+        Whether null values should be placed at the beginning
+        or at the end of the result.
+        Note that the position of NaNs is unspecified and may
+        vary based on the implementation.
+
+    Returns
+    -------
+    Column
+        The return value has the same parent DataFrame as the input columns.
+
+    Raises
+    ------
+    ValueError
+        If `columns` and `ascending` are sequences of different lengths.
+    """
+    ...
+
+
+def unique_indices(*columns: Column, skip_nulls: bool = True) -> Column:
+    """Return indices corresponding to unique values across selected columns.
+
+    Parameters
+    ----------
+    *columns : Column
+        Columns to consider when finding unique values.
+
+    Returns
+    -------
+    Column
+        Indices corresponding to unique values.
+
+    Notes
+    -----
+    There are no ordering guarantees. In particular, if there are multiple
+    indices corresponding to the same unique value(s), there is no guarantee
+    about which one will appear in the result.
+    If the original column(s) contain multiple `'NaN'` values, then
+    only a single index corresponding to those values will be returned.
+    Likewise for null values (if ``skip_nulls=False``).
+    To get the unique values, you can do ``df.get_rows(unique_indices(*columns))``.
+    """
+    ...
diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py
@@ -183,7 +183,7 @@ def sort(
         """Sort column.
 
         If you need the indices which would sort the column,
-        use :meth:`sorted_indices`.
+        use `sorted_indices`.
 
         Parameters
         ----------
diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py
@@ -284,7 +284,7 @@ def sort(
         """Sort dataframe according to given columns.
 
         If you only need the indices which would sort the dataframe, use
-        :meth:`sorted_indices`.
+        `sorted_indices`.
 
         Parameters
         ----------
@@ -314,44 +314,6 @@ def sort(
         """
         ...
 
-    def sorted_indices(
-        self,
-        *keys: str,
-        ascending: Sequence[bool] | bool = True,
-        nulls_position: Literal["first", "last"] = "last",
-    ) -> Column:
-        """Return row numbers which would sort according to given columns.
-
-        If you need to sort the DataFrame, use :meth:`sort`.
-
-        Parameters
-        ----------
-        *keys : str
-            Names of columns to sort by.
-            If not specified, sort by all columns.
-        ascending : Sequence[bool] or bool
-            If `True`, sort by all keys in ascending order.
-            If `False`, sort by all keys in descending order.
-            If a sequence, it must be the same length as `keys`,
-            and determines the direction with which to use each
-            key to sort by.
-        nulls_position : ``{'first', 'last'}``
-            Whether null values should be placed at the beginning
-            or at the end of the result.
-            Note that the position of NaNs is unspecified and may
-            vary based on the implementation.
-
-        Returns
-        -------
-        Column
-
-        Raises
-        ------
-        ValueError
-            If `keys` and `ascending` are sequences of different lengths.
-        """
-        ...
-
     def __eq__(self, other: AnyScalar) -> Self:  # type: ignore[override]
         """Compare for equality.
 
@@ -678,32 +640,6 @@ def all(self, *, skip_nulls: bool | Scalar = True) -> Self:
         """
         ...
 
-    def any_rowwise(self, *, skip_nulls: bool | Scalar = True) -> Column:
-        """Reduction returns a Column.
-
-        Differs from ``DataFrame.any`` and that the reduction happens
-        for each row, rather than for each column.
-
-        Raises
-        ------
-        ValueError
-            If any of the DataFrame's columns is not boolean.
-        """
-        ...
-
-    def all_rowwise(self, *, skip_nulls: bool | Scalar = True) -> Column:
-        """Reduction returns a Column.
-
-        Differs from ``DataFrame.all`` and that the reduction happens
-        for each row, rather than for each column.
-
-        Raises
-        ------
-        ValueError
-            If any of the DataFrame's columns is not boolean.
-        """
-        ...
-
     def min(self, *, skip_nulls: bool | Scalar = True) -> Self:
         """Reduction returns a 1-row DataFrame."""
         ...
@@ -804,32 +740,6 @@ def is_nan(self) -> Self:
         """
         ...
 
-    def unique_indices(self, *keys: str, skip_nulls: bool | Scalar = True) -> Column:
-        """Return indices corresponding to unique values across selected columns.
-
-        Parameters
-        ----------
-        *keys : str
-            Column names to consider when finding unique values.
-            If not specified, all columns are considered.
-
-        Returns
-        -------
-        Column
-            Indices corresponding to unique values.
-
-        Notes
-        -----
-        There are no ordering guarantees. In particular, if there are multiple
-        indices corresponding to the same unique value(s), there is no guarantee
-        about which one will appear in the result.
-        If the original column(s) contain multiple `'NaN'` values, then
-        only a single index corresponding to those values will be returned.
-        Likewise for null values (if ``skip_nulls=False``).
-        To get the unique values, you can do ``df.get_rows(df.unique_indices(keys))``.
-        """
-        ...
-
     def fill_nan(self, value: float | NullType | Scalar, /) -> Self:
         """Fill ``nan`` values with the given fill value.
 
diff --git a/spec/API_specification/dataframe_api/typing.py b/spec/API_specification/dataframe_api/typing.py
@@ -134,6 +134,35 @@ def is_dtype(self, dtype: DType, kind: str | tuple[str, ...]) -> bool:
     def date(self, year: int, month: int, day: int) -> Scalar:
         ...
 
+    def any_horizontal(
+        self,
+        *columns: Column,
+        skip_nulls: bool = True,
+    ) -> Column:
+        ...
+
+    def all_horizontal(
+        self,
+        *columns: Column,
+        skip_nulls: bool = True,
+    ) -> Column:
+        ...
+
+    def sorted_indices(
+        self,
+        *columns: Column,
+        ascending: Sequence[bool] | bool = True,
+        nulls_position: Literal["first", "last"] = "last",
+    ) -> Column:
+        ...
+
+    def unique_indices(
+        self,
+        *columns: Column,
+        skip_nulls: bool = True,
+    ) -> Column:
+        ...
+
 
 DType = Union[
     Namespace.Bool,
diff --git a/spec/API_specification/examples/06_horizontal_functions.py b/spec/API_specification/examples/06_horizontal_functions.py
@@ -0,0 +1,28 @@
+"""Example of how to use a horizontal function.
+
+Horizontal functions are functions that take multiple columns as input and return a
+single column as output.
+
+Examples include:
+- `any_horizontal`
+- `all_horizontal`
+
+These can be accessed by first using ``__dataframe_namespace__`` to get the
+namespace object, and then calling the function on the namespace object and passing
+the ``Column``s as positional arguments.
+"""
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from dataframe_api.typing import SupportsDataFrameAPI
+
+
+def main(df_raw: SupportsDataFrameAPI) -> SupportsDataFrameAPI:
+    df = df_raw.__dataframe_consortium_standard__(api_version="2023-11.beta")
+    ns = df.__dataframe_namespace__()
+    df = df.filter(
+        ns.any_horizontal(*[df.col(col_name) > 0 for col_name in df.column_names]),
+    )
+    return df.dataframe