Skip to content

Commit 27d5fc4

Browse files
authored
Make DataFrame.any_rowwise top-level, rename to _horizontal (#324)
1 parent 2623018 commit 27d5fc4

File tree

5 files changed

+176
-93
lines changed

5 files changed

+176
-93
lines changed

spec/API_specification/dataframe_api/__init__.py

Lines changed: 117 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"""Function stubs and API documentation for the DataFrame API standard."""
33
from __future__ import annotations
44

5-
from typing import TYPE_CHECKING, Any
5+
from typing import TYPE_CHECKING, Any, Literal
66

77
from .column_object import Column
88
from .dataframe_object import DataFrame
@@ -300,3 +300,119 @@ def date(year: int, month: int, day: int) -> Scalar:
300300
... )
301301
>>> df.filter(mask)
302302
"""
303+
304+
305+
def any_horizontal(*columns: Column, skip_nulls: bool = True) -> Column:
306+
"""Reduction returns a Column.
307+
308+
Differs from :meth:`DataFrame.any` in that the reduction happens
309+
for each row, rather than for each column.
310+
311+
All the `columns` must have the same parent DataFrame.
312+
The return value has the same parent DataFrame as the input columns.
313+
314+
Raises
315+
------
316+
ValueError
317+
If any of the columns is not boolean.
318+
319+
Examples
320+
--------
321+
>>> df: DataFrame
322+
>>> ns = df.__dataframe_namespace__()
323+
>>> mask = ns.any_horizontal(
324+
...     *[df.col(col_name) > 0 for col_name in df.column_names]
325+
... )
326+
>>> df = df.filter(mask)
327+
"""
328+
...
329+
330+
331+
def all_horizontal(*columns: Column, skip_nulls: bool = True) -> Column:
332+
"""Reduction returns a Column.
333+
334+
Differs from :meth:`DataFrame.all` in that the reduction happens
335+
for each row, rather than for each column.
336+
337+
All the `columns` must have the same parent DataFrame.
338+
The return value has the same parent DataFrame as the input columns.
339+
340+
Raises
341+
------
342+
ValueError
343+
If any of the columns is not boolean.
344+
345+
Examples
346+
--------
347+
>>> df: DataFrame
348+
>>> ns = df.__dataframe_namespace__()
349+
>>> mask = ns.all_horizontal(
350+
...     *[df.col(col_name) > 0 for col_name in df.column_names]
351+
... )
352+
>>> df = df.filter(mask)
353+
"""
354+
...
355+
356+
357+
def sorted_indices(
358+
*columns: Column,
359+
ascending: Sequence[bool] | bool = True,
360+
nulls_position: Literal["first", "last"] = "last",
361+
) -> Column:
362+
"""Return row numbers which would sort according to given columns.
363+
364+
If you need to sort the DataFrame, use :meth:`DataFrame.sort`.
365+
366+
Parameters
367+
----------
368+
*columns : Column
369+
Columns to sort by.
370+
ascending : Sequence[bool] or bool
371+
If `True`, sort by all keys in ascending order.
372+
If `False`, sort by all keys in descending order.
373+
If a sequence, it must be the same length as `columns`,
374+
and determines the direction with which to use each
375+
key to sort by.
376+
nulls_position : ``{'first', 'last'}``
377+
Whether null values should be placed at the beginning
378+
or at the end of the result.
379+
Note that the position of NaNs is unspecified and may
380+
vary based on the implementation.
381+
382+
Returns
383+
-------
384+
Column
385+
The return value has the same parent DataFrame as the input columns.
386+
387+
Raises
388+
------
389+
ValueError
390+
If `columns` and `ascending` are sequences of different lengths.
391+
"""
392+
...
393+
394+
395+
def unique_indices(*columns: Column, skip_nulls: bool = True) -> Column:
396+
"""Return indices corresponding to unique values across selected columns.
397+
398+
Parameters
399+
----------
400+
*columns : Column
401+
Columns to consider when finding unique values.
402+
403+
Returns
404+
-------
405+
Column
406+
Indices corresponding to unique values.
407+
408+
Notes
409+
-----
410+
There are no ordering guarantees. In particular, if there are multiple
411+
indices corresponding to the same unique value(s), there is no guarantee
412+
about which one will appear in the result.
413+
If the original column(s) contain multiple `'NaN'` values, then
414+
only a single index corresponding to those values will be returned.
415+
Likewise for null values (if ``skip_nulls=False``).
416+
To get the unique values, you can do ``df.get_rows(ns.unique_indices(*columns))``.
417+
"""
418+
...

spec/API_specification/dataframe_api/column_object.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def sort(
183183
"""Sort column.
184184
185185
If you need the indices which would sort the column,
186-
use :meth:`sorted_indices`.
186+
use `sorted_indices`.
187187
188188
Parameters
189189
----------

spec/API_specification/dataframe_api/dataframe_object.py

Lines changed: 1 addition & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ def sort(
284284
"""Sort dataframe according to given columns.
285285
286286
If you only need the indices which would sort the dataframe, use
287-
:meth:`sorted_indices`.
287+
`sorted_indices`.
288288
289289
Parameters
290290
----------
@@ -314,44 +314,6 @@ def sort(
314314
"""
315315
...
316316

317-
def sorted_indices(
318-
self,
319-
*keys: str,
320-
ascending: Sequence[bool] | bool = True,
321-
nulls_position: Literal["first", "last"] = "last",
322-
) -> Column:
323-
"""Return row numbers which would sort according to given columns.
324-
325-
If you need to sort the DataFrame, use :meth:`sort`.
326-
327-
Parameters
328-
----------
329-
*keys : str
330-
Names of columns to sort by.
331-
If not specified, sort by all columns.
332-
ascending : Sequence[bool] or bool
333-
If `True`, sort by all keys in ascending order.
334-
If `False`, sort by all keys in descending order.
335-
If a sequence, it must be the same length as `keys`,
336-
and determines the direction with which to use each
337-
key to sort by.
338-
nulls_position : ``{'first', 'last'}``
339-
Whether null values should be placed at the beginning
340-
or at the end of the result.
341-
Note that the position of NaNs is unspecified and may
342-
vary based on the implementation.
343-
344-
Returns
345-
-------
346-
Column
347-
348-
Raises
349-
------
350-
ValueError
351-
If `keys` and `ascending` are sequences of different lengths.
352-
"""
353-
...
354-
355317
def __eq__(self, other: AnyScalar) -> Self: # type: ignore[override]
356318
"""Compare for equality.
357319
@@ -678,32 +640,6 @@ def all(self, *, skip_nulls: bool | Scalar = True) -> Self:
678640
"""
679641
...
680642

681-
def any_rowwise(self, *, skip_nulls: bool | Scalar = True) -> Column:
682-
"""Reduction returns a Column.
683-
684-
Differs from ``DataFrame.any`` and that the reduction happens
685-
for each row, rather than for each column.
686-
687-
Raises
688-
------
689-
ValueError
690-
If any of the DataFrame's columns is not boolean.
691-
"""
692-
...
693-
694-
def all_rowwise(self, *, skip_nulls: bool | Scalar = True) -> Column:
695-
"""Reduction returns a Column.
696-
697-
Differs from ``DataFrame.all`` and that the reduction happens
698-
for each row, rather than for each column.
699-
700-
Raises
701-
------
702-
ValueError
703-
If any of the DataFrame's columns is not boolean.
704-
"""
705-
...
706-
707643
def min(self, *, skip_nulls: bool | Scalar = True) -> Self:
708644
"""Reduction returns a 1-row DataFrame."""
709645
...
@@ -804,32 +740,6 @@ def is_nan(self) -> Self:
804740
"""
805741
...
806742

807-
def unique_indices(self, *keys: str, skip_nulls: bool | Scalar = True) -> Column:
808-
"""Return indices corresponding to unique values across selected columns.
809-
810-
Parameters
811-
----------
812-
*keys : str
813-
Column names to consider when finding unique values.
814-
If not specified, all columns are considered.
815-
816-
Returns
817-
-------
818-
Column
819-
Indices corresponding to unique values.
820-
821-
Notes
822-
-----
823-
There are no ordering guarantees. In particular, if there are multiple
824-
indices corresponding to the same unique value(s), there is no guarantee
825-
about which one will appear in the result.
826-
If the original column(s) contain multiple `'NaN'` values, then
827-
only a single index corresponding to those values will be returned.
828-
Likewise for null values (if ``skip_nulls=False``).
829-
To get the unique values, you can do ``df.get_rows(df.unique_indices(keys))``.
830-
"""
831-
...
832-
833743
def fill_nan(self, value: float | NullType | Scalar, /) -> Self:
834744
"""Fill ``nan`` values with the given fill value.
835745

spec/API_specification/dataframe_api/typing.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,35 @@ def is_dtype(self, dtype: DType, kind: str | tuple[str, ...]) -> bool:
134134
def date(self, year: int, month: int, day: int) -> Scalar:
135135
...
136136

137+
def any_horizontal(
138+
self,
139+
*columns: Column,
140+
skip_nulls: bool = True,
141+
) -> Column:
142+
...
143+
144+
def all_horizontal(
145+
self,
146+
*columns: Column,
147+
skip_nulls: bool = True,
148+
) -> Column:
149+
...
150+
151+
def sorted_indices(
152+
self,
153+
*columns: Column,
154+
ascending: Sequence[bool] | bool = True,
155+
nulls_position: Literal["first", "last"] = "last",
156+
) -> Column:
157+
...
158+
159+
def unique_indices(
160+
self,
161+
*columns: Column,
162+
skip_nulls: bool = True,
163+
) -> Column:
164+
...
165+
137166

138167
DType = Union[
139168
Namespace.Bool,
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""Example of how to use a horizontal function.
2+
3+
Horizontal functions are functions that take multiple columns as input and return a
4+
single column as output.
5+
6+
Examples include:
7+
- `any_horizontal`
8+
- `all_horizontal`
9+
10+
These can be accessed by first using ``__dataframe_namespace__`` to get the
11+
namespace object, and then calling the function on the namespace object and passing
12+
the ``Column``s as positional arguments (unpack an iterable of them with ``*``).
13+
"""
14+
from __future__ import annotations
15+
16+
from typing import TYPE_CHECKING
17+
18+
if TYPE_CHECKING:
19+
from dataframe_api.typing import SupportsDataFrameAPI
20+
21+
22+
def main(df_raw: SupportsDataFrameAPI) -> SupportsDataFrameAPI:
23+
df = df_raw.__dataframe_consortium_standard__(api_version="2023-11.beta")
24+
ns = df.__dataframe_namespace__()
25+
df = df.filter(
26+
ns.any_horizontal(*[df.col(col_name) > 0 for col_name in df.column_names]),
27+
)
28+
return df.dataframe

0 commit comments

Comments
 (0)