Skip to content

Commit 502bcdc

Browse files
committed
Merge remote-tracking branch 'camriddell/main' into simp-pandas-group-by
2 parents 532d6b3 + 6bf1731 commit 502bcdc

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+971
-105
lines changed

docs/api-reference/expr_dt.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
- minute
1414
- month
1515
- nanosecond
16+
- offset_by
1617
- ordinal_day
1718
- replace_time_zone
1819
- second

docs/api-reference/narwhals.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Here are the top-level functions available in Narwhals.
99
- all
1010
- all_horizontal
1111
- any_horizontal
12+
- coalesce
1213
- col
1314
- concat
1415
- concat_str

docs/api-reference/series_dt.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
- minute
1414
- month
1515
- nanosecond
16+
- offset_by
1617
- ordinal_day
1718
- replace_time_zone
1819
- second

narwhals/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
all_ as all,
5050
all_horizontal,
5151
any_horizontal,
52+
coalesce,
5253
col,
5354
concat,
5455
concat_str,
@@ -127,6 +128,7 @@
127128
"all",
128129
"all_horizontal",
129130
"any_horizontal",
131+
"coalesce",
130132
"col",
131133
"concat",
132134
"concat_str",

narwhals/_arrow/namespace.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,27 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
262262
context=self,
263263
)
264264

265+
def coalesce(self, *exprs: ArrowExpr) -> ArrowExpr:
    """Combine ``exprs`` into a single expression using ``pyarrow.compute.coalesce``.

    When the returned expression is evaluated, each input expression is run
    against the frame, the resulting series are passed through
    ``_align_full_broadcast``, and ``pc.coalesce`` is applied across their
    native arrays (pyarrow documents this as picking the first non-null
    value). The single output series keeps the name of the first aligned
    series.
    """

    def func(df: ArrowDataFrame) -> list[ArrowSeries]:
        evaluated = chain.from_iterable(expr(df) for expr in exprs)
        first, *others = self._series._align_full_broadcast(*evaluated)
        merged = pc.coalesce(first.native, *(other.native for other in others))
        return [ArrowSeries(merged, name=first.name, version=self._version)]

    # The new expression sits one level deeper than its deepest input.
    deepest = max(expr._depth for expr in exprs)
    return self._expr._from_callable(
        func=func,
        depth=deepest + 1,
        function_name="coalesce",
        evaluate_output_names=combine_evaluate_output_names(*exprs),
        alias_output_names=combine_alias_output_names(*exprs),
        context=self,
    )
285+
265286

266287
class ArrowWhen(EagerWhen[ArrowDataFrame, ArrowSeries, ArrowExpr, "ChunkedArrayAny"]):
267288
@property

narwhals/_arrow/series_dt.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
US_PER_MINUTE,
1919
US_PER_SECOND,
2020
)
21-
from narwhals._duration import parse_interval_string
21+
from narwhals._duration import Interval
2222

2323
if TYPE_CHECKING:
2424
from collections.abc import Mapping
@@ -202,7 +202,25 @@ def total_nanoseconds(self) -> ArrowSeries:
202202
return self.with_native(pc.multiply(self.native, factor).cast(pa.int64()))
203203

204204
def truncate(self, every: str) -> ArrowSeries:
    """Floor each value to the interval described by ``every``.

    ``every`` is parsed with ``Interval.parse``; the parsed unit is looked up
    in ``UNITS_DICT`` to obtain the unit name handed to
    ``pc.floor_temporal`` together with the parsed multiple.
    """
    interval = Interval.parse(every)
    arrow_unit = UNITS_DICT[interval.unit]
    floored = pc.floor_temporal(self.native, interval.multiple, arrow_unit)
    return self.with_native(floored)
209+
210+
def offset_by(self, by: str) -> ArrowSeries:
    """Shift every value by the interval described by ``by``.

    ``by`` is parsed with ``Interval.parse_no_constraints``.

    Raises:
        NotImplementedError: if the parsed unit is ``"y"``, ``"q"`` or ``"mo"``.
    """
    interval = Interval.parse_no_constraints(by)
    values = self.native
    if interval.unit in {"y", "q", "mo"}:
        msg = f"Offsetting by {interval.unit} is not yet supported for pyarrow."
        raise NotImplementedError(msg)

    offset: pa.DurationScalar[Any]
    if interval.unit == "ns":  # pragma: no cover
        # Built directly as a nanosecond duration from the multiple instead of
        # going through `to_timedelta()` (presumably because
        # `datetime.timedelta` only has microsecond resolution).
        offset = lit(interval.multiple, pa.duration("ns"))  # type: ignore[assignment]
    else:
        offset = lit(interval.to_timedelta())

    if interval.unit == "d" and (time_zone := values.type.tz):
        # Day offsets on a zone-aware column are added to the local wall-clock
        # timestamps, after which the original zone is re-attached.
        wall_clock = pc.local_timestamp(values)
        shifted = pc.add(wall_clock, offset)
        return self.with_native(pc.assume_timezone(shifted, time_zone))

    return self.with_native(pc.add(values, offset))

narwhals/_compliant/any_namespace.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def total_milliseconds(self) -> CompliantT_co: ...
4848
def total_microseconds(self) -> CompliantT_co: ...  # signature-only stub (body is `...`); returns the compliant type
4949
def total_nanoseconds(self) -> CompliantT_co: ...  # signature-only stub (body is `...`); returns the compliant type
5050
def truncate(self, every: str) -> CompliantT_co: ...  # signature-only stub; `every` is an interval string (the pyarrow and dask implementations shown on this page parse it with `Interval.parse`)
51+
def offset_by(self, by: str) -> CompliantT_co: ...  # signature-only stub; `by` is an interval string (the pyarrow and dask implementations shown on this page parse it with `Interval.parse_no_constraints`)
5152

5253

5354
class ListNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]):

narwhals/_compliant/expr.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,9 @@ def total_nanoseconds(self) -> EagerExprT:
11121112
def truncate(self, every: str) -> EagerExprT:
    """Forward ``dt.truncate`` to the series namespace via the compliant expression."""
    reuse = self.compliant._reuse_series_namespace
    return reuse("dt", "truncate", every=every)
11141114

1115+
def offset_by(self, by: str) -> EagerExprT:
    """Forward ``dt.offset_by`` to the series namespace via the compliant expression."""
    reuse = self.compliant._reuse_series_namespace
    return reuse("dt", "offset_by", by=by)
1117+
11151118

11161119
class EagerExprListNamespace(
11171120
EagerExprNamespace[EagerExprT], ListNamespace[EagerExprT], Generic[EagerExprT]

narwhals/_compliant/namespace.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def concat_str(
9696
def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ...
9797
# Signature-only property stub: annotated as yielding the compliant expression
# class (`type[CompliantExprT]`), not an instance.
@property
9898
def _expr(self) -> type[CompliantExprT]: ...
99+
def coalesce(self, *exprs: CompliantExprT) -> CompliantExprT: ...  # signature-only stub; takes any number of compliant expressions and returns one
99100

100101

101102
class DepthTrackingNamespace(

narwhals/_dask/expr_dt.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
from narwhals._compliant import LazyExprNamespace
66
from narwhals._compliant.any_namespace import DateTimeNamespace
77
from narwhals._constants import MS_PER_SECOND, NS_PER_SECOND, US_PER_SECOND
8-
from narwhals._duration import parse_interval_string
8+
from narwhals._duration import Interval
99
from narwhals._pandas_like.utils import (
10-
UNIT_DICT,
10+
ALIAS_DICT,
1111
calculate_timestamp_date,
1212
calculate_timestamp_datetime,
1313
native_to_narwhals_dtype,
@@ -154,9 +154,22 @@ def total_nanoseconds(self) -> DaskExpr:
154154
)
155155

156156
def truncate(self, every: str) -> DaskExpr:
    """Floor values to the frequency described by ``every``.

    ``every`` is parsed with ``Interval.parse``. The unit is translated
    through ``ALIAS_DICT`` (falling back to the unit itself when there is no
    entry) and joined with the multiple into the frequency string passed to
    ``dt.floor``.

    Raises:
        NotImplementedError: if the parsed unit is ``"mo"``, ``"q"`` or ``"y"``.
    """
    interval = Interval.parse(every)
    unit = interval.unit
    if unit in {"mo", "q", "y"}:
        msg = f"Truncating to {unit} is not yet supported for dask."
        raise NotImplementedError(msg)
    alias = ALIAS_DICT.get(unit, unit)
    freq = f"{interval.multiple}{alias}"

    def floor_to_freq(expr: dx.Series) -> dx.Series:
        return expr.dt.floor(freq)

    return self.compliant._with_callable(floor_to_freq, "truncate")
164+
165+
def offset_by(self, by: str) -> DaskExpr:
    """Shift values by the interval described by ``by``.

    The interval string is parsed inside the wrapped callable, so parsing and
    the unsupported-unit check happen when that callable is invoked rather
    than when this method is called.

    Raises (from the wrapped callable):
        NotImplementedError: if the parsed unit is one of ``"y"``, ``"q"``,
            ``"mo"``, ``"d"`` or ``"ns"``.
    """

    def add_offset(s: dx.Series, by: str) -> dx.Series:
        interval = Interval.parse_no_constraints(by)
        unit = interval.unit
        if unit not in {"y", "q", "mo", "d", "ns"}:
            return s.add(interval.to_timedelta())
        msg = f"Offsetting by {unit} is not yet supported for dask."
        raise NotImplementedError(msg)

    return self.compliant._with_callable(add_offset, "offset_by", by=by)

0 commit comments

Comments
 (0)