Skip to content

Commit 7ac6fe1

Browse files
feat: Local date accessor execution support (#2034)
1 parent fbb2094 commit 7ac6fe1

File tree

7 files changed

+150
-4
lines changed

7 files changed

+150
-4
lines changed

bigframes/core/compile/polars/compiler.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@
3434
import bigframes.operations.array_ops as arr_ops
3535
import bigframes.operations.bool_ops as bool_ops
3636
import bigframes.operations.comparison_ops as comp_ops
37+
import bigframes.operations.date_ops as date_ops
3738
import bigframes.operations.datetime_ops as dt_ops
39+
import bigframes.operations.frequency_ops as freq_ops
3840
import bigframes.operations.generic_ops as gen_ops
3941
import bigframes.operations.json_ops as json_ops
4042
import bigframes.operations.numeric_ops as num_ops
@@ -75,6 +77,20 @@ def decorator(func):
7577

7678

7779
if polars_installed:
80+
_FREQ_MAPPING = {
81+
"Y": "1y",
82+
"Q": "1q",
83+
"M": "1mo",
84+
"W": "1w",
85+
"D": "1d",
86+
"h": "1h",
87+
"min": "1m",
88+
"s": "1s",
89+
"ms": "1ms",
90+
"us": "1us",
91+
"ns": "1ns",
92+
}
93+
7894
_DTYPE_MAPPING = {
7995
# Direct mappings
8096
bigframes.dtypes.INT_DTYPE: pl.Int64(),
@@ -330,11 +346,48 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
330346
else:
331347
return pl.any_horizontal(*(input.str.ends_with(pat) for pat in op.pat))
332348

349+
@compile_op.register(freq_ops.FloorDtOp)
350+
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
351+
assert isinstance(op, freq_ops.FloorDtOp)
352+
return input.dt.truncate(every=_FREQ_MAPPING[op.freq])
353+
333354
@compile_op.register(dt_ops.StrftimeOp)
334355
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
335356
assert isinstance(op, dt_ops.StrftimeOp)
336357
return input.dt.strftime(op.date_format)
337358

359+
@compile_op.register(date_ops.YearOp)
360+
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
361+
return input.dt.year()
362+
363+
@compile_op.register(date_ops.QuarterOp)
364+
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
365+
return input.dt.quarter()
366+
367+
@compile_op.register(date_ops.MonthOp)
368+
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
369+
return input.dt.month()
370+
371+
@compile_op.register(date_ops.DayOfWeekOp)
372+
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
373+
return input.dt.weekday() - 1
374+
375+
@compile_op.register(date_ops.DayOp)
376+
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
377+
return input.dt.day()
378+
379+
@compile_op.register(date_ops.IsoYearOp)
380+
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
381+
return input.dt.iso_year()
382+
383+
@compile_op.register(date_ops.IsoWeekOp)
384+
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
385+
return input.dt.week()
386+
387+
@compile_op.register(date_ops.IsoDayOp)
388+
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
389+
return input.dt.weekday()
390+
338391
@compile_op.register(dt_ops.ParseDatetimeOp)
339392
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
340393
assert isinstance(op, dt_ops.ParseDatetimeOp)

bigframes/operations/datetimes.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
_ONE_DAY = pandas.Timedelta("1d")
3131
_ONE_SECOND = pandas.Timedelta("1s")
3232
_ONE_MICRO = pandas.Timedelta("1us")
33+
_SUPPORTED_FREQS = ("Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us")
3334

3435

3536
@log_adapter.class_logger
@@ -155,4 +156,6 @@ def normalize(self) -> series.Series:
155156
return self._apply_unary_op(ops.normalize_op)
156157

157158
def floor(self, freq: str) -> series.Series:
158-
return self._apply_unary_op(ops.FloorDtOp(freq=freq))
159+
if freq not in _SUPPORTED_FREQS:
160+
raise ValueError(f"freq must be one of {_SUPPORTED_FREQS}")
161+
return self._apply_unary_op(ops.FloorDtOp(freq=freq)) # type: ignore

bigframes/operations/frequency_ops.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,22 @@
2727
@dataclasses.dataclass(frozen=True)
2828
class FloorDtOp(base_ops.UnaryOp):
2929
name: typing.ClassVar[str] = "floor_dt"
30-
freq: str
30+
freq: typing.Literal[
31+
"Y",
32+
"Q",
33+
"M",
34+
"W",
35+
"D",
36+
"h",
37+
"min",
38+
"s",
39+
"ms",
40+
"us",
41+
]
3142

3243
def output_type(self, *input_types):
44+
if not dtypes.is_datetime_like(input_types[0]):
45+
raise TypeError("dt floor requires datetime-like arguments")
3346
return input_types[0]
3447

3548

bigframes/session/polars_executor.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
from bigframes.operations import (
2525
bool_ops,
2626
comparison_ops,
27+
date_ops,
28+
frequency_ops,
2729
generic_ops,
2830
numeric_ops,
2931
string_ops,
@@ -60,6 +62,15 @@
6062
comparison_ops.GtOp,
6163
comparison_ops.LeOp,
6264
comparison_ops.GeOp,
65+
date_ops.YearOp,
66+
date_ops.QuarterOp,
67+
date_ops.MonthOp,
68+
date_ops.DayOfWeekOp,
69+
date_ops.DayOp,
70+
date_ops.IsoYearOp,
71+
date_ops.IsoWeekOp,
72+
date_ops.IsoDayOp,
73+
frequency_ops.FloorDtOp,
6374
numeric_ops.AddOp,
6475
numeric_ops.SubOp,
6576
numeric_ops.MulOp,
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
17+
from bigframes.core import array_value
18+
import bigframes.operations as ops
19+
from bigframes.session import polars_executor
20+
from bigframes.testing.engine_utils import assert_equivalence_execution
21+
22+
pytest.importorskip("polars")
23+
24+
# Polars used as reference as its fast and local. Generally though, prefer gbq engine where they disagree.
25+
REFERENCE_ENGINE = polars_executor.PolarsExecutor()
26+
27+
28+
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
29+
def test_engines_dt_floor(scalars_array_value: array_value.ArrayValue, engine):
30+
arr, _ = scalars_array_value.compute_values(
31+
[
32+
ops.FloorDtOp("us").as_expr("timestamp_col"),
33+
ops.FloorDtOp("ms").as_expr("timestamp_col"),
34+
ops.FloorDtOp("s").as_expr("timestamp_col"),
35+
ops.FloorDtOp("min").as_expr("timestamp_col"),
36+
ops.FloorDtOp("h").as_expr("timestamp_col"),
37+
ops.FloorDtOp("D").as_expr("timestamp_col"),
38+
ops.FloorDtOp("W").as_expr("timestamp_col"),
39+
ops.FloorDtOp("M").as_expr("timestamp_col"),
40+
ops.FloorDtOp("Q").as_expr("timestamp_col"),
41+
ops.FloorDtOp("Y").as_expr("timestamp_col"),
42+
ops.FloorDtOp("Q").as_expr("datetime_col"),
43+
ops.FloorDtOp("us").as_expr("datetime_col"),
44+
]
45+
)
46+
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
47+
48+
49+
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
50+
def test_engines_date_accessors(scalars_array_value: array_value.ArrayValue, engine):
51+
datelike_cols = ["datetime_col", "timestamp_col", "date_col"]
52+
accessors = [
53+
ops.day_op,
54+
ops.dayofweek_op,
55+
ops.month_op,
56+
ops.quarter_op,
57+
ops.year_op,
58+
ops.iso_day_op,
59+
ops.iso_week_op,
60+
ops.iso_year_op,
61+
]
62+
63+
exprs = [acc.as_expr(col) for acc in accessors for col in datelike_cols]
64+
65+
arr, _ = scalars_array_value.compute_values(exprs)
66+
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)

tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_floor_dt/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
55
), `bfcte_1` AS (
66
SELECT
77
*,
8-
TIMESTAMP_TRUNC(`bfcol_0`, DAY) AS `bfcol_1`
8+
TIMESTAMP_TRUNC(`bfcol_0`, D) AS `bfcol_1`
99
FROM `bfcte_0`
1010
)
1111
SELECT

tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def test_expm1(scalar_types_df: bpd.DataFrame, snapshot):
153153

154154
def test_floor_dt(scalar_types_df: bpd.DataFrame, snapshot):
155155
bf_df = scalar_types_df[["timestamp_col"]]
156-
sql = _apply_unary_op(bf_df, ops.FloorDtOp("DAY"), "timestamp_col")
156+
sql = _apply_unary_op(bf_df, ops.FloorDtOp("D"), "timestamp_col")
157157

158158
snapshot.assert_match(sql, "out.sql")
159159

0 commit comments

Comments
 (0)