Skip to content

Commit 521e987

Browse files
authored
feat: (Preview) Support diff() for date series (#1423)
* [WIP] Support date series diff. Need to update rewrites * add tests
1 parent 1e5cf02 commit 521e987

File tree

6 files changed

+105
-18
lines changed

6 files changed

+105
-18
lines changed

bigframes/core/compile/aggregate_compiler.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import bigframes_vendored.ibis.expr.types as ibis_types
2727
import pandas as pd
2828

29+
from bigframes.core.compile import constants as compiler_constants
2930
import bigframes.core.compile.ibis_types as compile_ibis_types
3031
import bigframes.core.compile.scalar_op_compiler as scalar_compilers
3132
import bigframes.core.expression as ex
@@ -575,6 +576,30 @@ def _(
575576
return original_column.delta(shifted_column, part="microsecond")
576577

577578

579+
@compile_unary_agg.register
def _(
    op: agg_ops.DateSeriesDiffOp,
    column: ibis_types.Column,
    window=None,
) -> ibis_types.Value:
    """Compile ``DateSeriesDiffOp`` for a date column.

    The column is compared with itself shifted by ``op.periods`` rows (the
    shift is produced by compiling ``ShiftOp`` over the same window). The
    difference is taken in days and then scaled by the ``"D"`` entry of
    ``UNIT_TO_US_CONVERSION_FACTORS``.

    Args:
        op: The diff operation; only ``op.periods`` is read here.
        column: The input column; must have a date type.
        window: Forwarded unchanged to the ``ShiftOp`` compilation.

    Returns:
        The scaled day delta, floored.

    Raises:
        TypeError: If ``column`` is not a date column.
    """
    if not column.type().is_date():
        raise TypeError(
            f"Cannot perform date series diff on type {column.type()}"
        )

    original_column = cast(ibis_types.DateColumn, column)
    shifted_column = cast(
        ibis_types.DateColumn,
        compile_unary_agg(agg_ops.ShiftOp(op.periods), column, window),
    )

    # The cast only informs the type checker; the looked-up factor itself is a
    # plain Python number taken from the shared constants table.
    conversion_factor = typing.cast(
        ibis_types.IntegerValue, compiler_constants.UNIT_TO_US_CONVERSION_FACTORS["D"]
    )

    return (
        original_column.delta(shifted_column, part="day") * conversion_factor
    ).floor()
601+
602+
578603
@compile_unary_agg.register
579604
def _(
580605
op: agg_ops.AllOp,

bigframes/core/compile/constants.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
# Datetime constants
#
# Multipliers that convert one unit of the keyed time unit into microseconds.
# Both "d" and "D" are accepted for days. Every factor is an int except "ns",
# which is fractional because a nanosecond is smaller than a microsecond.
UNIT_TO_US_CONVERSION_FACTORS = {
    "W": 604_800_000_000,  # 7 days
    "d": 86_400_000_000,  # 24 hours
    "D": 86_400_000_000,  # 24 hours
    "h": 3_600_000_000,  # 60 minutes
    "m": 60_000_000,  # 60 seconds
    "s": 1_000_000,
    "ms": 1_000,
    "us": 1,
    "ns": 0.001,
}

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import numpy as np
2727
import pandas as pd
2828

29+
from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS
2930
import bigframes.core.compile.default_ordering
3031
import bigframes.core.compile.ibis_types
3132
import bigframes.core.expression as ex
@@ -50,19 +51,6 @@
5051
)
5152
_OBJ_REF_IBIS_DTYPE = ibis_dtypes.Struct.from_tuples(_OBJ_REF_STRUCT_SCHEMA) # type: ignore
5253

53-
# Datetime constants
54-
UNIT_TO_US_CONVERSION_FACTORS = {
55-
"W": 7 * 24 * 60 * 60 * 1000 * 1000,
56-
"d": 24 * 60 * 60 * 1000 * 1000,
57-
"D": 24 * 60 * 60 * 1000 * 1000,
58-
"h": 60 * 60 * 1000 * 1000,
59-
"m": 60 * 1000 * 1000,
60-
"s": 1000 * 1000,
61-
"ms": 1000,
62-
"us": 1,
63-
"ns": 1e-3,
64-
}
65-
6654

6755
class ScalarOpCompiler:
6856
# Mapping of operation name to implementations

bigframes/core/rewrite/timedeltas.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,15 @@ def _rewrite_aggregation(
215215
else:
216216
input_type = aggregation.arg.dtype
217217

218-
if isinstance(aggregation.op, aggs.DiffOp) and dtypes.is_datetime_like(input_type):
219-
return ex.UnaryAggregation(
220-
aggs.TimeSeriesDiffOp(aggregation.op.periods), aggregation.arg
221-
)
218+
if isinstance(aggregation.op, aggs.DiffOp):
219+
if dtypes.is_datetime_like(input_type):
220+
return ex.UnaryAggregation(
221+
aggs.TimeSeriesDiffOp(aggregation.op.periods), aggregation.arg
222+
)
223+
elif input_type == dtypes.DATE_DTYPE:
224+
return ex.UnaryAggregation(
225+
aggs.DateSeriesDiffOp(aggregation.op.periods), aggregation.arg
226+
)
222227

223228
if isinstance(aggregation.op, aggs.StdOp) and input_type is dtypes.TIMEDELTA_DTYPE:
224229
return ex.UnaryAggregation(

bigframes/operations/aggregations.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ def skips_nulls(self):
500500
return False
501501

502502
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
503-
if dtypes.is_datetime_like(input_types[0]):
503+
if dtypes.is_date_like(input_types[0]):
504504
return dtypes.TIMEDELTA_DTYPE
505505
return super().output_type(*input_types)
506506

@@ -519,6 +519,20 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
519519
raise TypeError(f"expect datetime-like types, but got {input_types[0]}")
520520

521521

522+
@dataclasses.dataclass(frozen=True)
class DateSeriesDiffOp(UnaryWindowOp):
    """Window op describing a diff over a date column.

    Accepts only ``dtypes.DATE_DTYPE`` input and reports
    ``dtypes.TIMEDELTA_DTYPE`` as its output type.
    """

    # Row offset carried by the op; the compiler passes it on to ShiftOp.
    periods: int

    @property
    def skips_nulls(self):
        """Always False for this op."""
        return False

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Return the timedelta dtype for date input; raise TypeError otherwise."""
        operand_type = input_types[0]
        is_date = operand_type == dtypes.DATE_DTYPE
        if not is_date:
            raise TypeError(f"expect date type, but got {operand_type}")
        return dtypes.TIMEDELTA_DTYPE
534+
535+
522536
@dataclasses.dataclass(frozen=True)
523537
class AllOp(UnaryAggregateOp):
524538
name: ClassVar[str] = "all"
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pandas.testing
16+
17+
from bigframes import dtypes
18+
19+
20+
def test_date_series_diff_agg(scalars_dfs):
    """diff() on the date column matches pandas once cast to the timedelta dtype."""
    bigframes_df, pandas_df = scalars_dfs

    # pandas result is cast to the timedelta dtype so both sides share a dtype.
    expected = pandas_df["date_col"].diff().astype(dtypes.TIMEDELTA_DTYPE)
    actual = bigframes_df["date_col"].diff().to_pandas()

    pandas.testing.assert_series_equal(actual, expected, check_index_type=False)

0 commit comments

Comments
 (0)