Skip to content

Commit 24050cb

Browse files
authored
feat: support date data type for to_datetime() (#1902)
* feat: support date data type for to_datetime()
* add test preconditions
* fix test
* fix lint
* fix tests
* fix lint
* fix test
* update docstring
* update docstring
1 parent 07222bf commit 24050cb

File tree

4 files changed

+42
-8
lines changed

4 files changed

+42
-8
lines changed

bigframes/core/tools/datetimes.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414

1515
from collections.abc import Mapping
16-
from datetime import datetime
16+
from datetime import date, datetime
1717
from typing import Optional, Union
1818

1919
import bigframes_vendored.constants as constants
@@ -28,7 +28,7 @@
2828

2929
def to_datetime(
3030
arg: Union[
31-
Union[int, float, str, datetime],
31+
Union[int, float, str, datetime, date],
3232
vendored_pandas_datetimes.local_iterables,
3333
bigframes.series.Series,
3434
bigframes.dataframe.DataFrame,
@@ -38,7 +38,7 @@ def to_datetime(
3838
format: Optional[str] = None,
3939
unit: Optional[str] = None,
4040
) -> Union[pd.Timestamp, datetime, bigframes.series.Series]:
41-
if isinstance(arg, (int, float, str, datetime)):
41+
if isinstance(arg, (int, float, str, datetime, date)):
4242
return pd.to_datetime(
4343
arg,
4444
utc=utc,
@@ -62,7 +62,11 @@ def to_datetime(
6262
f"Unit parameter is not supported for non-numerical input types. {constants.FEEDBACK_LINK}"
6363
)
6464

65-
if arg.dtype in (bigframes.dtypes.TIMESTAMP_DTYPE, bigframes.dtypes.DATETIME_DTYPE):
65+
if arg.dtype in (
66+
bigframes.dtypes.TIMESTAMP_DTYPE,
67+
bigframes.dtypes.DATETIME_DTYPE,
68+
bigframes.dtypes.DATE_DTYPE,
69+
):
6670
to_type = (
6771
bigframes.dtypes.TIMESTAMP_DTYPE if utc else bigframes.dtypes.DATETIME_DTYPE
6872
)

bigframes/operations/datetime_ops.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
5050
dtypes.FLOAT_DTYPE,
5151
dtypes.INT_DTYPE,
5252
dtypes.STRING_DTYPE,
53+
dtypes.DATE_DTYPE,
5354
):
5455
raise TypeError("expected string or numeric input")
5556
return pd.ArrowDtype(pa.timestamp("us", tz=None))
@@ -67,6 +68,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
6768
dtypes.FLOAT_DTYPE,
6869
dtypes.INT_DTYPE,
6970
dtypes.STRING_DTYPE,
71+
dtypes.DATE_DTYPE,
7072
):
7173
raise TypeError("expected string or numeric input")
7274
return pd.ArrowDtype(pa.timestamp("us", tz="UTC"))

tests/system/small/operations/test_datetimes.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,15 @@
1313
# limitations under the License.
1414

1515
import datetime
16+
import typing
1617

1718
import numpy
19+
from packaging import version
1820
from pandas import testing
1921
import pandas as pd
2022
import pytest
2123

24+
import bigframes.pandas as bpd
2225
import bigframes.series
2326
from bigframes.testing.utils import assert_series_equal
2427

@@ -548,3 +551,23 @@ def test_timedelta_dt_accessors_on_wrong_type_raise_exception(scalars_dfs, acces
548551

549552
with pytest.raises(TypeError):
550553
access(bf_df["timestamp_col"])
554+
555+
556+
@pytest.mark.parametrize(
557+
"col",
558+
# TODO(b/431276706) test timestamp_col too.
559+
["date_col", "datetime_col"],
560+
)
561+
def test_to_datetime(scalars_dfs, col):
562+
if version.Version(pd.__version__) <= version.Version("2.1.0"):
563+
pytest.skip("timezone conversion bug")
564+
bf_df, pd_df = scalars_dfs
565+
566+
actual_result = typing.cast(
567+
bigframes.series.Series, bpd.to_datetime(bf_df[col])
568+
).to_pandas()
569+
570+
expected_result = pd.Series(pd.to_datetime(pd_df[col]))
571+
testing.assert_series_equal(
572+
actual_result, expected_result, check_dtype=False, check_index_type=False
573+
)

third_party/bigframes_vendored/pandas/core/tools/datetimes.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
11
# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/tools/datetimes.py
22

3-
from datetime import datetime
3+
from datetime import date, datetime
44
from typing import List, Mapping, Tuple, Union
55

66
import pandas as pd
77

8-
from bigframes import constants, series
8+
from bigframes import constants, dataframe, series
99

1010
local_iterables = Union[List, Tuple, pd.Series, pd.DataFrame, Mapping]
1111

1212

1313
def to_datetime(
14-
arg,
14+
arg: Union[
15+
Union[int, float, str, datetime, date],
16+
local_iterables,
17+
series.Series,
18+
dataframe.DataFrame,
19+
],
1520
*,
1621
utc=False,
1722
format=None,
@@ -58,7 +63,7 @@ def to_datetime(
5863
dtype: timestamp[us, tz=UTC][pyarrow]
5964
6065
Args:
61-
arg (int, float, str, datetime, list, tuple, 1-d array, Series):
66+
arg (int, float, str, datetime, date, list, tuple, 1-d array, Series):
6267
The object to convert to a datetime.
6368
utc (bool, default False):
6469
Control timezone-related parsing, localization and conversion. If True, the

0 commit comments

Comments
 (0)