Skip to content

Commit 0ff7371

Browse files
authored
fix: avoid rounding problems with microseconds (#20)
* fix: avoid rounding problems with microseconds
* add more tests
* adjust regex to parse fraction as integer
1 parent de3bb56 commit 0ff7371

File tree

4 files changed

+161
-59
lines changed

4 files changed

+161
-59
lines changed

db_dtypes/__init__.py

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -69,29 +69,33 @@ def _datetime(
6969
cls,
7070
scalar,
7171
match_fn=re.compile(
72-
r"\s*(?P<hour>\d+)(?::(?P<minute>\d+)(?::(?P<second>\d+(?:[.]\d+)?)?)?)?\s*$"
72+
r"\s*(?P<hours>\d+)"
73+
r"(?::(?P<minutes>\d+)"
74+
r"(?::(?P<seconds>\d+)"
75+
r"(?:\.(?P<fraction>\d*))?)?)?\s*$"
7376
).match,
7477
):
7578
if isinstance(scalar, datetime.time):
7679
return datetime.datetime.combine(cls._epoch, scalar)
7780
elif isinstance(scalar, str):
7881
# iso string
79-
match = match_fn(scalar)
80-
if not match:
82+
parsed = match_fn(scalar)
83+
if not parsed:
8184
raise ValueError(f"Bad time string: {repr(scalar)}")
8285

83-
hour = match.group("hour")
84-
minute = match.group("minute")
85-
second = match.group("second")
86-
second, microsecond = divmod(float(second if second else 0), 1)
86+
hours = parsed.group("hours")
87+
minutes = parsed.group("minutes")
88+
seconds = parsed.group("seconds")
89+
fraction = parsed.group("fraction")
90+
microseconds = int(fraction.ljust(6, "0")[:6]) if fraction else 0
8791
return datetime.datetime(
8892
1970,
8993
1,
9094
1,
91-
int(hour),
92-
int(minute if minute else 0),
93-
int(second),
94-
int(microsecond * 1_000_000),
95+
int(hours),
96+
int(minutes) if minutes else 0,
97+
int(seconds) if seconds else 0,
98+
microseconds,
9599
)
96100
else:
97101
raise TypeError("Invalid value type", scalar)

tests/unit/test_date.py

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,62 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import datetime
16+
17+
import pandas
18+
import pytest
19+
20+
# To register the types.
21+
import db_dtypes # noqa
22+
23+
24+
@pytest.mark.parametrize(
25+
"value, expected",
26+
[
27+
# Min/Max values for pandas.Timestamp.
28+
("1677-09-22", datetime.date(1677, 9, 22)),
29+
("2262-04-11", datetime.date(2262, 4, 11)),
30+
# Typical "zero" values.
31+
("1900-01-01", datetime.date(1900, 1, 1)),
32+
("1970-01-01", datetime.date(1970, 1, 1)),
33+
# Assorted values.
34+
("1993-10-31", datetime.date(1993, 10, 31)),
35+
("2012-02-29", datetime.date(2012, 2, 29)),
36+
("2021-12-17", datetime.date(2021, 12, 17)),
37+
("2038-01-19", datetime.date(2038, 1, 19)),
38+
],
39+
)
40+
def test_date_parsing(value, expected):
41+
assert pandas.Series([value], dtype="date")[0] == expected
42+
43+
44+
@pytest.mark.parametrize(
45+
"value, error",
46+
[
47+
("thursday", "Bad date string: 'thursday'"),
48+
("1-2-thursday", "Bad date string: '1-2-thursday'"),
49+
("1-2-3-4", "Bad date string: '1-2-3-4'"),
50+
("1-2-3.f", "Bad date string: '1-2-3.f'"),
51+
("1-d-3", "Bad date string: '1-d-3'"),
52+
("1-3", "Bad date string: '1-3'"),
53+
("1", "Bad date string: '1'"),
54+
("", "Bad date string: ''"),
55+
("2021-2-99", "day is out of range for month"),
56+
("2021-99-1", "month must be in 1[.][.]12"),
57+
("10000-1-1", "year 10000 is out of range"),
58+
],
59+
)
60+
def test_date_parsing_errors(value, error):
61+
with pytest.raises(ValueError, match=error):
62+
pandas.Series([value], dtype="date")

tests/unit/test_dtypes.py

Lines changed: 0 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -621,51 +621,3 @@ def test_date_sub():
621621
do = pd.Series([pd.DateOffset(days=i) for i in range(4)])
622622
expect = dates.astype("object") - do
623623
assert np.array_equal(dates - do, expect)
624-
625-
626-
@pytest.mark.parametrize(
627-
"value, expected", [("1", datetime.time(1)), ("1:2", datetime.time(1, 2))],
628-
)
629-
def test_short_time_parsing(value, expected):
630-
assert _cls("time")([value])[0] == expected
631-
632-
633-
@pytest.mark.parametrize(
634-
"value, error",
635-
[
636-
("thursday", "Bad time string: 'thursday'"),
637-
("1:2:3thursday", "Bad time string: '1:2:3thursday'"),
638-
("1:2:3:4", "Bad time string: '1:2:3:4'"),
639-
("1:2:3.f", "Bad time string: '1:2:3.f'"),
640-
("1:d:3", "Bad time string: '1:d:3'"),
641-
("1:2.3", "Bad time string: '1:2.3'"),
642-
("", "Bad time string: ''"),
643-
("1:2:99", "second must be in 0[.][.]59"),
644-
("1:99", "minute must be in 0[.][.]59"),
645-
("99", "hour must be in 0[.][.]23"),
646-
],
647-
)
648-
def test_bad_time_parsing(value, error):
649-
with pytest.raises(ValueError, match=error):
650-
_cls("time")([value])
651-
652-
653-
@pytest.mark.parametrize(
654-
"value, error",
655-
[
656-
("thursday", "Bad date string: 'thursday'"),
657-
("1-2-thursday", "Bad date string: '1-2-thursday'"),
658-
("1-2-3-4", "Bad date string: '1-2-3-4'"),
659-
("1-2-3.f", "Bad date string: '1-2-3.f'"),
660-
("1-d-3", "Bad date string: '1-d-3'"),
661-
("1-3", "Bad date string: '1-3'"),
662-
("1", "Bad date string: '1'"),
663-
("", "Bad date string: ''"),
664-
("2021-2-99", "day is out of range for month"),
665-
("2021-99-1", "month must be in 1[.][.]12"),
666-
("10000-1-1", "year 10000 is out of range"),
667-
],
668-
)
669-
def test_bad_date_parsing(value, error):
670-
with pytest.raises(ValueError, match=error):
671-
_cls("date")([value])

tests/unit/test_time.py

Lines changed: 84 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,84 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import datetime
16+
17+
import pandas
18+
import pytest
19+
20+
# To register the types.
21+
import db_dtypes # noqa
22+
23+
24+
@pytest.mark.parametrize(
25+
"value, expected",
26+
[
27+
# Midnight
28+
("0", datetime.time(0)),
29+
("0:0", datetime.time(0)),
30+
("0:0:0", datetime.time(0)),
31+
("0:0:0.", datetime.time(0)),
32+
("0:0:0.0", datetime.time(0)),
33+
("0:0:0.000000", datetime.time(0)),
34+
("00:00:00", datetime.time(0, 0, 0)),
35+
(" 00:00:00 ", datetime.time(0, 0, 0)),
36+
# Short values
37+
("1", datetime.time(1)),
38+
("23", datetime.time(23)),
39+
("1:2", datetime.time(1, 2)),
40+
("23:59", datetime.time(23, 59)),
41+
("1:2:3", datetime.time(1, 2, 3)),
42+
("23:59:59", datetime.time(23, 59, 59)),
43+
# Non-octal values.
44+
("08:08:08", datetime.time(8, 8, 8)),
45+
("09:09:09", datetime.time(9, 9, 9)),
46+
# Fractional seconds can cause rounding problems if cast to float. See:
47+
# https://github.com/googleapis/python-db-dtypes-pandas/issues/18
48+
("0:0:59.876543", datetime.time(0, 0, 59, 876543)),
49+
("01:01:01.010101", datetime.time(1, 1, 1, 10101)),
50+
("09:09:09.090909", datetime.time(9, 9, 9, 90909)),
51+
("11:11:11.111111", datetime.time(11, 11, 11, 111111)),
52+
("19:16:23.987654", datetime.time(19, 16, 23, 987654)),
53+
# Microsecond precision
54+
("00:00:00.000001", datetime.time(0, 0, 0, 1)),
55+
("23:59:59.999999", datetime.time(23, 59, 59, 999_999)),
56+
# TODO: Support nanosecond precision values without truncation.
57+
# https://github.com/googleapis/python-db-dtypes-pandas/issues/19
58+
("0:0:0.000001001", datetime.time(0, 0, 0, 1)),
59+
("23:59:59.999999000", datetime.time(23, 59, 59, 999_999)),
60+
("23:59:59.999999999", datetime.time(23, 59, 59, 999_999)),
61+
],
62+
)
63+
def test_time_parsing(value, expected):
64+
assert pandas.Series([value], dtype="time")[0] == expected
65+
66+
67+
@pytest.mark.parametrize(
68+
"value, error",
69+
[
70+
("thursday", "Bad time string: 'thursday'"),
71+
("1:2:3thursday", "Bad time string: '1:2:3thursday'"),
72+
("1:2:3:4", "Bad time string: '1:2:3:4'"),
73+
("1:2:3.f", "Bad time string: '1:2:3.f'"),
74+
("1:d:3", "Bad time string: '1:d:3'"),
75+
("1:2.3", "Bad time string: '1:2.3'"),
76+
("", "Bad time string: ''"),
77+
("1:2:99", "second must be in 0[.][.]59"),
78+
("1:99", "minute must be in 0[.][.]59"),
79+
("99", "hour must be in 0[.][.]23"),
80+
],
81+
)
82+
def test_time_parsing_errors(value, error):
83+
with pytest.raises(ValueError, match=error):
84+
pandas.Series([value], dtype="time")

0 commit comments

Comments
 (0)