Skip to content

Commit ae7646f

Browse files
committed
MOD: Change DBNStore.to_df() timezone type
1 parent 9b1c769 commit ae7646f

File tree

3 files changed

+64
-12
lines changed

3 files changed

+64
-12
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## 0.57.2 - TBD
4+
5+
#### Enhancements
6+
- Changed the `tz` parameter in `DBNStore.to_df()` to accept any `datetime.tzinfo` instead of explicitly requiring a `pytz.BaseTzInfo`
7+
38
## 0.57.1 - 2025-06-17
49

510
#### Enhancements

databento/common/dbnstore.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from __future__ import annotations
22

33
import abc
4+
import datetime
45
import decimal
56
import itertools
67
import logging
78
import warnings
9+
import zoneinfo
810
from collections.abc import Generator
911
from collections.abc import Iterator
1012
from collections.abc import Mapping
@@ -28,7 +30,6 @@
2830
import pandas as pd
2931
import pyarrow as pa
3032
import pyarrow.parquet as pq
31-
import pytz
3233
import zstandard
3334
from databento_dbn import FIXED_PRICE_SCALE
3435
from databento_dbn import UNDEF_PRICE
@@ -859,7 +860,7 @@ def to_df(
859860
pretty_ts: bool = ...,
860861
map_symbols: bool = ...,
861862
schema: Schema | str | None = ...,
862-
tz: pytz.BaseTzInfo | str = ...,
863+
tz: datetime.tzinfo | str = ...,
863864
count: None = ...,
864865
) -> pd.DataFrame: ...
865866

@@ -870,7 +871,7 @@ def to_df(
870871
pretty_ts: bool = ...,
871872
map_symbols: bool = ...,
872873
schema: Schema | str | None = ...,
873-
tz: pytz.BaseTzInfo | str = ...,
874+
tz: datetime.tzinfo | str = ...,
874875
count: int = ...,
875876
) -> DataFrameIterator: ...
876877

@@ -880,8 +881,8 @@ def to_df(
880881
pretty_ts: bool = True,
881882
map_symbols: bool = True,
882883
schema: Schema | str | None = None,
883-
tz: pytz.BaseTzInfo | str | Default[pytz.BaseTzInfo] = Default[pytz.BaseTzInfo](
884-
pytz.UTC,
884+
tz: datetime.tzinfo | str | Default[datetime.tzinfo] = Default[datetime.tzinfo](
885+
datetime.timezone.utc,
885886
),
886887
count: int | None = None,
887888
) -> pd.DataFrame | DataFrameIterator:
@@ -909,7 +910,7 @@ def to_df(
909910
schema : Schema or str, optional
910911
The DBN schema for the dataframe.
911912
This is only required when reading a DBN stream with mixed record types.
912-
tz : pytz.BaseTzInfo or str, default UTC
913+
tz : datetime.tzinfo or str, default UTC
913914
If `pretty_ts` is `True`, all timestamps will be converted to the specified timezone.
914915
count : int, optional
915916
If set, instead of returning a single `DataFrame` a `DataFrameIterator`
@@ -939,8 +940,13 @@ def to_df(
939940
"A timezone was specified when `pretty_ts` is `False`. Did you mean to set `pretty_ts=True`?",
940941
)
941942

942-
if not isinstance(tz, pytz.BaseTzInfo):
943-
tz = pytz.timezone(tz)
943+
if isinstance(tz, str):
944+
tz = zoneinfo.ZoneInfo(tz)
945+
elif not isinstance(tz, datetime.tzinfo):
946+
raise ValueError(
947+
f"The value {tz!r} is not a valid datetime.tzinfo",
948+
)
949+
944950
if schema is None:
945951
if self.schema is None:
946952
raise ValueError("a schema must be specified for mixed DBN data")
@@ -1442,7 +1448,7 @@ def __init__(
14421448
count: int | None,
14431449
struct_type: type[DBNRecord],
14441450
instrument_map: InstrumentMap,
1445-
tz: pytz.BaseTzInfo,
1451+
tz: datetime.tzinfo,
14461452
price_type: PriceType = PriceType.FLOAT,
14471453
pretty_ts: bool = True,
14481454
map_symbols: bool = True,

tests/test_historical_bento.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import collections
22
import datetime as dt
33
import decimal
4+
import zoneinfo
45
from io import BytesIO
56
from pathlib import Path
67
from typing import Any
@@ -1567,11 +1568,51 @@ def test_dbnstore_to_df_with_timezone(
15671568
df.reset_index(inplace=True)
15681569

15691570
# Assert
1570-
expected_timezone = pytz.timezone(timezone)._utcoffset
1571+
expected_timezone = zoneinfo.ZoneInfo(timezone).utcoffset(None)
15711572
failures = []
15721573
struct = SCHEMA_STRUCT_MAP[schema]
15731574
for field in struct._timestamp_fields:
1574-
if df[field].dt.tz._utcoffset != expected_timezone:
1575+
if df[field].dt.tz.utcoffset(None) != expected_timezone:
1576+
failures.append(field)
1577+
1578+
assert not failures
1579+
1580+
1581+
@pytest.mark.parametrize(
1582+
"timezone",
1583+
[
1584+
pytz.timezone("US/Central"),
1585+
pytz.timezone("US/Eastern"),
1586+
pytz.timezone("Europe/Vienna"),
1587+
pytz.timezone("Asia/Dubai"),
1588+
pytz.timezone("UTC"),
1589+
],
1590+
)
1591+
@pytest.mark.parametrize(
1592+
"schema",
1593+
[pytest.param(schema, id=str(schema)) for schema in Schema.variants()],
1594+
)
1595+
def test_dbnstore_to_df_with_pytz_timezone(
1596+
test_data_path: Callable[[Dataset, Schema], Path],
1597+
schema: Schema,
1598+
timezone: pytz.BaseTzInfo,
1599+
) -> None:
1600+
"""
1601+
Test that the `tz` parameter in `DBNStore.to_df` accepts `pytz`
1602+
timezone objects.
1603+
"""
1604+
# Arrange
1605+
dbnstore = DBNStore.from_file(path=test_data_path(Dataset.GLBX_MDP3, schema))
1606+
1607+
# Act
1608+
df = dbnstore.to_df(tz=timezone)
1609+
df.reset_index(inplace=True)
1610+
1611+
# Assert
1612+
failures = []
1613+
struct = SCHEMA_STRUCT_MAP[schema]
1614+
for field in struct._timestamp_fields:
1615+
if df[field].dt.tz != timezone:
15751616
failures.append(field)
15761617

15771618
assert not failures
@@ -1591,7 +1632,7 @@ def test_dbnstore_to_df_with_timezone_pretty_ts_error(
15911632
with pytest.raises(ValueError):
15921633
dbnstore.to_df(
15931634
pretty_ts=False,
1594-
tz=pytz.UTC,
1635+
tz=zoneinfo.ZoneInfo("UTC"),
15951636
)
15961637

15971638

0 commit comments

Comments
 (0)