Skip to content

Commit 26c4319

Browse files
committed
MOD: Adjust DataFrame formatting code
1 parent 8969d3c commit 26c4319

File tree

2 files changed

+18
-15
lines changed

2 files changed

+18
-15
lines changed

databento/common/constants.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,6 @@
2525
x[0]: np.iinfo(x[1]).max for x in InstrumentDefMsg._dtypes if not isinstance(x[1], str)
2626
}
2727

28-
INT64_NULL: Final = 9223372036854775807
29-
3028
SCHEMA_STRUCT_MAP: Final[dict[Schema, type[DBNRecord]]] = {
3129
Schema.DEFINITION: InstrumentDefMsg,
3230
Schema.IMBALANCE: ImbalanceMsg,

databento/common/dbnstore.py

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
import pytz
3030
import zstandard
3131
from databento_dbn import FIXED_PRICE_SCALE
32+
from databento_dbn import UNDEF_PRICE
3233
from databento_dbn import Compression
3334
from databento_dbn import DBNDecoder
3435
from databento_dbn import Encoding
@@ -42,7 +43,6 @@
4243
from databento_dbn import VersionUpgradePolicy
4344

4445
from databento.common.constants import DEFINITION_TYPE_MAX_MAP
45-
from databento.common.constants import INT64_NULL
4646
from databento.common.constants import SCHEMA_STRUCT_MAP
4747
from databento.common.constants import SCHEMA_STRUCT_MAP_V1
4848
from databento.common.error import BentoError
@@ -1422,12 +1422,18 @@ def _format_map_symbols(self, df: pd.DataFrame) -> None:
14221422
# the first ordered field will be ts_recv or ts_event when appropriate
14231423
ts_name = self._struct_type._ordered_fields[0]
14241424

1425-
df_index = df[ts_name] if self._pretty_ts else pd.to_datetime(df[ts_name], utc=True)
1426-
dates = [ts.date() for ts in df_index]
1427-
df["symbol"] = [
1428-
self._instrument_map.resolve(inst, dates[i])
1429-
for i, inst in enumerate(df["instrument_id"])
1430-
]
1425+
if df.empty:
1426+
df["symbol"] = []
1427+
else:
1428+
df["symbol"] = df.apply(
1429+
lambda r: self._instrument_map.resolve(
1430+
r["instrument_id"],
1431+
(
1432+
r[ts_name] if self._pretty_ts else pd.to_datetime(r[ts_name], utc=True)
1433+
).date(),
1434+
),
1435+
axis=1,
1436+
)
14311437

14321438
def _format_timezone(self, df: pd.DataFrame) -> None:
14331439
for field in self._struct_type._timestamp_fields:
@@ -1441,13 +1447,12 @@ def _format_px(
14411447
px_fields = self._struct_type._price_fields
14421448

14431449
if price_type == "decimal":
1444-
for field in px_fields:
1445-
df[field] = (
1446-
df[field].replace(INT64_NULL, np.nan).apply(decimal.Decimal) / FIXED_PRICE_SCALE
1447-
)
1450+
df[px_fields] = (
1451+
df[px_fields].replace(UNDEF_PRICE, np.nan).applymap(decimal.Decimal)
1452+
/ FIXED_PRICE_SCALE
1453+
)
14481454
elif price_type == "float":
1449-
for field in px_fields:
1450-
df[field] = df[field].replace(INT64_NULL, np.nan) / FIXED_PRICE_SCALE
1455+
df[px_fields] = df[px_fields].replace(UNDEF_PRICE, np.nan) / FIXED_PRICE_SCALE
14511456
else:
14521457
return # do nothing
14531458

0 commit comments

Comments
 (0)