Skip to content

Commit 2d40538

Browse files
committed
FIX: Reduce FP error in Python client's pretty_px
1 parent cbace10 commit 2d40538

File tree

4 files changed

+23
-73
lines changed

4 files changed

+23
-73
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#### Bug fixes
1111
- Fixed issue where exception messages were displaying JSON encoded data
12+
- Reduced floating-point error when converting prices to floats with `pretty_px=True`
1213

1314
## 0.18.1 - 2023-08-16
1415

databento/common/data.py

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -51,36 +51,6 @@
5151
Schema.STATISTICS: StatMsg._dtypes,
5252
}
5353

54-
DEFINITION_CHARARRAY_COLUMNS = [
55-
"currency",
56-
"settl_currency",
57-
"secsubtype",
58-
"raw_symbol",
59-
"group",
60-
"exchange",
61-
"asset",
62-
"cfi",
63-
"security_type",
64-
"unit_of_measure",
65-
"underlying",
66-
"strike_price_currency",
67-
"instrument_class",
68-
"match_algorithm",
69-
"security_update_action",
70-
"user_defined_instrument",
71-
]
72-
73-
DEFINITION_PRICE_COLUMNS = [
74-
"min_price_increment",
75-
"high_limit_price",
76-
"low_limit_price",
77-
"max_price_variation",
78-
"trading_reference_price",
79-
"min_price_increment_amount",
80-
"price_ratio",
81-
"strike_price",
82-
]
83-
8454
DEFINITION_TYPE_MAX_MAP = {
8555
x[0]: np.iinfo(x[1]).max
8656
for x in InstrumentDefMsg._dtypes

databento/common/dbnstore.py

Lines changed: 14 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import numpy as np
1919
import pandas as pd
2020
import zstandard
21+
from databento_dbn import FIXED_PRICE_SCALE
2122
from databento_dbn import Compression
2223
from databento_dbn import DBNDecoder
2324
from databento_dbn import ErrorMsg
@@ -27,8 +28,6 @@
2728
from databento_dbn import SymbolMappingMsg
2829
from databento_dbn import SystemMsg
2930

30-
from databento.common.data import DEFINITION_CHARARRAY_COLUMNS
31-
from databento.common.data import DEFINITION_PRICE_COLUMNS
3231
from databento.common.data import DEFINITION_TYPE_MAX_MAP
3332
from databento.common.data import DERIV_SCHEMAS
3433
from databento.common.data import SCHEMA_COLUMNS
@@ -49,7 +48,6 @@
4948
]
5049

5150
INT64_NULL = 9223372036854775807
52-
NAN = float("NaN")
5351

5452

5553
logger = logging.getLogger(__name__)
@@ -398,36 +396,16 @@ def __repr__(self) -> str:
398396

399397
def _apply_pretty_ts(self, df: pd.DataFrame) -> pd.DataFrame:
400398
df.index = pd.to_datetime(df.index, utc=True)
401-
for column in df.columns:
402-
if column.startswith("ts_") and "delta" not in column:
403-
df[column] = pd.to_datetime(df[column], errors="coerce", utc=True)
404-
405-
if self.schema == Schema.DEFINITION:
406-
df["expiration"] = pd.to_datetime(
407-
df["expiration"],
408-
errors="coerce",
409-
utc=True,
410-
)
411-
df["activation"] = pd.to_datetime(
412-
df["activation"],
413-
errors="coerce",
414-
utc=True,
415-
)
399+
for column in SCHEMA_STRUCT_MAP[self.schema]._timestamp_fields:
400+
if df.index.name == column:
401+
continue
402+
df[column] = pd.to_datetime(df[column], errors="coerce", utc=True)
416403

417404
return df
418405

419406
def _apply_pretty_px(self, df: pd.DataFrame) -> pd.DataFrame:
420-
for column in list(df.columns):
421-
if (
422-
column in ("price", "open", "high", "low", "close")
423-
or column.startswith("bid_px") # MBP
424-
or column.startswith("ask_px") # MBP
425-
):
426-
df[column] = df[column].replace(INT64_NULL, NAN) * 1e-9
427-
428-
if self.schema == Schema.DEFINITION:
429-
for column in DEFINITION_PRICE_COLUMNS:
430-
df[column] = df[column].replace(INT64_NULL, NAN) * 1e-9
407+
for column in SCHEMA_STRUCT_MAP[self.schema]._price_fields:
408+
df[column] = df[column].replace(INT64_NULL, np.nan) / FIXED_PRICE_SCALE
431409

432410
return df
433411

@@ -468,13 +446,14 @@ def _prepare_dataframe(
468446
df: pd.DataFrame,
469447
schema: Schema,
470448
) -> pd.DataFrame:
449+
# char array columns
450+
hidden_fields = SCHEMA_STRUCT_MAP[self.schema]._hidden_fields
451+
for column, dtype in SCHEMA_DTYPES_MAP[self.schema]:
452+
if dtype.startswith("S") and column not in hidden_fields:
453+
df[column] = df[column].str.decode("utf-8")
471454
if schema == Schema.MBO or schema in DERIV_SCHEMAS:
472455
df["flags"] = df["flags"] & 0xFF # Apply bitmask
473-
df["side"] = df["side"].str.decode("utf-8")
474-
df["action"] = df["action"].str.decode("utf-8")
475456
elif schema == Schema.DEFINITION:
476-
for column in DEFINITION_CHARARRAY_COLUMNS:
477-
df[column] = df[column].str.decode("utf-8")
478457
for column, type_max in DEFINITION_TYPE_MAX_MAP.items():
479458
if column in df.columns:
480459
df[column] = df[column].where(df[column] != type_max, np.nan)
@@ -715,7 +694,7 @@ def symbology(self) -> dict[str, Any]:
715694
"stype_in": str(self.stype_in),
716695
"stype_out": str(self.stype_out),
717696
"start_date": str(self.start.date()),
718-
"end_date": str(self.end.date()),
697+
"end_date": str(self.end.date()) if self.end else None,
719698
"partial": self._metadata.partial,
720699
"not_found": self._metadata.not_found,
721700
"mappings": self.mappings,
@@ -862,7 +841,7 @@ def request_symbology(self, client: Historical) -> dict[str, Any]:
862841
stype_in=self.stype_in,
863842
stype_out=self.stype_out,
864843
start_date=self.start.date(),
865-
end_date=self.end.date(),
844+
end_date=self.end.date() if self.end else None,
866845
)
867846

868847
def to_csv(

tests/test_historical_bento.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -572,14 +572,14 @@ def test_mbp_1_to_csv_with_all_options_writes_expected_file_to_disk(
572572
b"ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags"
573573
b",ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,sy"
574574
b"mbol\n2020-12-28 13:00:00.006136329+00:00,2020-12-28 13:00:00.006001487+00:00,1,1"
575-
b",5482,A,A,0,3720.5000000000005,1,128,17214,1170362,3720.2500000000005,3720.500000000"
576-
b"0005,24,11,15,9,ESH1\n2020-12-28 13:00:00.006246513+00:00,2020-12-28 13:00:00.006"
577-
b"146661+00:00,1,1,5482,A,A,0,3720.5000000000005,1,128,18858,1170364,3720.250000000000"
578-
b"5,3720.5000000000005,24,12,15,10,ESH1\n2020-12-28 13:00:00.007159323+00:00,2020-1"
579-
b"2-28 13:00:00.007044577+00:00,1,1,5482,A,B,0,3720.2500000000005,2,128,18115,1170365,"
580-
b"3720.2500000000005,3720.5000000000005,26,12,16,10,ESH1\n2020-12-28 13:00:00.00726"
581-
b"0967+00:00,2020-12-28 13:00:00.007169135+00:00,1,1,5482,C,A,0,3720.5000000000005,1,1"
582-
b"28,17361,1170366,3720.2500000000005,3720.5000000000005,26,11,16,9,ESH1\n"
575+
b",5482,A,A,0,3720.5,1,128,17214,1170362,3720.25,3720.5"
576+
b",24,11,15,9,ESH1\n2020-12-28 13:00:00.006246513+00:00,2020-12-28 13:00:00.006"
577+
b"146661+00:00,1,1,5482,A,A,0,3720.5,1,128,18858,1170364,3720.25"
578+
b",3720.5,24,12,15,10,ESH1\n2020-12-28 13:00:00.007159323+00:00,2020-1"
579+
b"2-28 13:00:00.007044577+00:00,1,1,5482,A,B,0,3720.25,2,128,18115,1170365,"
580+
b"3720.25,3720.5,26,12,16,10,ESH1\n2020-12-28 13:00:00.00726"
581+
b"0967+00:00,2020-12-28 13:00:00.007169135+00:00,1,1,5482,C,A,0,3720.5,1,1"
582+
b"28,17361,1170366,3720.25,3720.5,26,11,16,9,ESH1\n"
583583
)
584584

585585
if sys.platform == "win32":

0 commit comments

Comments
 (0)