Skip to content

Commit d439038

Browse files
authored
VER: Release 0.8.2
See release notes.
2 parents 9d11f15 + 4566e30 commit d439038

39 files changed

+709
-473
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## 0.8.2 - 2023-03-10
4+
- Removed `record_count` property from Bento class
5+
- Fixed bug in `Bento` where invalid metadata would prevent iteration
6+
- Improved use of the logging module
7+
- Changed `metadata.get_dataset_condition` response to a list of conditions per date
8+
39
## 0.8.1 - 2023-03-05
410
- Fixed bug in `Bento` iteration where multiple readers were created
511
- Added `from_dbn` convenience alias for loading DBN files

databento/__init__.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
from typing import Optional
1+
import logging
22

3+
from databento.common import utility
34
from databento.common.bento import Bento
45
from databento.common.enums import (
56
Compression,
@@ -52,8 +53,9 @@
5253
"SymbologyResolution",
5354
]
5455

55-
# Set to either 'DEBUG' or 'INFO', controls console logging
56-
log: Optional[str] = None
56+
# Setup logging
57+
logging.getLogger(__name__).addHandler(logging.NullHandler())
5758

5859
# Convenience imports
60+
enable_logging = utility.enable_logging
5961
from_dbn = Bento.from_file

databento/common/bento.py

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,6 @@ class Bento:
269269
The raw compressed data in bytes.
270270
reader : IO[bytes]
271271
A zstd decompression stream.
272-
record_count : int
273-
The record count.
274272
schema : Schema
275273
The data record schema.
276274
start : pd.Timestamp
@@ -347,10 +345,17 @@ def __init__(self, data_source: DataSource) -> None:
347345

348346
def __iter__(self) -> Generator[np.void, None, None]:
349347
reader = self.reader
350-
for _ in range(self.record_count):
348+
while True:
351349
raw = reader.read(self.record_size)
352-
rec = np.frombuffer(raw, dtype=STRUCT_MAP[self.schema])
353-
yield rec[0]
350+
if raw:
351+
rec = np.frombuffer(raw, dtype=STRUCT_MAP[self.schema])
352+
yield rec[0]
353+
else:
354+
break
355+
356+
def __repr__(self) -> str:
357+
name = self.__class__.__name__
358+
return f"<{name}(schema={self.schema})>"
354359

355360
def _apply_pretty_ts(self, df: pd.DataFrame) -> pd.DataFrame:
356361
df.index = pd.to_datetime(df.index, utc=True)
@@ -412,8 +417,10 @@ def _build_product_id_index(self) -> Dict[dt.date, Dict[int, str]]:
412417
return product_id_index
413418

414419
def _prepare_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
420+
# Setup column ordering and index
415421
df.set_index(self._get_index_column(), inplace=True)
416-
df.drop(["length", "rtype"], axis=1, inplace=True)
422+
df = df.reindex(columns=COLUMNS[self.schema])
423+
417424
if self.schema == Schema.MBO or self.schema in DERIV_SCHEMAS:
418425
df["flags"] = df["flags"] & 0xFF # Apply bitmask
419426
df["side"] = df["side"].str.decode("utf-8")
@@ -424,10 +431,6 @@ def _prepare_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
424431
for column, type_max in DEFINITION_TYPE_MAX_MAP.items():
425432
if column in df.columns:
426433
df[column] = df[column].where(df[column] != type_max, np.nan)
427-
428-
# Reorder columns
429-
df = df.reindex(columns=COLUMNS[self.schema])
430-
431434
return df
432435

433436
def _get_index_column(self) -> str:
@@ -603,18 +606,6 @@ def reader(self) -> IO[bytes]:
603606
reader.seek(self._metadata_length)
604607
return reader
605608

606-
@property
607-
def record_count(self) -> int:
608-
"""
609-
Return the record count.
610-
611-
Returns
612-
-------
613-
int
614-
615-
"""
616-
return self._metadata["record_count"]
617-
618609
@property
619610
def schema(self) -> Schema:
620611
"""

databento/common/data.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,9 @@ def get_deriv_ba_fields(level: int) -> List[str]:
266266

267267
DEFINITION_COLUMNS = [x for x in np.dtype(DEFINITION_MSG).names or ()]
268268
DEFINITION_COLUMNS.remove("ts_recv") # Index
269+
DEFINITION_COLUMNS.remove("length")
270+
DEFINITION_COLUMNS.remove("rtype")
271+
DEFINITION_COLUMNS.remove("dummy")
269272

270273

271274
COLUMNS = {

databento/common/logging.py

Lines changed: 0 additions & 51 deletions
This file was deleted.

databento/common/utility.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import logging
2+
from typing import Union
3+
4+
5+
def enable_logging(level: Union[int, str] = logging.INFO) -> None:
6+
"""
7+
Enable logging for the Databento module.
8+
This function should be used for simple applications and examples.
9+
It is advisable to configure your own logging for serious applications.
10+
11+
Parameters
12+
----------
13+
level : str or int, default 'INFO'
14+
The log level to configure.
15+
16+
See Also
17+
--------
18+
logging
19+
20+
"""
21+
# Create a basic formatter
22+
formatter = logging.Formatter(
23+
fmt=logging.BASIC_FORMAT,
24+
)
25+
26+
# Construct a stream handler for stderr
27+
handler = logging.StreamHandler()
28+
handler.setFormatter(formatter)
29+
handler.setLevel(level=level)
30+
31+
# Add the handler to the databento logger
32+
databento_logger = logging.getLogger("databento")
33+
databento_logger.setLevel(level=level)
34+
databento_logger.addHandler(handler)

databento/common/validation.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,36 @@ def validate_gateway(
143143
return urlunsplit(components=("https", url_chunks.path, "", "", ""))
144144

145145

146+
def validate_semantic_string(value: str, param: str) -> str:
147+
"""
148+
Validate whether a string contains a semantic value.
149+
A string is considered absent of meaning if:
150+
- It is empty.
151+
- It contains only whitespace.
152+
- It contains unprintable characters.
153+
154+
Parameters
155+
----------
156+
value : str
157+
The string to validate.
158+
param : str
159+
The name of the parameter being validated (for any error message).
160+
161+
Raises
162+
------
163+
ValueError
164+
If the string is not meaningful.
165+
166+
"""
167+
if not value:
168+
raise ValueError(f"The `{param}` cannot be an empty string.")
169+
if str.isspace(value):
170+
raise ValueError(f"The `{param}` cannot contain only whitepsace.")
171+
if not str.isprintable(value):
172+
raise ValueError(f"The `{param}` cannot contain unprintable characters.")
173+
return value
174+
175+
146176
def validate_smart_symbol(symbol: str) -> str:
147177
"""
148178
Validate whether symbol has a valid smart symbol format.

0 commit comments

Comments
 (0)