databento
diff --git a/‎CHANGELOG.md‎
Lines changed: 6 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎databento/__init__.py‎
Lines changed: 5 additions & 3 deletions b/‎databento/__init__.py‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎databento/common/bento.py‎
Lines changed: 13 additions & 22 deletions b/‎databento/common/bento.py‎
Lines changed: 13 additions & 22 deletions
diff --git a/‎databento/common/data.py‎
Lines changed: 3 additions & 0 deletions b/‎databento/common/data.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎databento/common/logging.py‎
Lines changed: 0 additions & 51 deletions b/‎databento/common/logging.py‎
Lines changed: 0 additions & 51 deletions
diff --git a/‎databento/common/utility.py‎
Lines changed: 34 additions & 0 deletions b/‎databento/common/utility.py‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎databento/common/validation.py‎
Lines changed: 30 additions & 0 deletions b/‎databento/common/validation.py‎
Lines changed: 30 additions & 0 deletions
@@ -1,5 +1,11 @@
 # Changelog
 
+## 0.8.2 - 2023-03-10
+- Removed `record_count` property from Bento class
+- Fixed bug in `Bento` where invalid metadata would prevent iteration
+- Improved use of the logging module
+- Changed `metadata.get_dataset_condition` response to a list of conditions per date
+
 ## 0.8.1 - 2023-03-05
 - Fixed bug in `Bento` iteration where multiple readers were created
 - Added `from_dbn` convenience alias for loading DBN files
 
@@ -1,5 +1,6 @@
-from typing import Optional
+import logging
 
+from databento.common import utility
 from databento.common.bento import Bento
 from databento.common.enums import (
     Compression,
@@ -52,8 +53,9 @@
     "SymbologyResolution",
 ]
 
-# Set to either 'DEBUG' or 'INFO', controls console logging
-log: Optional[str] = None
+# Setup logging
+logging.getLogger(__name__).addHandler(logging.NullHandler())
 
 # Convenience imports
+enable_logging = utility.enable_logging
 from_dbn = Bento.from_file
@@ -269,8 +269,6 @@ class Bento:
         The raw compressed data in bytes.
     reader : IO[bytes]
         A zstd decompression stream.
-    record_count : int
-        The record count.
     schema : Schema
         The data record schema.
     start : pd.Timestamp
@@ -347,10 +345,17 @@ def __init__(self, data_source: DataSource) -> None:
 
     def __iter__(self) -> Generator[np.void, None, None]:
         reader = self.reader
-        for _ in range(self.record_count):
+        while True:
             raw = reader.read(self.record_size)
-            rec = np.frombuffer(raw, dtype=STRUCT_MAP[self.schema])
-            yield rec[0]
+            if raw:
+                rec = np.frombuffer(raw, dtype=STRUCT_MAP[self.schema])
+                yield rec[0]
+            else:
+                break
+
+    def __repr__(self) -> str:
+        name = self.__class__.__name__
+        return f"<{name}(schema={self.schema})>"
 
     def _apply_pretty_ts(self, df: pd.DataFrame) -> pd.DataFrame:
         df.index = pd.to_datetime(df.index, utc=True)
@@ -412,8 +417,10 @@ def _build_product_id_index(self) -> Dict[dt.date, Dict[int, str]]:
         return product_id_index
 
     def _prepare_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
+        # Setup column ordering and index
         df.set_index(self._get_index_column(), inplace=True)
-        df.drop(["length", "rtype"], axis=1, inplace=True)
+        df = df.reindex(columns=COLUMNS[self.schema])
+
         if self.schema == Schema.MBO or self.schema in DERIV_SCHEMAS:
             df["flags"] = df["flags"] & 0xFF  # Apply bitmask
             df["side"] = df["side"].str.decode("utf-8")
@@ -424,10 +431,6 @@ def _prepare_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
             for column, type_max in DEFINITION_TYPE_MAX_MAP.items():
                 if column in df.columns:
                     df[column] = df[column].where(df[column] != type_max, np.nan)
-
-        # Reorder columns
-        df = df.reindex(columns=COLUMNS[self.schema])
-
         return df
 
     def _get_index_column(self) -> str:
@@ -603,18 +606,6 @@ def reader(self) -> IO[bytes]:
         reader.seek(self._metadata_length)
         return reader
 
-    @property
-    def record_count(self) -> int:
-        """
-        Return the record count.
-
-        Returns
-        -------
-        int
-
-        """
-        return self._metadata["record_count"]
-
     @property
     def schema(self) -> Schema:
         """
 
@@ -266,6 +266,9 @@ def get_deriv_ba_fields(level: int) -> List[str]:
 
 DEFINITION_COLUMNS = [x for x in np.dtype(DEFINITION_MSG).names or ()]
 DEFINITION_COLUMNS.remove("ts_recv")  # Index
+DEFINITION_COLUMNS.remove("length")
+DEFINITION_COLUMNS.remove("rtype")
+DEFINITION_COLUMNS.remove("dummy")
 
 
 COLUMNS = {
 
@@ -0,0 +1,34 @@
+import logging
+from typing import Union
+
+
+def enable_logging(level: Union[int, str] = logging.INFO) -> None:
+    """
+    Enable logging for the Databento module.
+    This function should be used for simple applications and examples.
+    It is advisable to configure your own logging for serious applications.
+
+    Parameters
+    ----------
+    level : str or int, default 'INFO'
+        The log level to configure.
+
+    See Also
+    --------
+    logging
+
+    """
+    # Create a basic formatter
+    formatter = logging.Formatter(
+        fmt=logging.BASIC_FORMAT,
+    )
+
+    # Construct a stream handler for stderr
+    handler = logging.StreamHandler()
+    handler.setFormatter(formatter)
+    handler.setLevel(level=level)
+
+    # Add the handler to the databento logger
+    databento_logger = logging.getLogger("databento")
+    databento_logger.setLevel(level=level)
+    databento_logger.addHandler(handler)
@@ -143,6 +143,36 @@ def validate_gateway(
     return urlunsplit(components=("https", url_chunks.path, "", "", ""))
 
 
+def validate_semantic_string(value: str, param: str) -> str:
+    """
+    Validate whether a string contains a semantic value.
+    A string is considered absent of meaning if:
+        - It is empty.
+        - It contains only whitespace.
+        - It contains unprintable characters.
+
+    Parameters
+    ----------
+    value : str
+        The string to validate.
+    param : str
+        The name of the parameter being validated (for any error message).
+
+    Raises
+    ------
+    ValueError
+        If the string is not meaningful.
+
+    """
+    if not value:
+        raise ValueError(f"The `{param}` cannot be an empty string.")
+    if str.isspace(value):
+        raise ValueError(f"The `{param}` cannot contain only whitespace.")
+    if not str.isprintable(value):
+        raise ValueError(f"The `{param}` cannot contain unprintable characters.")
+    return value
+
+
 def validate_smart_symbol(symbol: str) -> str:
     """
     Validate whether symbol has a valid smart symbol format.