1515from databento .common .parsing import convert_datetime_columns
1616from databento .common .parsing import datetime_to_string
1717from databento .common .parsing import optional_datetime_to_string
18+ from databento .common .parsing import optional_string_to_list
1819from databento .common .parsing import optional_symbols_list_to_list
1920
2021
@@ -31,10 +32,14 @@ def get_range(
3132 self ,
3233 start : pd .Timestamp | date | str | int ,
3334 end : pd .Timestamp | date | str | int | None = None ,
35+ index : str = "event_date" ,
3436 symbols : Iterable [str ] | str | None = None ,
3537 stype_in : SType | str = "raw_symbol" ,
3638 events : Iterable [str ] | str | None = None ,
37- us_only : bool = False ,
39+ countries : Iterable [str ] | str | None = None ,
40+ security_types : Iterable [str ] | str | None = None ,
41+ flatten : bool = True ,
42+ pit : bool = False ,
3843 ) -> pd .DataFrame :
3944 """
4045 Request a new corporate actions time series from Databento.
@@ -51,8 +56,9 @@ def get_range(
5156 The end datetime of the request time range (exclusive).
5257 Assumes UTC as timezone unless passed a tz-aware object.
5358 If an integer is passed, then this represents nanoseconds since the UNIX epoch.
54- Values are forward filled based on the resolution provided.
55- Defaults to the same value as `start`.
59+ index : str, default 'event_date'
60+ The index column to filter the `start` and `end` time range on.
61+ Use any of 'event_date', 'ex_date' or 'ts_record'.
5662 symbols : Iterable[str] or str, optional
5763 The symbols to filter for. Takes up to 2,000 symbols per request.
5864 If more than 1 symbol is specified, the data is merged and sorted by time.
@@ -64,10 +70,27 @@ def get_range(
6470 events : Iterable[str] or str, optional
6571 The event types to filter for.
6672 Takes any number of event types per request.
67- If not specified then will be for **all** event types.
73+ If not specified then will select **all** event types by default.
6874 See [EVENT](https://databento.com/docs/standards-and-conventions/reference-data-enums#event) enum.
69- us_only : bool, default False
70- If filtering for US markets only.
75+ countries : Iterable[str] or str, optional
76+ The listing countries to filter for.
77+ Takes any number of two-letter ISO 3166-1 alpha-2 country codes per request.
78+ If not specified then will select **all** listing countries by default.
79+ See [CNTRY](https://databento.com/docs/standards-and-conventions/reference-data-enums#cntry) enum.
80+ security_types : Iterable[str] or str, optional
81+ The security types to filter for.
82+ Takes any number of security types per request.
83+ If not specified then will select **all** security types by default.
84+ See [SECTYPE](https://databento.com/docs/standards-and-conventions/reference-data-enums#sectype) enum.
85+ flatten : bool, default True
86+ If nested JSON objects within the `date_info`, `rate_info`, and `event_info` fields
87+ should be flattened into separate columns in the resulting DataFrame.
88+ pit : bool, default False
89+ Determines whether to retain all historical records or only the latest records.
90+ If True, all historical records for each `event_unique_id` will be retained, preserving
91+ the complete point-in-time history.
92+ If False (default), the DataFrame will include only the most recent record for each
93+ `event_unique_id` based on the `ts_record` timestamp.
7194
7295 Returns
7396 -------
@@ -76,17 +99,19 @@ def get_range(
7699
77100 """
78101 symbols_list = optional_symbols_list_to_list (symbols , SType .RAW_SYMBOL )
79-
80- if isinstance ( events , str ):
81- events = events . strip (). strip ( "," ). split ( "," )
102+ events = optional_string_to_list ( events )
103+ countries = optional_string_to_list ( countries )
104+ security_types = optional_string_to_list ( security_types )
82105
83106 data : dict [str , object | None ] = {
84107 "start" : datetime_to_string (start ),
85108 "end" : optional_datetime_to_string (end ),
109+ "index" : index ,
86110 "symbols" : "," .join (symbols_list ),
87111 "stype_in" : stype_in ,
88112 "events" : "," .join (events ) if events else None ,
89- "us_only" : us_only ,
113+ "countries" : "," .join (countries ) if countries else None ,
114+ "security_types" : "," .join (security_types ) if security_types else None ,
90115 }
91116
92117 response = self ._post (
@@ -96,7 +121,35 @@ def get_range(
96121 )
97122
98123 df = pd .read_json (StringIO (response .text ), lines = True )
124+ if df .empty :
125+ return df
126+
99127 convert_datetime_columns (df , CORPORATE_ACTIONS_DATETIME_COLUMNS )
100128 convert_date_columns (df , CORPORATE_ACTIONS_DATE_COLUMNS )
101129
130+ if flatten :
131+ # Normalize the dynamic JSON fields
132+ date_info_normalized = pd .json_normalize (df ["date_info" ]).set_index (df .index )
133+ rate_info_normalized = pd .json_normalize (df ["rate_info" ]).set_index (df .index )
134+ event_info_normalized = pd .json_normalize (df ["event_info" ]).set_index (df .index )
135+
136+ # Merge normalized columns
137+ df = df .merge (date_info_normalized , left_index = True , right_index = True )
138+ df = df .merge (rate_info_normalized , left_index = True , right_index = True )
139+ df = df .merge (event_info_normalized , left_index = True , right_index = True )
140+
141+ # Drop the original JSON columns
142+ df .drop (columns = ["date_info" , "rate_info" , "event_info" ], inplace = True )
143+
144+ if pit :
145+ df .set_index (index , inplace = True )
146+ df .sort_index (inplace = True )
147+ else :
148+ # Filter for the latest record of each unique event
149+ df .sort_values ("ts_record" , inplace = True )
150+ df = df .groupby ("event_unique_id" ).agg ("last" ).reset_index ()
151+ df .set_index (index , inplace = True )
152+ if index != "ts_record" :
153+ df .sort_index (inplace = True )
154+
102155 return df
0 commit comments