1313from databento .common .http import BentoHttpAPI
1414from databento .common .parsing import convert_date_columns
1515from databento .common .parsing import convert_datetime_columns
16- from databento .common .parsing import datetime_to_date_string
17- from databento .common .parsing import optional_date_to_string
16+ from databento .common .parsing import datetime_to_string
17+ from databento .common .parsing import optional_datetime_to_string
18+ from databento .common .parsing import optional_string_to_list
1819from databento .common .parsing import optional_symbols_list_to_list
19- from databento .common .publishers import Dataset
20- from databento .common .validation import validate_semantic_string
2120
2221
2322class CorporateActionsHttpAPI (BentoHttpAPI ):
@@ -31,12 +30,16 @@ def __init__(self, key: str, gateway: str) -> None:
3130
3231 def get_range (
3332 self ,
34- start_date : date | str ,
35- end_date : date | str | None = None ,
36- dataset : Dataset | str | None = None ,
33+ start : pd . Timestamp | date | str | int ,
34+ end : pd . Timestamp | date | str | int | None = None ,
35+ index : str = "event_date" ,
3736 symbols : Iterable [str ] | str | None = None ,
3837 stype_in : SType | str = "raw_symbol" ,
3938 events : Iterable [str ] | str | None = None ,
39+ countries : Iterable [str ] | str | None = None ,
40+ security_types : Iterable [str ] | str | None = None ,
41+ flatten : bool = True ,
42+ pit : bool = False ,
4043 ) -> pd .DataFrame :
4144 """
4245 Request a new corporate actions time series from Databento.
@@ -45,12 +48,17 @@ def get_range(
4548
4649 Parameters
4750 ----------
48- start_date : date or str
49- The start date (UTC) of the request time range (inclusive).
50- end_date : date or str, optional
51- The end date (UTC) of the request time range (exclusive).
52- dataset : Dataset or str, optional
53- The dataset code (string identifier) for the request.
51+ start : pd.Timestamp or date or str or int
52+ The start datetime of the request time range (inclusive).
53+ Assumes UTC as timezone unless passed a tz-aware object.
54+ If an integer is passed, then this represents nanoseconds since the UNIX epoch.
55+ end : pd.Timestamp or date or str or int, optional
56+ The end datetime of the request time range (exclusive).
57+ Assumes UTC as timezone unless passed a tz-aware object.
58+ If an integer is passed, then this represents nanoseconds since the UNIX epoch.
59+ index : str, default 'event_date'
60+ The index column to filter the `start` and `end` time range on.
61+ Use any of 'event_date', 'ex_date' or 'ts_record'.
5462 symbols : Iterable[str] or str, optional
5563 The symbols to filter for. Takes up to 2,000 symbols per request.
5664 If more than 1 symbol is specified, the data is merged and sorted by time.
@@ -62,28 +70,48 @@ def get_range(
6270 events : Iterable[str] or str, optional
6371 The event types to filter for.
6472 Takes any number of event types per request.
65- If not specified then will be for **all** event types.
73+ If not specified then will select **all** event types by default.
6674 See [EVENT](https://databento.com/docs/standards-and-conventions/reference-data-enums#event) enum.
75+ countries : Iterable[str] or str, optional
76+ The listing countries to filter for.
77+ Takes any number of two letter ISO 3166-1 alpha-2 country codes per request.
78+ If not specified then will select **all** listing countries by default.
79+ See [CNTRY](https://databento.com/docs/standards-and-conventions/reference-data-enums#cntry) enum.
80+ security_types : Iterable[str] or str, optional
81+ The security types to filter for.
82+ Takes any number of security types per request.
83+ If not specified then will select **all** security types by default.
84+ See [SECTYPE](https://databento.com/docs/standards-and-conventions/reference-data-enums#sectype) enum.
85+ flatten : bool, default True
86+ If nested JSON objects within the `date_info`, `rate_info`, and `event_info` fields
87+ should be flattened into separate columns in the resulting DataFrame.
88+ pit : bool, default False
89+ Determines whether to retain all historical records or only the latest records.
90+ If True, all historical records for each `event_unique_id` will be retained, preserving
91+ the complete point-in-time history.
92+ If False (default), the DataFrame will include only the most recent record for each
93+ `event_unique_id` based on the `ts_record` timestamp.
6794
6895 Returns
6996 -------
7097 pandas.DataFrame
7198 The data converted into a data frame.
7299
73100 """
74- dataset = validate_semantic_string (dataset , "dataset" ) if dataset is not None else None
75101 symbols_list = optional_symbols_list_to_list (symbols , SType .RAW_SYMBOL )
76-
77- if isinstance ( events , str ):
78- events = events . strip (). strip ( "," ). split ( "," )
102+ events = optional_string_to_list ( events )
103+ countries = optional_string_to_list ( countries )
104+ security_types = optional_string_to_list ( security_types )
79105
80106 data : dict [str , object | None ] = {
81- "start_date " : datetime_to_date_string ( start_date ),
82- "end_date " : optional_date_to_string ( end_date ),
83- "dataset " : dataset ,
107+ "start " : datetime_to_string ( start ),
108+ "end " : optional_datetime_to_string ( end ),
109+ "index " : index ,
84110 "symbols" : "," .join (symbols_list ),
85111 "stype_in" : stype_in ,
86112 "events" : "," .join (events ) if events else None ,
113+ "countries" : "," .join (countries ) if countries else None ,
114+ "security_types" : "," .join (security_types ) if security_types else None ,
87115 }
88116
89117 response = self ._post (
@@ -93,7 +121,35 @@ def get_range(
93121 )
94122
95123 df = pd .read_json (StringIO (response .text ), lines = True )
124+ if df .empty :
125+ return df
126+
96127 convert_datetime_columns (df , CORPORATE_ACTIONS_DATETIME_COLUMNS )
97128 convert_date_columns (df , CORPORATE_ACTIONS_DATE_COLUMNS )
98129
130+ if flatten :
131+ # Normalize the dynamic JSON fields
132+ date_info_normalized = pd .json_normalize (df ["date_info" ]).set_index (df .index )
133+ rate_info_normalized = pd .json_normalize (df ["rate_info" ]).set_index (df .index )
134+ event_info_normalized = pd .json_normalize (df ["event_info" ]).set_index (df .index )
135+
136+ # Merge normalized columns
137+ df = df .merge (date_info_normalized , left_index = True , right_index = True )
138+ df = df .merge (rate_info_normalized , left_index = True , right_index = True )
139+ df = df .merge (event_info_normalized , left_index = True , right_index = True )
140+
141+ # Drop the original JSON columns
142+ df .drop (columns = ["date_info" , "rate_info" , "event_info" ], inplace = True )
143+
144+ if pit :
145+ df .set_index (index , inplace = True )
146+ df .sort_index (inplace = True )
147+ else :
148+ # Filter for the latest record of each unique event
149+ df .sort_values ("ts_record" , inplace = True )
150+ df = df .groupby ("event_unique_id" ).agg ("last" ).reset_index ()
151+ df .set_index (index , inplace = True )
152+ if index != "ts_record" :
153+ df .sort_index (inplace = True )
154+
99155 return df
0 commit comments