Skip to content

Commit 0960ec1

Browse files
committed
MOD: Improve corporate actions API
1 parent f4f8c3f commit 0960ec1

File tree

5 files changed

+221
-16
lines changed

5 files changed

+221
-16
lines changed

databento/common/parsing.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,26 @@ def optional_values_list_to_string(
5858
return values_list_to_string(values)
5959

6060

61+
def optional_string_to_list(
62+
value: Iterable[str] | str | None,
63+
) -> Iterable[str] | list[str] | None:
64+
"""
65+
Convert a comma-separated string into a list of strings, or return the
66+
original input if not a string.
67+
68+
Parameters
69+
----------
70+
value : iterable of str or str, optional
71+
The input value to be parsed.
72+
73+
Returns
74+
-------
75+
Iterable[str] | list[str] | `None`
76+
77+
"""
78+
return value.strip().strip(",").split(",") if isinstance(value, str) else value
79+
80+
6181
def optional_symbols_list_to_list(
6282
symbols: Iterable[str | int | Integral] | str | int | Integral | None,
6383
stype_in: SType,

databento/reference/api/corporate.py

Lines changed: 63 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from databento.common.parsing import convert_datetime_columns
1616
from databento.common.parsing import datetime_to_string
1717
from databento.common.parsing import optional_datetime_to_string
18+
from databento.common.parsing import optional_string_to_list
1819
from databento.common.parsing import optional_symbols_list_to_list
1920

2021

@@ -31,10 +32,14 @@ def get_range(
3132
self,
3233
start: pd.Timestamp | date | str | int,
3334
end: pd.Timestamp | date | str | int | None = None,
35+
index: str = "event_date",
3436
symbols: Iterable[str] | str | None = None,
3537
stype_in: SType | str = "raw_symbol",
3638
events: Iterable[str] | str | None = None,
37-
us_only: bool = False,
39+
countries: Iterable[str] | str | None = None,
40+
security_types: Iterable[str] | str | None = None,
41+
flatten: bool = True,
42+
pit: bool = False,
3843
) -> pd.DataFrame:
3944
"""
4045
Request a new corporate actions time series from Databento.
@@ -51,8 +56,9 @@ def get_range(
5156
The end datetime of the request time range (exclusive).
5257
Assumes UTC as timezone unless passed a tz-aware object.
5358
If an integer is passed, then this represents nanoseconds since the UNIX epoch.
54-
Values are forward filled based on the resolution provided.
55-
Defaults to the same value as `start`.
59+
index : str, default 'event_date'
60+
The index column to filter the `start` and `end` time range on.
61+
Use any of 'event_date', 'ex_date' or 'ts_record'.
5662
symbols : Iterable[str] or str, optional
5763
The symbols to filter for. Takes up to 2,000 symbols per request.
5864
If more than 1 symbol is specified, the data is merged and sorted by time.
@@ -64,10 +70,27 @@ def get_range(
6470
events : Iterable[str] or str, optional
6571
The event types to filter for.
6672
Takes any number of event types per request.
67-
If not specified then will be for **all** event types.
73+
If not specified then will select **all** event types by default.
6874
See [EVENT](https://databento.com/docs/standards-and-conventions/reference-data-enums#event) enum.
69-
us_only : bool, default False
70-
If filtering for US markets only.
75+
countries : Iterable[str] or str, optional
76+
The listing countries to filter for.
77+
Takes any number of two letter ISO 3166-1 alpha-2 country codes per request.
78+
If not specified then will select **all** listing countries by default.
79+
See [CNTRY](https://databento.com/docs/standards-and-conventions/reference-data-enums#cntry) enum.
80+
security_types : Iterable[str] or str, optional
81+
The security types to filter for.
82+
Takes any number of security types per request.
83+
If not specified then will select **all** security types by default.
84+
See [SECTYPE](https://databento.com/docs/standards-and-conventions/reference-data-enums#sectype) enum.
85+
flatten : bool, default True
86+
If nested JSON objects within the `date_info`, `rate_info`, and `event_info` fields
87+
should be flattened into separate columns in the resulting DataFrame.
88+
pit : bool, default False
89+
Determines whether to retain all historical records or only the latest records.
90+
If True, all historical records for each `event_unique_id` will be retained, preserving
91+
the complete point-in-time history.
92+
If False (default), the DataFrame will include only the most recent record for each
93+
`event_unique_id` based on the `ts_record` timestamp.
7194
7295
Returns
7396
-------
@@ -76,17 +99,19 @@ def get_range(
7699
77100
"""
78101
symbols_list = optional_symbols_list_to_list(symbols, SType.RAW_SYMBOL)
79-
80-
if isinstance(events, str):
81-
events = events.strip().strip(",").split(",")
102+
events = optional_string_to_list(events)
103+
countries = optional_string_to_list(countries)
104+
security_types = optional_string_to_list(security_types)
82105

83106
data: dict[str, object | None] = {
84107
"start": datetime_to_string(start),
85108
"end": optional_datetime_to_string(end),
109+
"index": index,
86110
"symbols": ",".join(symbols_list),
87111
"stype_in": stype_in,
88112
"events": ",".join(events) if events else None,
89-
"us_only": us_only,
113+
"countries": ",".join(countries) if countries else None,
114+
"security_types": ",".join(security_types) if security_types else None,
90115
}
91116

92117
response = self._post(
@@ -96,7 +121,35 @@ def get_range(
96121
)
97122

98123
df = pd.read_json(StringIO(response.text), lines=True)
124+
if df.empty:
125+
return df
126+
99127
convert_datetime_columns(df, CORPORATE_ACTIONS_DATETIME_COLUMNS)
100128
convert_date_columns(df, CORPORATE_ACTIONS_DATE_COLUMNS)
101129

130+
if flatten:
131+
# Normalize the dynamic JSON fields
132+
date_info_normalized = pd.json_normalize(df["date_info"]).set_index(df.index)
133+
rate_info_normalized = pd.json_normalize(df["rate_info"]).set_index(df.index)
134+
event_info_normalized = pd.json_normalize(df["event_info"]).set_index(df.index)
135+
136+
# Merge normalized columns
137+
df = df.merge(date_info_normalized, left_index=True, right_index=True)
138+
df = df.merge(rate_info_normalized, left_index=True, right_index=True)
139+
df = df.merge(event_info_normalized, left_index=True, right_index=True)
140+
141+
# Drop the original JSON columns
142+
df.drop(columns=["date_info", "rate_info", "event_info"], inplace=True)
143+
144+
if pit:
145+
df.set_index(index, inplace=True)
146+
df.sort_index(inplace=True)
147+
else:
148+
# Filter for the latest record of each unique event
149+
df.sort_values("ts_record", inplace=True)
150+
df = df.groupby("event_unique_id").agg("last").reset_index()
151+
df.set_index(index, inplace=True)
152+
if index != "ts_record":
153+
df.sort_index(inplace=True)
154+
102155
return df

examples/reference_corporate_actions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
start="2023",
1515
end="2024-04",
1616
events="DIV,LIQ",
17-
us_only=True,
17+
countries="US",
1818
)
1919

2020
pprint(response.head())
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"ts_record":"2023-10-10T04:37:14Z","event_unique_id":"U-40179043276-1888618","event_id":"E-9043276-RSPLT","listing_id":"L-1888618","listing_group_id":"LG-888618","security_id":"S-799713","issuer_id":"I-119138","event_action":"U","event":"RSPLT","event_subtype":"CONSD","event_date_label":"ex_date","event_date":"2023-10-10","event_created_date":"2023-02-09","effective_date":null,"ex_date":"2023-10-10","record_date":null,"record_date_id":"D-9043276","related_event":null,"related_event_id":null,"global_status":"A","listing_status":"T","listing_source":"M","listing_date":"2008-03-18","delisting_date":null,"issuer_name":"Bowmo Inc","security_type":"EQS","security_description":"Ordinary Shares","primary_exchange":"USOTC","exchange":"USOTC","operating_mic":"OTCM","symbol":"BOMO","nasdaq_symbol":"BOMO","local_code":"BOMO","isin":"US2288912064","us_code":"228891206","bbg_comp_id":"BBG000VCV3H9","bbg_comp_ticker":"BOMO US","figi":"BBG000VCV726","figi_ticker":"BOMO UV","listing_country":"US","register_country":"US","trading_currency":"USD","multi_currency":false,"segment_mic_name":"Pink Current 
Information","segment_mic":"PINC","mand_volu_flag":"M","rd_priority":1,"lot_size":100,"par_value":0.01,"par_value_currency":"USD","payment_date":null,"duebills_redemption_date":null,"from_date":null,"to_date":null,"registration_date":null,"start_date":null,"end_date":null,"open_date":null,"close_date":null,"start_subscription_date":null,"end_subscription_date":null,"option_election_date":null,"withdrawal_rights_from_date":null,"withdrawal_rights_to_date":null,"notification_date":null,"financial_year_end_date":null,"exp_completion_date":null,"payment_type":"S","option_id":"1","serial_id":"1","default_option_flag":true,"rate_currency":"USD","ratio_old":1000.0,"ratio_new":1.0,"fraction":null,"outturn_style":"NEWO","outturn_security_type":"EQS","outturn_security_id":"S-799713","outturn_isin":"US2288913054","outturn_us_code":"228891305","outturn_local_code":"BOMOD","outturn_bbg_comp_id":"BBG000VCV3H9","outturn_bbg_comp_ticker":"BOMO US","outturn_figi":"BBG000VCV726","outturn_figi_ticker":"BOMO UV","min_offer_qty":null,"max_offer_qty":null,"min_qualify_qty":null,"max_qualify_qty":null,"min_accept_qty":null,"max_accept_qty":null,"tender_strike_price":null,"tender_price_step":null,"option_expiry_time":null,"option_expiry_tz":null,"withdrawal_rights_flag":null,"withdrawal_rights_expiry_time":null,"withdrawal_rights_expiry_tz":null,"expiry_time":null,"expiry_tz":null,"date_info":{},"rate_info":{"par_value_old":0.01,"par_value_new":0.01},"event_info":{},"ts_created":"2024-07-22T01:43:15.099184Z"}
2+
{"ts_record":"2023-11-01T00:00:00Z","event_unique_id":"U-40179043276-1888618","event_id":"E-9043276-RSPLT","listing_id":"L-1888618","listing_group_id":"LG-888618","security_id":"S-799713","issuer_id":"I-119138","event_action":"U","event":"RSPLT","event_subtype":"CONSD","event_date_label":"ex_date","event_date":"2023-10-10","event_created_date":"2023-02-09","effective_date":null,"ex_date":"2023-10-10","record_date":null,"record_date_id":"D-9043276","related_event":null,"related_event_id":null,"global_status":"A","listing_status":"T","listing_source":"M","listing_date":"2008-03-18","delisting_date":null,"issuer_name":"Bowmo Inc","security_type":"EQS","security_description":"Ordinary Shares","primary_exchange":"USOTC","exchange":"USOTC","operating_mic":"OTCM","symbol":"BOMO","nasdaq_symbol":"BOMO","local_code":"BOMO","isin":"US2288912064","us_code":"228891206","bbg_comp_id":"BBG000VCV3H9","bbg_comp_ticker":"BOMO US","figi":"BBG000VCV726","figi_ticker":"BOMO UV","listing_country":"US","register_country":"US","trading_currency":"USD","multi_currency":false,"segment_mic_name":"Pink Current 
Information","segment_mic":"PINC","mand_volu_flag":"M","rd_priority":1,"lot_size":100,"par_value":0.01,"par_value_currency":"USD","payment_date":null,"duebills_redemption_date":null,"from_date":null,"to_date":null,"registration_date":null,"start_date":null,"end_date":null,"open_date":null,"close_date":null,"start_subscription_date":null,"end_subscription_date":null,"option_election_date":null,"withdrawal_rights_from_date":null,"withdrawal_rights_to_date":null,"notification_date":null,"financial_year_end_date":null,"exp_completion_date":null,"payment_type":"S","option_id":"1","serial_id":"1","default_option_flag":true,"rate_currency":"USD","ratio_old":1000.0,"ratio_new":1.0,"fraction":null,"outturn_style":"NEWO","outturn_security_type":"EQS","outturn_security_id":"S-799713","outturn_isin":"US2288913054","outturn_us_code":"228891305","outturn_local_code":"BOMOD","outturn_bbg_comp_id":"BBG000VCV3H9","outturn_bbg_comp_ticker":"BOMO US","outturn_figi":"BBG000VCV726","outturn_figi_ticker":"BOMO UV","min_offer_qty":null,"max_offer_qty":null,"min_qualify_qty":null,"max_qualify_qty":null,"min_accept_qty":null,"max_accept_qty":null,"tender_strike_price":null,"tender_price_step":null,"option_expiry_time":null,"option_expiry_tz":null,"withdrawal_rights_flag":null,"withdrawal_rights_expiry_time":null,"withdrawal_rights_expiry_tz":null,"expiry_time":null,"expiry_tz":null,"date_info":{},"rate_info":{"par_value_old":0.01,"par_value_new":0.01},"event_info":{},"ts_created":"2024-07-23T00:00:00.000000Z"}

tests/test_reference_corporate.py

Lines changed: 135 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from unittest.mock import MagicMock
66

77
import databento as db
8+
import pandas as pd
89
import pytest
910
import requests
1011
from databento.reference.client import Reference
@@ -13,35 +14,66 @@
1314

1415

1516
@pytest.mark.parametrize(
16-
("events", "data_events"),
17+
(
18+
"events",
19+
"countries",
20+
"security_types",
21+
"expected_events",
22+
"expected_countries",
23+
"expected_security_types",
24+
),
1725
[
1826
[
1927
None,
2028
None,
29+
None,
30+
None,
31+
None,
32+
None,
2133
],
2234
[
2335
[],
36+
[],
37+
[],
38+
None,
39+
None,
2440
None,
2541
],
2642
[
2743
"DIV",
44+
"US",
45+
"EQS",
2846
"DIV",
47+
"US",
48+
"EQS",
2949
],
3050
[
3151
"DIV,LIQ",
52+
"US,CA",
53+
"EQS,ETF",
3254
"DIV,LIQ",
55+
"US,CA",
56+
"EQS,ETF",
3357
],
3458
[
3559
["DIV", "LIQ"],
60+
["US", "CA"],
61+
["EQS", "ETF"],
3662
"DIV,LIQ",
63+
"US,CA",
64+
"EQS,ETF",
3765
],
3866
],
3967
)
4068
def test_corporate_actions_get_range_sends_expected_request(
4169
monkeypatch: pytest.MonkeyPatch,
4270
reference_client: Reference,
4371
events: Iterable[str] | str | None,
44-
data_events: str,
72+
countries: Iterable[str] | str | None,
73+
security_types: Iterable[str] | str | None,
74+
expected_events: str,
75+
expected_countries: str,
76+
expected_security_types: str,
4577
) -> None:
4678
# Arrange
4779
mock_response = MagicMock()
@@ -57,6 +89,8 @@ def test_corporate_actions_get_range_sends_expected_request(
5789
start="2024-01",
5890
end="2024-04",
5991
events=events,
92+
countries=countries,
93+
security_types=security_types,
6094
)
6195

6296
# Assert
@@ -70,16 +104,108 @@ def test_corporate_actions_get_range_sends_expected_request(
70104
assert call["data"] == {
71105
"start": "2024-01",
72106
"end": "2024-04",
107+
"index": "event_date",
73108
"symbols": "AAPL",
74109
"stype_in": "raw_symbol",
75-
"events": data_events,
76-
"us_only": False,
110+
"events": expected_events,
111+
"countries": expected_countries,
112+
"security_types": expected_security_types,
77113
}
78114
assert call["timeout"] == (100, 100)
79115
assert isinstance(call["auth"], requests.auth.HTTPBasicAuth)
80116

81117

82-
def test_corporate_actions_get_range_response_parsing(
118+
def test_corporate_actions_get_range_response_parsing_as_pit(
    monkeypatch: pytest.MonkeyPatch,
    reference_client: Reference,
) -> None:
    """
    Check the parsed DataFrame for a `pit=True` request: two rows, indexed by
    `event_date` in ascending order, with the nested JSON columns removed.
    """
    # Arrange
    fixture_file = Path(TESTS_ROOT).joinpath(
        "data",
        "REFERENCE",
        "test_data.corporate-actions.ndjson",
    )
    fake_response = MagicMock()
    fake_response.text = fixture_file.read_text()
    fake_response.__enter__.return_value = fake_response
    fake_response.__exit__ = MagicMock()
    fake_post = MagicMock(return_value=fake_response)
    monkeypatch.setattr(requests, "post", fake_post)

    # Act
    request_kwargs = {
        "symbols": "AAPL",
        "stype_in": "raw_symbol",
        "start": "2024-01",
        "end": "2024-04",
        "pit": True,
    }
    result = reference_client.corporate_actions.get_range(**request_kwargs)

    # Assert
    assert len(result) == 2
    assert result.index.name == "event_date"
    assert result.index.is_monotonic_increasing
    # The default flattening must have removed the source JSON columns
    for json_column in ("date_info", "rate_info", "event_info"):
        assert json_column not in result.columns
146+
147+
148+
def test_corporate_actions_get_range_response(
    monkeypatch: pytest.MonkeyPatch,
    reference_client: Reference,
) -> None:
    """
    Check that a `pit=False` request indexed on `ts_record` returns a single
    row whose index is the 2023-11-01 UTC timestamp.
    """
    # Arrange
    fixture_file = Path(TESTS_ROOT).joinpath(
        "data",
        "REFERENCE",
        "test_data.corporate-actions-pit.ndjson",
    )
    fake_response = MagicMock()
    fake_response.text = fixture_file.read_text()
    fake_response.__enter__.return_value = fake_response
    fake_response.__exit__ = MagicMock()
    fake_post = MagicMock(return_value=fake_response)
    monkeypatch.setattr(requests, "post", fake_post)

    # Act
    request_kwargs = {
        "symbols": "AAPL",
        "index": "ts_record",
        "start": "2024-01",
        "end": "2024-04",
        "pit": False,
    }
    result = reference_client.corporate_actions.get_range(**request_kwargs)

    # Assert
    expected_first = pd.Timestamp("2023-11-01 00:00:00", tz="UTC")
    assert len(result) == 1
    assert result.index[0] == expected_first
172+
173+
174+
def test_corporate_actions_get_range_with_ts_record_index(
    monkeypatch: pytest.MonkeyPatch,
    reference_client: Reference,
) -> None:
    """
    Check that requesting `index="ts_record"` returns two rows whose index
    equals the expected `ts_record` DatetimeIndex.
    """
    # Arrange
    fixture_file = Path(TESTS_ROOT).joinpath(
        "data",
        "REFERENCE",
        "test_data.corporate-actions.ndjson",
    )
    fake_response = MagicMock()
    fake_response.text = fixture_file.read_text()
    fake_response.__enter__.return_value = fake_response
    fake_response.__exit__ = MagicMock()
    fake_post = MagicMock(return_value=fake_response)
    monkeypatch.setattr(requests, "post", fake_post)

    # Act
    request_kwargs = {
        "symbols": "AAPL",
        "stype_in": "raw_symbol",
        "index": "ts_record",
        "start": "2024-01",
        "end": "2024-04",
    }
    result = reference_client.corporate_actions.get_range(**request_kwargs)

    # Both expected rows carry the same record timestamp
    expected_timestamps = ["2023-10-10 04:37:14+00:00"] * 2
    expected_index = pd.DatetimeIndex(expected_timestamps, name="ts_record")

    # Assert
    assert len(result) == 2
    assert result.index.equals(expected_index)
206+
207+
208+
def test_corporate_actions_get_range_without_flattening(
83209
monkeypatch: pytest.MonkeyPatch,
84210
reference_client: Reference,
85211
) -> None:
@@ -97,7 +223,11 @@ def test_corporate_actions_get_range_response_parsing(
97223
stype_in="raw_symbol",
98224
start="2024-01",
99225
end="2024-04",
226+
flatten=False,
100227
)
101228

102229
# Assert
103230
assert len(df_raw) == 2
231+
# Assert the columns were retained
232+
for col in ["date_info", "rate_info", "event_info"]:
233+
assert col in df_raw.columns

0 commit comments

Comments
 (0)