Skip to content

Commit 23b4eab

Browse files
Copilot and Elijas committed
Add support for bulk metadata retrieval for multiple accession numbers
Co-authored-by: Elijas <4084885+Elijas@users.noreply.github.com>
1 parent 1b0f887 commit 23b4eab

File tree

4 files changed

+82
-7
lines changed

4 files changed

+82
-7
lines changed

nbs/00_types.ipynb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,19 @@
186186
" ), f\"Test case failed for input: {test_input}. Expected: {expected}, Got: {result}\""
187187
]
188188
},
189+
{
190+
"cell_type": "code",
191+
"execution_count": null,
192+
"metadata": {},
193+
"outputs": [],
194+
"source": [
195+
"# | export\n",
196+
"@dataclass\n",
197+
"class AccessionNumbers:\n",
198+
" ticker_or_cik: str\n",
199+
" accession_numbers: list[str]"
200+
]
201+
},
189202
{
190203
"cell_type": "code",
191204
"execution_count": null,

sec_downloader/core.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,10 @@
88
from sec_downloader.sec_edgar_downloader_fork import (
99
FilingMetadata,
1010
get_filing_metadata,
11+
get_filing_metadatas_for_accession_numbers,
1112
get_latest_filings_metadata,
1213
)
13-
from sec_downloader.types import CompanyAndAccessionNumber, RequestedFilings
14+
from sec_downloader.types import AccessionNumbers, CompanyAndAccessionNumber, RequestedFilings
1415

1516
FileContent = namedtuple("FileContent", ["path", "content"])
1617
DEFAULT_FILTER_PATTERN = "**/*.*"
@@ -32,7 +33,7 @@ def user_agent(self):
3233

3334
def get_filing_metadatas(
3435
self,
35-
query: Union[str, RequestedFilings, CompanyAndAccessionNumber],
36+
query: Union[str, RequestedFilings, CompanyAndAccessionNumber, AccessionNumbers],
3637
*,
3738
include_amends: bool = False,
3839
) -> list[FilingMetadata]:
@@ -54,6 +55,15 @@ def get_filing_metadatas(
5455
)
5556
]
5657

58+
if isinstance(query, AccessionNumbers):
59+
return get_filing_metadatas_for_accession_numbers(
60+
ticker_or_cik=query.ticker_or_cik,
61+
accession_numbers=query.accession_numbers,
62+
user_agent=self.user_agent,
63+
ticker_to_cik_mapping=self._ticker_to_cik_mapping,
64+
include_amends=include_amends,
65+
)
66+
5767
if isinstance(query, (RequestedFilings, str)):
5868
if isinstance(query, str):
5969
query = RequestedFilings.from_string(query)

sec_downloader/sec_edgar_downloader_fork.py

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,46 @@ def get_filing_metadata(
4949
return result[0]
5050

5151

52+
def get_filing_metadatas_for_accession_numbers(
    *,
    ticker_or_cik: str,
    accession_numbers: list[str],
    user_agent: str,
    ticker_to_cik_mapping: dict[str, str],
    include_amends: bool = False,
) -> list[FilingMetadata]:
    """Return metadata for several accession numbers of a single company.

    All accession numbers must belong to the same company (``ticker_or_cik``);
    they are resolved with one ``_get_metadatas`` call instead of one call per
    accession number.

    Args:
        ticker_or_cik: Ticker symbol or CIK of the company that owns the filings.
        accession_numbers: Accession numbers to look up. Each may be given in
            dashed form or as an 18-character undashed string, which is
            rewritten to the dashed ``10-2-6`` layout before validation.
        user_agent: Passed through unchanged to ``_get_metadatas``.
        ticker_to_cik_mapping: Mapping handed to
            ``validate_and_convert_ticker_or_cik`` to resolve ``ticker_or_cik``.
        include_amends: Passed through unchanged to ``_get_metadatas``.

    Returns:
        The list returned by ``_get_metadatas``; an empty list when
        ``accession_numbers`` is empty (no lookup is performed in that case).

    Raises:
        ValueError: If a (normalized) accession number does not match
            ``accession_number_re``. NOTE(review): ``_get_metadatas`` and
            ``validate_and_convert_ticker_or_cik`` may raise as well -- confirm
            against their definitions.
    """
    if not accession_numbers:
        return []

    normalized_accession_numbers = []
    for accession_number in accession_numbers:
        # An 18-character value is treated as the undashed form and re-dashed.
        if len(accession_number) == 18:
            accession_number = (
                f"{accession_number[:10]}-{accession_number[10:12]}-{accession_number[12:]}"
            )
        if not accession_number_re.match(accession_number):
            raise ValueError(f"Invalid Accession Number: {accession_number}")
        normalized_accession_numbers.append(accession_number)

    # Drop duplicates (including the same number supplied in both dashed and
    # undashed form) while keeping first-seen order. Otherwise ``limit`` below
    # would exceed the number of distinct filings that can possibly match.
    unique_accession_numbers = list(dict.fromkeys(normalized_accession_numbers))

    cik = validate_and_convert_ticker_or_cik(ticker_or_cik, ticker_to_cik_mapping)

    # Single bulk lookup for all requested accession numbers.
    return _get_metadatas(
        cik=cik,
        user_agent=user_agent,
        limit=len(unique_accession_numbers),
        accession_numbers=unique_accession_numbers,
        include_amends=include_amends,
    )
90+
91+
5292
def get_latest_filings_metadata(
5393
*,
5494
requested: RequestedFilings,
@@ -94,13 +134,14 @@ def _get_metadatas(
94134
limit: int,
95135
ticker_or_cik: Optional[str] = None,
96136
accession_number: Optional[str] = None,
137+
accession_numbers: Optional[list[str]] = None,
97138
form_type: Optional[str] = None,
98139
include_amends: bool = False,
99140
) -> list[FilingMetadata]:
100141
assert (
101142
ticker_or_cik and form_type
102-
) or accession_number, (
103-
"Either ticker or CIK and form type or accession number must be provided"
143+
) or accession_number or accession_numbers, (
144+
"Either ticker or CIK and form type or accession number(s) must be provided"
104145
)
105146

106147
submissions_uri = URL_SUBMISSIONS.format(
@@ -151,7 +192,7 @@ def _get_metadatas(
151192
this_form_type,
152193
items,
153194
) in zip(
154-
accession_numbers,
195+
filings_json["accessionNumber"],
155196
primary_document_urls,
156197
filing_dates,
157198
report_dates,
@@ -164,6 +205,7 @@ def _get_metadatas(
164205
if (
165206
(form_type and form_type != this_form_type)
166207
or (accession_number and accession_number != this_accession_number)
208+
or (accession_numbers and this_accession_number not in accession_numbers)
167209
or (is_amend and not include_amends)
168210
):
169211
continue
@@ -195,7 +237,11 @@ def _get_metadatas(
195237
submissions_uri = URL_SUBMISSIONS.format(submission=next_page)
196238

197239
requested_form = f" of type {form_type}" if form_type else ""
198-
error_context = f"{accession_number or ticker_or_cik}{requested_form}"
240+
if accession_numbers:
241+
error_context = f"{ticker_or_cik} with accession numbers {accession_numbers}"
242+
else:
243+
error_context = f"{accession_number or ticker_or_cik}{requested_form}"
244+
199245
if not found_metadatas:
200246
msg = f"Could not find any filings: {error_context}"
201247
raise ValueError(msg)

sec_downloader/types.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_types.ipynb.
22

33
# %% auto 0
4-
__all__ = ['Ticker', 'FilingMetadata', 'RequestedFilings', 'CompanyAndAccessionNumber']
4+
__all__ = ['Ticker', 'FilingMetadata', 'RequestedFilings', 'CompanyAndAccessionNumber', 'AccessionNumbers']
55

66
# %% ../nbs/00_types.ipynb 1
77
import re
@@ -75,3 +75,9 @@ def from_string(
7575
ticker_or_cik=ticker_or_cik,
7676
accession_number=accession_number,
7777
)
78+
79+
# %% ../nbs/00_types.ipynb 9
80+
@dataclass
class AccessionNumbers:
    """Query for several filings of one company, identified by accession number."""

    # Ticker symbol or CIK of the company the accession numbers belong to.
    ticker_or_cik: str
    # Accession numbers of the requested filings.
    accession_numbers: list[str]

0 commit comments

Comments
 (0)