Skip to content

Commit e3a118a

Browse files
Added bulk agg download and reader scripts (#511)
1 parent c999c06 commit e3a118a

File tree

2 files changed

+154
-0
lines changed

2 files changed

+154
-0
lines changed

examples/rest/bulk_aggs_downloader.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import datetime
2+
import concurrent.futures
3+
import logging
4+
from polygon import RESTClient
5+
import signal
6+
import sys
7+
import pickle
8+
import lz4.frame # type: ignore
9+
10+
"""
11+
This script performs the following tasks:
12+
13+
1. Downloads aggregated market data (referred to as 'aggs') for specific stock symbols using the Polygon API.
14+
2. Handles multiple symbols and dates, downloading each (symbol, date) pair in parallel with a thread pool.
15+
3. Saves the downloaded data in a compressed format (LZ4) using Python's pickle serialization.
16+
4. Utilizes logging to track its progress and any potential errors.
17+
5. Supports interruption: installs a SIGINT handler so that pressing Ctrl+C prints a message and exits the script.
18+
19+
Usage:
20+
1. pip install lz4
21+
2. Set your Polygon API key in the environment variable 'POLYGON_API_KEY'.
22+
3. Specify the date range and stock symbols you are interested in within the script.
23+
4. Run the script.
24+
25+
The script will create compressed '.pickle.lz4' files containing the aggs for each specified stock symbol and date.
26+
27+
Note: This script is designed to be compatible with a data reader script, such as 'bulk_aggs_reader.py'.
28+
"""
29+
30+
# Set up logging
31+
# Show INFO-and-above records, each prefixed with a timestamp.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")
32+
33+
34+
def signal_handler(sig, frame):
    """Handle an interrupt signal: announce it and exit with status 0.

    Args:
        sig: Signal number passed in by the ``signal`` module (unused).
        frame: Stack frame passed in by the ``signal`` module (unused).

    Raises:
        SystemExit: Always, with exit code 0.
    """
    print("You pressed Ctrl+C!")
    sys.exit(0)
37+
38+
39+
# Route SIGINT (Ctrl+C) to signal_handler so the script prints a message and exits.
signal.signal(signal.SIGINT, signal_handler)
40+
41+
42+
def get_aggs_for_symbol_and_date(symbol_date_pair):
    """Download one day of 1-minute aggs for a symbol and save them to disk.

    The aggs are pickled, LZ4-compressed and written to
    ``{symbol}-aggs-{date}.pickle.lz4`` in the current working directory.

    Args:
        symbol_date_pair: A ``(symbol, date)`` tuple. ``date`` is passed to
            ``list_aggs`` as both the start and the end of the range and is
            interpolated into the output filename.

    Returns:
        None. If the aggs cannot be serialized, the error is logged and no
        file is written.
    """
    symbol, date = symbol_date_pair
    client = RESTClient(trace=True)  # Uses POLYGON_API_KEY environment variable

    # Iterate list_aggs to completion and keep the whole day in memory.
    aggs = list(
        client.list_aggs(
            symbol,
            1,
            "minute",
            date,
            date,
            limit=50000,
        )
    )
    # Logged (not printed) so the count is attributable when many threads run.
    logging.info(f"Fetched {len(aggs)} aggs for {symbol} on {date}")

    filename = f"{symbol}-aggs-{date}.pickle.lz4"

    # Serialize and compress BEFORE opening the file, so a failure cannot
    # leave an empty or truncated file behind.
    try:
        compressed_data = lz4.frame.compress(pickle.dumps(aggs))
    except (TypeError, pickle.PicklingError) as e:
        logging.error(f"Serialization Error for {symbol} on {date}: {e}")
        return

    with open(filename, "wb") as file:
        file.write(compressed_data)

    logging.info(f"Downloaded aggs for {date} and saved to {filename}")
69+
70+
71+
def weekdays_between(start_date, end_date):
    """Yield each Monday-to-Friday date from start_date through end_date.

    Both endpoints are inclusive. Nothing is yielded when ``start_date`` is
    later than ``end_date``.

    Args:
        start_date: First date to consider.
        end_date: Last date to consider.

    Yields:
        Each date in the range whose ``weekday()`` is 0-4, in ascending order.
    """
    one_day = datetime.timedelta(days=1)
    day = start_date
    while day <= end_date:
        if day.weekday() < 5:  # 0-4 denotes Monday to Friday
            yield day
        day += one_day
78+
79+
80+
def main():
    """Download aggs for every configured (symbol, weekday) pair in parallel.

    Edit ``start_date``, ``end_date`` and ``symbols`` below to choose what is
    downloaded. Each pair is handled by ``get_aggs_for_symbol_and_date`` on a
    thread pool. A failure in one download is logged and does not stop the
    others.
    """
    start_date = datetime.date(2023, 8, 1)
    end_date = datetime.date(2023, 8, 31)

    symbols = ["TSLA", "AAPL", "HCP", "GOOG"]  # The array of symbols you want

    dates = list(weekdays_between(start_date, end_date))

    # Generate a list of (symbol, date) pairs
    symbol_date_pairs = [(symbol, date) for symbol in symbols for date in dates]

    # Use ThreadPoolExecutor to download data in parallel
    with concurrent.futures.ThreadPoolExecutor(max_workers=50) as executor:
        futures = {
            executor.submit(get_aggs_for_symbol_and_date, pair): pair
            for pair in symbol_date_pairs
        }
        # Retrieve every result: an exception raised inside a worker only
        # surfaces when its future is inspected, so unread futures hide errors.
        for future in concurrent.futures.as_completed(futures):
            symbol, date = futures[future]
            try:
                future.result()
            except Exception:
                logging.exception(
                    "Failed to download aggs for %s on %s", symbol, date
                )
94+
95+
96+
# Start the download only when this file is run as a script.
if __name__ == "__main__":
    main()

examples/rest/bulk_aggs_reader.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import lz4.frame # type: ignore
2+
import pickle
3+
import datetime
4+
5+
"""
6+
This script performs the following tasks:
7+
8+
1. Reads aggregated market data ('aggs') for a specific stock symbol for multiple dates.
9+
2. Reads the data from LZ4-compressed pickle files, which should have been generated by a separate data downloading script.
10+
3. Displays the read data to the console.
11+
4. Handles exceptions gracefully: informs the user if a file for a specific date was not found or if any other error occurred.
12+
13+
Usage:
14+
1. pip install lz4
15+
2. Ensure that the compressed '.pickle.lz4' files for the specified stock symbol and date range exist in the same directory as this script.
16+
3. Modify the date range and stock symbol in the script as per your requirements.
17+
4. Run the script.
18+
19+
The script will read and display the market data for each specified date and stock symbol.
20+
21+
Note: This script is designed to be compatible with files generated by a data downloading script, such as 'bulk_aggs_downloader.py'.
22+
"""
23+
24+
25+
def read_trades_for_date(symbol, date):
    """Load, print and return the data saved for one symbol and date.

    Despite the function name, the file read is the
    ``{symbol}-aggs-{date}.pickle.lz4`` file, looked up relative to the
    current working directory.

    Args:
        symbol: Ticker symbol used to build the filename.
        date: Date used to build the filename.

    Returns:
        The unpickled object, or ``None`` when the file is missing or any
        other error occurs while reading, decompressing or unpickling it.
        In both failure cases a message is printed instead of raising.
    """
    # Construct the filename, similar to your writer script
    filename = f"{symbol}-aggs-{date}.pickle.lz4"

    try:
        with open(filename, "rb") as file:
            compressed_data = file.read()
        # SECURITY: unpickling can execute arbitrary code. Only read files
        # you produced yourself (e.g. with the downloader script).
        trades = pickle.loads(lz4.frame.decompress(compressed_data))
        print(trades)
        return trades
    except FileNotFoundError:
        print(f"No file found for {date}")
    except Exception as e:
        # Deliberately best-effort: report the problem and move on.
        print(f"An error occurred: {e}")
    return None
41+
42+
43+
def main():
    """Read and print the saved data for one symbol on each weekday in a range.

    Edit ``start_date``, ``end_date`` and ``symbol`` below to choose which
    files are read. Both endpoints of the date range are inclusive.
    """
    start_date = datetime.date(2023, 8, 1)
    end_date = datetime.date(2023, 8, 31)
    symbol = "HCP"

    # Walk the range one day at a time and read only Monday-Friday dates.
    total_days = (end_date - start_date).days + 1
    for offset in range(total_days):
        day = start_date + datetime.timedelta(days=offset)
        if day.weekday() < 5:  # 0-4 denotes Monday to Friday
            read_trades_for_date(symbol, day)
54+
55+
56+
# Start reading only when this file is run as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)