Skip to content

Commit cf20de1

Browse files
committed
refactor: updated package to utilize uv, updated lock and pyproject.toml
1 parent 6519e3d commit cf20de1

File tree

17 files changed

+1893
-2765
lines changed

17 files changed

+1893
-2765
lines changed

.flake8

Lines changed: 0 additions & 8 deletions
This file was deleted.

Makefile

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,6 @@ UID="$(shell id -u)"
33
DOCKER_BUILDKIT=1
44
BUILDKIT_PROGRESS=plain
55

6-
.PHONY: check
7-
8-
check:
9-
poetry install
10-
poetry run isort curator/
11-
poetry run black curator/
12-
poetry run flake8 curator/
13-
146
DESCRIPTION="DB Update"
157
update_db:
168
alembic revision --autogenerate -m $(DESCRIPTION)

README.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,16 @@ We can then use the policy and the bot to identify options spread positions that
1010

1111
## Getting Started
1212

13-
This is a python project, and will utilize pyenv, poetry, pre-commit, and will be greatly benefitted by an Nvidia GPU for training. This project will build on the default `dev` branch and push to `prod` via "Squash Merge" PR only. At which time, we will cut a new release / tag for the project.
13+
This is a Python project and will utilize uv to manage dependencies. This project will build on the default `dev` branch and push to `prod` via "Squash Merge" PR only, at which time we will cut a new release / tag for the project.
1414

15-
### PyEnv
15+
### UV
1616

17-
Install [pyenv](https://github.com/pyenv/pyenv) and use it to create a virtualenv for this project with a version of python that is >=3.11
17+
Install [uv](https://docs.astral.sh/uv/getting-started/installation/) and use it to create a venv for this project with a version of Python that is >=3.11
1818

19-
### Poetry
19+
```bash
20+
uv venv --python 3.11
21+
```
2022

21-
We use [Poetry](https://python-poetry.org/) for dependency management. Once poetry is installed on your machine and you've cloned this repo, go into the project root `curator` directory and run `poetry install` to install all dependencies indicated in the pyTOML.
2223

2324
### Data Sources
2425

curator/data_pipeline/QuotePool.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -259,19 +259,18 @@ def clean_o_ticker_progress(self):
259259

260260
elif (
261261
len(
262-
(
263-
self.o_ticker_queue_progress.get(otkr, set())
264-
| self.o_ticker_skip_tids.get(otkr, set())
265-
)
262+
(self.o_ticker_queue_progress.get(otkr, set()) | self.o_ticker_skip_tids.get(otkr, set()))
266263
- self.tid_result_progress
267264
)
268265
== 0
269266
):
270267
self.completely_processed_otkrs.append(otkr)
271-
log.info(f"all processed for {otkr}!! \
268+
log.info(
269+
f"all processed for {otkr}!! \
272270
({len(self.o_ticker_queue_progress.get(otkr, []))} processed, \
273271
{total_tids - len(self.o_ticker_queue_progress.get(otkr, []))} will be skipped, \
274-
{total_tids} expected)")
272+
{total_tids} expected)"
273+
)
275274

276275

277276
class QuotePool(Pool):

curator/data_pipeline/download.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
from datetime import datetime
22

33
from aiomultiprocess import Pool
4-
from data_pipeline.exceptions import (
4+
5+
from curator.data_pipeline.exceptions import (
56
InvalidArgs,
67
ProjBaseException,
78
ProjClientConnectionError,
89
ProjClientResponseError,
910
ProjIndexError,
1011
ProjTimeoutError,
1112
)
12-
from data_pipeline.polygon_utils import (
13+
from curator.data_pipeline.polygon_utils import (
1314
CurrentContractSnapshot,
1415
HistoricalOptionsPrices,
1516
HistoricalQuotes,
@@ -18,10 +19,9 @@
1819
PolygonPaginator,
1920
StockMetaData,
2021
)
21-
from data_pipeline.QuotePool import QuotePool
22-
from db_tools.queries import lookup_multi_ticker_ids
23-
from db_tools.utils import OptionTicker
24-
22+
from curator.data_pipeline.QuotePool import QuotePool
23+
from curator.db_tools.queries import lookup_multi_ticker_ids
24+
from curator.db_tools.utils import OptionTicker
2525
from curator.proj_constants import POLYGON_BASE_URL, log
2626
from curator.utils import pool_kwarg_config
2727

@@ -142,7 +142,7 @@ async def download_options_quotes(ticker: str, o_tickers: list[OptionTicker], mo
142142
for i in range(0, len(batch_o_tickers), BATCH_SIZE_OTICKERS):
143143
small_batch = batch_o_tickers[i : i + BATCH_SIZE_OTICKERS]
144144

145-
log.info(f"downloading quotes for {i+BATCH_SIZE_OTICKERS}/{len(batch_o_tickers)} batch of o_tickers")
145+
log.info(f"downloading quotes for {i + BATCH_SIZE_OTICKERS}/{len(batch_o_tickers)} batch of o_tickers")
146146

147147
await api_quote_downloader(
148148
paginator=op_quotes,

curator/data_pipeline/main.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,18 @@
22
from datetime import datetime
33

44
import typer
5-
from data_pipeline.orchestrator import (
5+
6+
from curator.data_pipeline.orchestrator import (
67
import_all,
78
import_partial,
89
remove_tickers_from_universe,
910
)
10-
1111
from curator.utils import months_ago
1212

1313
DEFAULT_MONTHS_HIST = 24
1414
DEFAULT_START_DATE = months_ago(months=DEFAULT_MONTHS_HIST)
1515

16-
app = typer.Typer(
17-
help="CLI for adding stocks to the data pull process and for refreshing stock/options pricing data"
18-
)
16+
app = typer.Typer(help="CLI for adding stocks to the data pull process and for refreshing stock/options pricing data")
1917

2018

2119
def validate_partial(ctx, param, value: list[int]):

curator/data_pipeline/orchestrator.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
11
from datetime import datetime
22

3-
from data_pipeline.download import (
3+
from pandas import DataFrame
4+
5+
from curator.data_pipeline.download import (
46
download_options_contracts,
57
download_options_prices,
68
download_options_quotes,
79
download_options_snapshots,
810
download_stock_metadata,
911
download_stock_prices,
1012
)
11-
from data_pipeline.uploader import (
13+
from curator.data_pipeline.uploader import (
1214
upload_options_contracts,
1315
upload_options_prices,
1416
upload_options_quotes,
1517
upload_options_snapshots,
1618
upload_stock_metadata,
1719
upload_stock_prices,
1820
)
19-
from db_tools.queries import delete_stock_ticker, latest_date_per_ticker
20-
from db_tools.utils import generate_o_ticker_lookup, pull_tickers_from_db
21-
from pandas import DataFrame
22-
21+
from curator.db_tools.queries import delete_stock_ticker, latest_date_per_ticker
22+
from curator.db_tools.utils import generate_o_ticker_lookup, pull_tickers_from_db
2323
from curator.proj_constants import log
2424

2525

@@ -79,9 +79,7 @@ async def import_all(tickers: list, start_date: datetime, end_date: datetime, mo
7979
await upload_stock_prices(ticker_lookup)
8080

8181

82-
async def import_partial(
83-
partial: list[int], tickers: list, start_date: datetime, end_date: datetime, months_hist: int
84-
):
82+
async def import_partial(partial: list[int], tickers: list, start_date: datetime, end_date: datetime, months_hist: int):
8583
"""This will download, clean, and upload data for the components specified in `partial`
8684
This is meant to be used on an adhoc basis to fill in data gaps or backfill changes
8785
"""
@@ -128,9 +126,7 @@ async def import_partial(
128126
for ticker in final_tickers:
129127
ticker_counter += 1
130128
log.info(f"downloading quotes for {ticker} ({ticker_counter}/{len(final_tickers)})")
131-
await download_options_quotes(
132-
ticker=ticker, o_tickers=list(o_tickers.values()), months_hist=months_hist
133-
)
129+
await download_options_quotes(ticker=ticker, o_tickers=list(o_tickers.values()), months_hist=months_hist)
134130
temp_paths = await upload_options_quotes(ticker)
135131
failed_paths.append(temp_paths)
136132
log.info(f"failed to parse these paths: {failed_paths}")
@@ -145,9 +141,7 @@ async def remove_tickers_from_universe(tickers: list[str]):
145141
log.info(f"ticker {ticker} successfully deleted")
146142

147143

148-
async def refresh_import(
149-
tickers: list, start_date: datetime, end_date: datetime, months_hist: int, partial: list[int]
150-
):
144+
async def refresh_import(tickers: list, start_date: datetime, end_date: datetime, months_hist: int, partial: list[int]):
151145
"""refreshes all or partial components for the given tickers
152146
start date and end date is set to today, start date from the most recent going back to 24 months at most"""
153147
all_ = True if len(tickers) == 0 else False

curator/data_pipeline/path_runner.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,15 @@
44
from json import JSONDecodeError
55
from typing import Any, Generator
66

7-
from db_tools.queries import (
7+
from curator.db_tools.queries import (
88
update_options_prices,
99
update_options_quotes,
1010
update_options_snapshot,
1111
update_options_tickers,
1212
update_stock_metadata,
1313
update_stock_prices,
1414
)
15-
from db_tools.utils import OptionTicker
16-
15+
from curator.db_tools.utils import OptionTicker
1716
from curator.proj_constants import BASE_DOWNLOAD_PATH, POSTGRES_BATCH_MAX, log
1817
from curator.utils import (
1918
clean_o_ticker,
@@ -136,10 +135,7 @@ def generate_path_args(self) -> list[tuple[str]]:
136135
if self.all_:
137136
return [(self._determine_most_recent_file(self.base_directory),)]
138137
else:
139-
return [
140-
(self._determine_most_recent_file(f"{self.base_directory}/{ticker}"),)
141-
for ticker in self.tickers
142-
]
138+
return [(self._determine_most_recent_file(f"{self.base_directory}/{ticker}"),) for ticker in self.tickers]
143139

144140
def clean_data(self, results: list[dict], ticker_data: tuple = ()):
145141
clean_results = []

curator/data_pipeline/polygon_utils.py

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
ClientResponseError,
1414
ServerDisconnectedError,
1515
)
16-
from data_pipeline.exceptions import ProjAPIError, ProjAPIOverload
1716
from dateutil.relativedelta import relativedelta
18-
from db_tools.utils import OptionTicker
1917

18+
from curator.data_pipeline.exceptions import ProjAPIError, ProjAPIOverload
19+
from curator.db_tools.utils import OptionTicker
2020
from curator.proj_constants import BASE_DOWNLOAD_PATH, POLYGON_API_KEY, POLYGON_BASE_URL, log
2121
from curator.utils import (
2222
extract_underlying_from_o_ticker,
@@ -102,9 +102,7 @@ async def _execute_request(self, session: ClientSession, url: str, payload: dict
102102
json_response = await response.json() if status_code == 200 else {}
103103
return (status_code, json_response)
104104

105-
async def _query_all(
106-
self, session: ClientSession, url: str, payload: dict = {}, limit: bool = False
107-
) -> list[dict]:
105+
async def _query_all(self, session: ClientSession, url: str, payload: dict = {}, limit: bool = False) -> list[dict]:
108106
"""Query the API until all results have been returned
109107
Args:
110108
session: aiohttp ClientSession
@@ -238,9 +236,7 @@ def generate_request_args(self, args_data: list[str] = []):
238236
Returns:
239237
urls: list(tuple), each tuple contains the url, the payload for the request and an empty string"""
240238
if not self.all_:
241-
urls = [
242-
(self.url_base, dict(self.payload, **{"ticker": ticker}), ticker) for ticker in self.tickers
243-
]
239+
urls = [(self.url_base, dict(self.payload, **{"ticker": ticker}), ticker) for ticker in self.tickers]
244240
else:
245241
urls = [(self.url_base, self.payload, "")]
246242
return urls
@@ -510,18 +506,14 @@ def __init__(self, o_ticker_lookup: dict[str, int], months_hist: int = 24):
510506
self.start_date, self.close_date = self._determine_start_end_dates(
511507
string_to_date("2060-01-01")
512508
) # NOTE: magic number meant to always trigger the newest date (today)
513-
self.dates = trading_days_in_range(
514-
str(self.start_date), str(self.close_date), count=False, cal_type="o_cal"
515-
)
509+
self.dates = trading_days_in_range(str(self.start_date), str(self.close_date), count=False, cal_type="o_cal")
516510
self.dates_stamps = self._prepare_timestamps(self.dates)
517511
self.o_ticker_lookup = o_ticker_lookup
518512

519513
def _construct_url(self, o_ticker: str) -> str:
520514
return f"/v3/quotes/{o_ticker}"
521515

522-
def generate_request_args(
523-
self, args_data: list[OptionTicker]
524-
) -> tuple[list[tuple[str, dict]], dict[str, int]]:
516+
def generate_request_args(self, args_data: list[OptionTicker]) -> tuple[list[tuple[str, dict]], dict[str, int]]:
525517
"""Generate the urls to query the options quotes endpoint.
526518
Inputs should be OptionTickers. We then generate the date ranges.
527519
To prepare the args, we make the timestamp pairs (1 hour wide) and query for the oldest quote in each window.
@@ -563,14 +555,14 @@ def _prepare_timestamps(dates: pd.DataFrame) -> list[int]:
563555
dates = dates.tz_localize("US/Eastern")
564556
for i in range(9):
565557
if i >= 7:
566-
dates[f"{i+9}_oclock"] = dates.index + pd.Timedelta(hours=i + 9)
558+
dates[f"{i + 9}_oclock"] = dates.index + pd.Timedelta(hours=i + 9)
567559
else:
568-
dates[f"{i+9}_oclock"] = dates.index + pd.Timedelta(hours=i + 9, minutes=30)
569-
dates[f"{i+9}_oclock"] = dates[f"{i+9}_oclock"].dt.strftime("%Y-%m-%dT%H:%M:%S%z")
570-
dates[f"{i+9}_oclock"] = (
571-
dates[f"{i+9}_oclock"].astype(str).str.slice(stop=-2)
560+
dates[f"{i + 9}_oclock"] = dates.index + pd.Timedelta(hours=i + 9, minutes=30)
561+
dates[f"{i + 9}_oclock"] = dates[f"{i + 9}_oclock"].dt.strftime("%Y-%m-%dT%H:%M:%S%z")
562+
dates[f"{i + 9}_oclock"] = (
563+
dates[f"{i + 9}_oclock"].astype(str).str.slice(stop=-2)
572564
+ ":"
573-
+ dates[f"{i+9}_oclock"].astype(str).str.slice(start=-2)
565+
+ dates[f"{i + 9}_oclock"].astype(str).str.slice(start=-2)
574566
)
575567
dates.drop(columns=["market_open", "market_close"], inplace=True)
576568
dates = pd.DataFrame({"timestamp.gte": dates.values.flatten()})
@@ -618,11 +610,7 @@ async def download_data(self, o_ticker: str, payload: dict, session: ClientSessi
618610
def lookup_date_timestamps_from_record(self, timestamp: int) -> list[int]:
619611
date = timestamp_to_datetime(timestamp, msec_units=False, nano_sec=True)
620612
date = str(date.date())
621-
return (
622-
self.dates_stamps["nanosecond.gte"]
623-
.loc[self.dates_stamps["timestamp.gte"].str.contains(date)]
624-
.to_list()
625-
)
613+
return self.dates_stamps["nanosecond.gte"].loc[self.dates_stamps["timestamp.gte"].str.contains(date)].to_list()
626614

627615
# TODO: finish this algorithm
628616
def search_for_timestamps(self, data: list[dict]) -> list[dict]:

curator/data_pipeline/uploader.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
from typing import Any
33

44
from aiomultiprocess import Pool
5-
from data_pipeline.path_runner import (
5+
6+
from curator.data_pipeline.path_runner import (
67
MetaDataRunner,
78
OptionsContractsRunner,
89
OptionsPricesRunner,
@@ -11,7 +12,6 @@
1112
PathRunner,
1213
StockPricesRunner,
1314
)
14-
1515
from curator.proj_constants import CPUS, log
1616
from curator.utils import pool_kwarg_config
1717

@@ -31,9 +31,7 @@ async def etl_pool_uploader(runner: PathRunner, pool_kwargs: dict = {}, path_inp
3131
"""
3232

3333
log.info(f"generating the path args to be uploaded -- {runner.runner_type}")
34-
path_args = (
35-
runner.generate_path_args() if not path_input_args else runner.generate_path_args(path_input_args)
36-
)
34+
path_args = runner.generate_path_args() if not path_input_args else runner.generate_path_args(path_input_args)
3735

3836
log.info(
3937
f"uploading data to the database -- Starting Process Pool -- Upload Function: {runner.upload_func.__qualname__}"

0 commit comments

Comments
 (0)