|
1 | | -import pandas as pd |
| 1 | +import duckdb |
2 | 2 | import logging |
3 | 3 |
|
| 4 | +import pandas as pd |
| 5 | + |
4 | 6 | from typing import List, Optional, Union |
5 | 7 |
|
6 | | -import duckdb |
7 | 8 | import pandas_market_calendars as mcal |
8 | 9 |
|
9 | 10 | logging.basicConfig(format='%(message)s ::: %(asctime)s', datefmt='%I:%M:%S %p') |
10 | 11 |
|
11 | | - |
12 | 12 | # |
13 | 13 | # Adjusting pricing data |
14 | 14 | # |
@@ -81,6 +81,7 @@ def _adjust_field(field: str, table: str) -> str: |
81 | 81 |
|
82 | 82 | return '' |
83 | 83 |
|
| 84 | + |
84 | 85 | # |
85 | 86 | # Adjusting data for universe |
86 | 87 | # |
@@ -140,7 +141,7 @@ def add_index_info(self, index_constitutes: pd.DataFrame, start_date: Union[pd.T |
140 | 141 |
|
141 | 142 | # will throw an error if there are duplicate values in self.__id_col |
142 | 143 | _handle_duplicates(df=index_constitutes, out_type='ValueError', name='The column symbols', |
143 | | - drop=False, subset=[self.__id_col]) |
| 144 | + drop=False, subset=[self.__id_col]) |
144 | 145 |
|
145 | 146 | # check whether the from and thru values have to be converted to series of timestamps |
146 | 147 | if date_format != '': |
@@ -305,6 +306,11 @@ def _check_columns(needed: List[str], df: pd.DataFrame, index_columns: bool = Tr |
305 | 306 | return df |
306 | 307 |
|
307 | 308 |
|
| 309 | +# |
| 310 | +# utility |
| 311 | +# |
| 312 | + |
| 313 | + |
308 | 314 | def _handle_duplicates(df: pd.DataFrame, out_type: str, name: str, drop: bool = False, |
309 | 315 | subset: List[any] = None) -> pd.DataFrame: |
310 | 316 | """ |
@@ -339,10 +345,3 @@ def _handle_duplicates(df: pd.DataFrame, out_type: str, name: str, drop: bool = |
339 | 345 |
|
340 | 346 | if drop: |
341 | 347 | return df |
342 | | - |
343 | | - |
344 | | -if __name__ == '__main__': |
345 | | - df = pd.read_csv( |
346 | | - '/Users/alex/Desktop/WRDS/CRSP/Annual Update/Stock : Security Files/Daily Stock File/Daily Stock File 29251231-20211231.gz', |
347 | | - nrows=1000).drop('cfacshr', axis=1) |
348 | | - print(adjust_crsp_data(df)) |
0 commit comments