|
| 1 | +import pandas as pd |
| 2 | +from pathlib import Path |
| 3 | +from syscore.interactive.input import ( |
| 4 | + get_input_from_user_and_convert_to_type, |
| 5 | +) |
| 6 | +from sysinit.futures.multiple_and_adjusted_from_csv_to_db import ( |
| 7 | + init_db_with_csv_prices_for_code, |
| 8 | +) |
| 9 | +from sysinit.futures.multipleprices_from_db_prices_and_csv_calendars_to_db import ( |
| 10 | + process_multiple_prices_single_instrument, |
| 11 | +) |
| 12 | +from sysinit.futures.rollcalendars_from_db_prices_to_csv import ( |
| 13 | + build_and_write_roll_calendar, |
| 14 | +) |
| 15 | +from sysproduction.data.prices import get_valid_instrument_code_from_user |
| 16 | + |
# Working directories for the intermediate CSV files produced by this script.
# The default base is "data/futures" under the current working directory; the
# user may override it at the prompt.
proj_dir = Path.cwd()
default_path_base = proj_dir / "data" / "futures"

path_base_str = get_input_from_user_and_convert_to_type(
    "Base dir for temp files?",
    type_expected=str,
    default_value=str(default_path_base),
)

path_base = Path(path_base_str)

roll_calendars_from_db = path_base / "roll_calendars_from_db"
multiple_prices_from_db = path_base / "multiple_from_db"
spliced_multiple_prices = path_base / "multiple_prices_csv_spliced"

# parents=True: the user-supplied base dir may not exist yet.
# exist_ok=True: re-running the script against existing dirs is a no-op, and
# there is no window between an existence check and the mkdir call.
for working_dir in (
    roll_calendars_from_db,
    multiple_prices_from_db,
    spliced_multiple_prices,
):
    working_dir.mkdir(parents=True, exist_ok=True)
| 40 | + |
# Step 1: ask the user which instrument to process, then build its roll
# calendar, passing the roll-calendar working directory as the output path.
instrument_code = get_valid_instrument_code_from_user(source="multiple")
roll_calendar_dir = str(roll_calendars_from_db)
build_and_write_roll_calendar(instrument_code, output_datapath=roll_calendar_dir)
# Pause so the roll calendar can be inspected before it is used below.
input("Review roll calendar, press Enter to continue")

# Step 2: generate multiple prices for the instrument using that roll calendar.
# The flags request CSV output only (ADD_TO_CSV=True, ADD_TO_DB=False); the
# database is loaded later from the spliced file.
multiple_prices_dir = str(multiple_prices_from_db)
process_multiple_prices_single_instrument(
    instrument_code,
    csv_multiple_data_path=multiple_prices_dir,
    csv_roll_data_path=roll_calendar_dir,
    ADD_TO_DB=False,
    ADD_TO_CSV=True,
)
# Pause again so the generated multiple prices can be inspected.
input("Review multiple prices, press Enter to continue")
| 55 | + |
# Step 3: splice the freshly generated multiple prices onto the end of the
# supplied multiple prices CSV, then load the spliced file into the database.
supplied_file = path_base / "multiple_prices_csv" / f"{instrument_code}.csv"
generated_file = multiple_prices_from_db / f"{instrument_code}.csv"

supplied = pd.read_csv(supplied_file, index_col=0, parse_dates=True)
generated = pd.read_csv(generated_file, index_col=0, parse_dates=True)

# Fail with a clear message rather than a bare IndexError on index[-1] / index[0].
if supplied.empty:
    raise ValueError(f"Supplied multiple prices file {supplied_file} has no rows")
if generated.empty:
    raise ValueError(f"Generated multiple prices file {generated_file} has no rows")

# get final datetime of the supplied multiple_prices for this instrument
last_supplied = supplied.index[-1]

print(
    f"last datetime of supplied prices {last_supplied}, first datetime of updated "
    f"prices is {generated.index[0]}"
)

# keep only generated rows at or after the last supplied datetime
generated = generated.loc[last_supplied:]

# if first datetime in generated is the same as last datetime in repo, skip that row
if not generated.empty and generated.index[0] == last_supplied:
    generated = generated.iloc[1:]

# Nothing newer than the supplied data: there is no row to check contracts
# against and nothing to append, so stop before touching the database.
if generated.empty:
    raise ValueError(
        f"No generated prices for {instrument_code} after {last_supplied}; "
        "nothing to splice"
    )

# check we're using the same price and forward contracts
# (i.e. no rolls missing, which there shouldn't be if there is date overlap)
# Explicit check instead of `assert`, which is stripped under `python -O` and
# would let a mismatched splice reach the database.
last_supplied_row = supplied.iloc[-1]
first_new_row = generated.iloc[0]
mismatched = [
    column
    for column in ("PRICE_CONTRACT", "FORWARD_CONTRACT")
    if last_supplied_row[column] != first_new_row[column]
]
if mismatched:
    # Dump both frames to help diagnose the mismatch.
    print(supplied)
    print(generated)
    raise ValueError(
        f"Supplied and generated multiple prices for {instrument_code} disagree "
        f"on {', '.join(mismatched)} at the splice point {last_supplied}"
    )
# nb we don't check that the CARRY_CONTRACT is the same for supplied and generated,
# as some rolls implicit in the supplied multiple_prices don't match the pattern in
# the rollconfig.csv

spliced = pd.concat([supplied, generated])
spliced.to_csv(spliced_multiple_prices / f"{instrument_code}.csv")

# Load the database from the directory holding the spliced CSV.
init_db_with_csv_prices_for_code(
    instrument_code, multiple_price_datapath=str(spliced_multiple_prices)
)
0 commit comments