diff --git a/pdr_backend/benchmarks/plot_each_model.py b/pdr_backend/benchmarks/plot_each_model.py
new file mode 100644
index 000000000..0b6507c8d
--- /dev/null
+++ b/pdr_backend/benchmarks/plot_each_model.py
@@ -0,0 +1,155 @@
+"""
+
+Takes a Simulation's CSV data and plots each model by calibration.
+
+"""
+
+import os
+import pandas as pd
+import plotly.graph_objects as go  # type: ignore
+
+# Example file path
+FILE_PATH = "/Users/abc/Dev/ClassifLinearElasticNet Balanced_50kIterations_Summary.csv"
+
+
+def load_data_from_csv(file_path):
+    """
+    Loads Sim data from a CSV file into 2 dataframes.
+
+    Missing "Calibration" values become the string "None", the model name is
+    the part of the file name before the first underscore, and a "Color"
+    column maps each calibration to its plot color.
+
+    Args:
+        file_path: path of the simulation summary CSV.
+
+    Returns:
+        Two dataframes, first the rows without ETH in the train feedsets,
+        then the rows with ETH.
+    """
+    df = pd.read_csv(file_path, na_values=[""])
+    df["Calibration"] = df["Calibration"].fillna("None")
+    df["Model"] = os.path.basename(file_path).split("_")[0]
+    print(
+        f"Data Types:\n{df.dtypes}"
+    )  # Check the data types to ensure they are read correctly
+
+    color_mapping = {"Sigmoid": "orange", "Isotonic": "blue", "None": "fuchsia"}
+    df["Color"] = df["Calibration"].map(color_mapping)
+
+    # na=False: a row with a missing feedset counts as "without ETH" instead of
+    # putting NaN into the mask, which pandas refuses to index with.
+    has_eth = df["predictoor_ss.predict_train_feedsets"].str.contains(
+        "ETH", na=False
+    )
+    return df[~has_eth].copy(), df[has_eth].copy()
+
+
+def generate_traces(df, calibrations, autoregressive_n, y_column):
+    """
+    Generates traces for the given dataframe to be plotted.
+
+    One trace is built per (calibration, autoregressive_n) pair that has rows
+    in df; pairs without rows are reported on stdout and skipped.
+
+    Args:
+        df: dataframe returned by load_data_from_csv.
+        calibrations: calibration names to plot.
+        autoregressive_n: autoregressive_n values to plot (ints or int strings).
+        y_column: name of the column plotted on the y axis.
+
+    Returns:
+        List of traces.
+    """
+    traces = []
+    for calibration in calibrations:
+        for autoregressive in autoregressive_n:
+            filtered_df = df[
+                (df["Calibration"] == calibration)
+                & (
+                    df["predictoor_ss.aimodel_data_ss.autoregressive_n"]
+                    == int(autoregressive)
+                )
+            ]
+            if filtered_df.empty:
+                print(
+                    f"No data for {calibration} with Autoregressive_n = {autoregressive}"
+                )
+                continue
+            traces.append(
+                go.Scatter(
+                    x=filtered_df["predictoor_ss.aimodel_data_ss.max_n_train"],
+                    y=filtered_df[y_column],
+                    name=f"{calibration} & Autoregressive_n = {autoregressive}",
+                    marker={"color": filtered_df["Color"].iloc[0]},
+                    # customdata holds one entry per plotted point
+                    customdata=[[calibration, autoregressive]] * len(filtered_df),
+                )
+            )
+    return traces
+
+
+layout = {
+    "title": {"text": "Traces Sorted by Ascending Predictoor Profit"},
+    "xaxis": {
+        "title": "Max_N_Train",
+        "tickvals": [1000, 2000, 5000],
+        "ticktext": ["1000", "2000", "5000"],
+    },
+    "margin": {"l": 70, "r": 20, "t": 60, "b": 40},
+    "showlegend": True,
+    "legend": {"title": {"text": "Traces Sorted by Ascending Predictoor Profit"}},
+    "hovermode": "closest",
+}
+
+
+def _show_figure(df, calibrations, autoregressive_n, y_column, trained_with):
+    """
+    Builds and shows one figure for df; does nothing but print if df is empty.
+    """
+    if df.empty:
+        print(f"No rows trained with {trained_with} Data, skipping plot")
+        return
+    yaxis_title = (
+        "Predictoor Profit (OCEAN)"
+        if y_column == "pdr_profit_OCEAN"
+        else "Trader Profit (USD)"
+    )
+    traces = generate_traces(df, calibrations, autoregressive_n, y_column)
+    fig = go.Figure(data=traces, layout=layout)
+    fig.update_layout(
+        title=f"{df['Model'].iloc[0]} - "
+        + f"Predictoor Profit Benchmarks (Trained with {trained_with} Data) - {y_column}",
+        yaxis_title=yaxis_title,
+    )
+    fig.show()
+
+
+def plot_data(filename, calibration, autoregressive_n, y_column):
+    """
+    Plots the data from the given CSV file.
+
+    Shows two figures, one for the rows trained without ETH data and one for
+    the rows trained with it.
+
+    Args:
+        filename: path of the simulation summary CSV.
+        calibration: calibration names to plot.
+        autoregressive_n: autoregressive_n values to plot.
+        y_column: name of the column plotted on the y axis.
+    """
+    df_without_eth, df_with_eth = load_data_from_csv(filename)
+    _show_figure(df_without_eth, calibration, autoregressive_n, y_column, "BTC-USDT")
+    # Use the arguments here too, not the module-level selections
+    _show_figure(
+        df_with_eth, calibration, autoregressive_n, y_column, "BTC-USDT & ETH-USDT"
+    )
+
+
+selected_calibrations = ["None", "Isotonic", "Sigmoid"]
+selected_autoregressives = ["1", "2"]
+Y_COLUMN = "pdr_profit_OCEAN"  # Example Column to plot: 'pdr_profit_OCEAN' or 'trader_profit_USD'
+
+if __name__ == "__main__":
+    # Only plot when run as a script, so importing the module has no side effects
+    plot_data(FILE_PATH, selected_calibrations, selected_autoregressives, Y_COLUMN)
diff --git a/pdr_backend/benchmarks/plot_model_comparison.py b/pdr_backend/benchmarks/plot_model_comparison.py
new file mode 100644
index 000000000..20e4d53e2
--- /dev/null
+++ b/pdr_backend/benchmarks/plot_model_comparison.py
@@ -0,0 +1,183 @@
+"""
+
+Takes multiple Simulation CSVs for different models and plots the three most profitable traces.
+
+"""
+
+import os
+import pandas as pd
+import plotly.graph_objects as go  # type: ignore
+
+
+FILE_PATHS = [
+    "/Users/abc/Dev/ClassifLinearLasso_Summary.csv",
+    "/Users/abc/Dev/Balanced ClassifLinearLasso_Summary.csv",
+    "/Users/abc/Dev/ClassifLinearRidge_Summary.csv",
+    "/Users/abc/Dev/Balanced ClassifLinearRidge_Summary.csv",
+    "/Users/abc/Dev/ClassifLinearElasticNet_Summary.csv",
+    "/Users/abc/Dev/Balanced ClassifLinearElasticNet_Summary.csv",
+]
+
+# Columns that together identify one trace
+TRACE_KEY_COLUMNS = [
+    "Model",
+    "Calibration",
+    "predictoor_ss.aimodel_data_ss.autoregressive_n",
+]
+
+
+def load_and_process_csv(file_path):
+    """
+    Loads Sim data from a CSV file into a dataframe.
+
+    Missing "Calibration" values become the string "None" and the model name,
+    the part of the file name before the first underscore, is stored in a
+    "Model" column.
+    """
+
+    df = pd.read_csv(file_path, na_values=[""])
+    df["Calibration"] = df["Calibration"].fillna("None")
+    df["Model"] = os.path.basename(file_path).split("_")[0]
+    print(df.dtypes)  # Check the data types to ensure they are read correctly
+    return df
+
+
+def get_top_traces_combined(df, y_column):
+    """
+    Gets the rows of the 3 most profitable traces across the whole dataframe.
+
+    A trace is one (model, calibration, autoregressive_n) combination and is
+    ranked by its highest y_column value.
+
+    Returns:
+        The rows of df that belong to the top traces.
+
+    Raises:
+        ValueError: if df has no "Model" column.
+    """
+
+    if "Model" not in df.columns:
+        raise ValueError("Model column not found in DataFrame")
+    max_profits = df.groupby(TRACE_KEY_COLUMNS)[y_column].max().reset_index()
+    top_trace_keys = max_profits.nlargest(3, y_column)[TRACE_KEY_COLUMNS]
+    return df.merge(top_trace_keys, on=TRACE_KEY_COLUMNS)
+
+
+def generate_traces(df, green_shades, y_column):
+    """
+    Generates plotly traces for each model, calibration, and autoregressive_n.
+
+    Colors are handed out from the most profitable trace down, reusing the
+    shades from the start when there are more traces than shades. The list
+    passed as green_shades is not modified.
+
+    Returns:
+        List of traces, least profitable first.
+    """
+
+    grouped = df.groupby(TRACE_KEY_COLUMNS)
+    best_per_trace = grouped[y_column].max().sort_values(ascending=False)
+
+    traces = []
+    for rank, key in enumerate(best_per_trace.index):  # highest profit first
+        model, calibration, autoregressive_n = key
+        group_df = grouped.get_group(key)
+        color = green_shades[rank % len(green_shades)] if green_shades else None
+        traces.append(
+            go.Scatter(
+                x=group_df["predictoor_ss.aimodel_data_ss.max_n_train"],
+                y=group_df[y_column],
+                # int() so the value is formatted as an integer
+                name=f"{model}: {calibration} & Autoregressive_n = {int(autoregressive_n)}",
+                marker={"color": color},
+                mode="lines+markers",
+            )
+        )
+
+    traces.reverse()
+    return traces
+
+
+def plot_data_from_csvs(file_paths, y_column, eth_column):
+    """
+    Loads and processes the CSV files, then passes the data to plot_data.
+
+    Args:
+        file_paths: paths of the simulation summary CSVs.
+        y_column: name of the profit column to rank and plot.
+        eth_column: name of the column searched for the text "ETH".
+
+    Raises:
+        ValueError: if file_paths is empty.
+    """
+
+    if not file_paths:
+        raise ValueError("No CSV files given")
+    combined_df = pd.concat(
+        [load_and_process_csv(file_path) for file_path in file_paths],
+        ignore_index=True,
+    )
+    has_eth = combined_df[eth_column].str.contains("ETH", na=False)
+
+    plot_data(combined_df[~has_eth], y_column, "(Trained on BTC-USDT Data)")
+    plot_data(combined_df[has_eth], y_column, "(Trained on BTC & ETH-USDT Data)")
+
+
+def plot_data(df, y_column, title_suffix):
+    """
+    Formats and plots the data from the dataframe.
+
+    Shows one figure with the top 3 traces of df; an empty df is reported on
+    stdout and not plotted.
+
+    Raises:
+        ValueError: if df has no "Model" column.
+    """
+
+    if "Model" not in df.columns:
+        raise ValueError("Model column not found in DataFrame")
+    if df.empty:
+        print(f"No data {title_suffix}, skipping plot")
+        return
+    top_traces_df = get_top_traces_combined(df, y_column)
+    green_shades = ["#267326", "#66cc66", "#adebad"]  # Dark to light green
+    traces = generate_traces(top_traces_df, green_shades, y_column)
+    profit_type = (
+        "Predictoor Profit (OCEAN)"
+        if y_column == "pdr_profit_OCEAN"
+        else "Trader Profit (USD)"
+    )
+    layout = go.Layout(
+        title={
+            "text": f"Top 3 Highest {profit_type} Scores - {title_suffix}",
+            "x": 0.5,
+        },
+        xaxis={
+            "title": "Max_N_Train",
+            "tickvals": [1000, 2000, 5000],
+            "ticktext": ["1000", "2000", "5000"],
+        },
+        yaxis={
+            "title": profit_type,
+            "tickmode": "auto",
+            "showgrid": True,
+            "tickfont": {"size": 10},
+            "title_standoff": 25,
+        },
+        margin={"l": 70, "r": 20, "t": 60, "b": 40},
+        showlegend=True,
+        legend={"title": {"text": "Traces Sorted by Ascending Profit"}},
+        hovermode="closest",
+    )
+    fig = go.Figure(data=traces, layout=layout)
+    fig.show()
+
+
+Y_COLUMN = "pdr_profit_OCEAN"  # Can be 'pdr_profit_OCEAN' or 'trader_profit_USD'
+ETH_COLUMN = (
+    "predictoor_ss.predict_train_feedsets"  # Adjust the column name as necessary
+)
+
+if __name__ == "__main__":
+    # Only plot when run as a script, so importing the module has no side effects
+    plot_data_from_csvs(FILE_PATHS, Y_COLUMN, ETH_COLUMN)