From cad737190cc49bc7ee887c44a5cc582e3f1f454f Mon Sep 17 00:00:00 2001
From: V <101472818+graceful-coder@users.noreply.github.com>
Date: Tue, 17 Sep 2024 10:23:41 -0400
Subject: [PATCH 1/7] Adding Benchmark Plot Files

These scripts generate the model-comparison plots reported in the Sim blog posts.
---
 pdr_backend/plot_each_model.py       | 82 +++++++++++++++++++++++++
 pdr_backend/plot_model_comparison.py | 90 ++++++++++++++++++++++++++++
 2 files changed, 172 insertions(+)
 create mode 100644 pdr_backend/plot_each_model.py
 create mode 100644 pdr_backend/plot_model_comparison.py

diff --git a/pdr_backend/plot_each_model.py b/pdr_backend/plot_each_model.py
new file mode 100644
index 000000000..fa22029bc
--- /dev/null
+++ b/pdr_backend/plot_each_model.py
@@ -0,0 +1,82 @@
+import os
+import pandas as pd
+import plotly.graph_objects as go
+
+# Example file path
+# file_path = "/Users/foxylady/Dev/ClassifLinearSVM_50kIterations_Summary.csv"
+file_path = "[Enter Your File Path to CSV Data]"
+
+def load_data_from_csv(file_path):
+    df = pd.read_csv(file_path, na_values=[''])
+    df['Calibration'] = df['Calibration'].fillna('None')
+    model_name = os.path.basename(file_path).split('_')[0]
+    df['Model'] = model_name
+    df_without_eth = df[~df['predictoor_ss.predict_train_feedsets'].str.contains('ETH')].copy()
+    df_with_eth = df[df['predictoor_ss.predict_train_feedsets'].str.contains('ETH')].copy()
+    color_mapping = {
+        'Sigmoid': 'orange',
+        'Isotonic': 'blue',
+        'None': 'fuchsia'
+    }
+    df_without_eth['Color'] = df_without_eth['Calibration'].map(color_mapping)
+    df_with_eth['Color'] = df_with_eth['Calibration'].map(color_mapping)
+    print(f"Data Types:\n{df.dtypes}")  # Check the data types to ensure they are read correctly
+    return df_without_eth, df_with_eth
+
+def generate_traces(df, selected_calibrations, selected_autoregressives, y_column):
+    traces = []
+    for calibration in selected_calibrations:
+        for autoregressive in selected_autoregressives:
+            filtered_df = df[(df['Calibration'] == calibration) & (df['predictoor_ss.aimodel_data_ss.autoregressive_n'] == int(autoregressive))]
+            if not filtered_df.empty:
+                traces.append(
+                    go.Scatter(
+                        x=filtered_df['predictoor_ss.aimodel_data_ss.max_n_train'],
+                        y=filtered_df[y_column],
+                        name=f"{calibration} & Autoregressive_n = {autoregressive}",
+                        marker=dict(color=filtered_df['Color'].iloc[0]),
+                        customdata=[calibration, autoregressive]
+                    )
+                )
+            else:
+                print(f"No data for {calibration} with Autoregressive_n = {autoregressive}")
+    return traces
+
+layout = go.Layout(
+    title="ClassifLinearRidge_Balanced Predictoor Profit Benchmarks for Three Calibrations",
+    xaxis=dict(
+        title="Max_N_Train",
+        tickvals=[1000, 2000, 5000],
+        ticktext=["1000", "2000", "5000"]
+    ),
+    margin=dict(
+        l=70,
+        r=20,
+        t=60,
+        b=40
+    ),
+    showlegend=True,
+    legend=dict(
+        title=dict(
+            text="Traces Sorted by Ascending Predictoor Profit"
+        )
+    ),
+    hovermode='closest'
+)
+
+def plot_data(file_path, selected_calibrations, selected_autoregressives, y_column):
+    df_without_eth, df_with_eth = load_data_from_csv(file_path)
+    traces_without_eth = generate_traces(df_without_eth, selected_calibrations, selected_autoregressives, y_column)
+    yaxis_title = "Predictoor Profit (OCEAN)" if y_column == 'pdr_profit_OCEAN' else "Trader Profit (USD)"
+    fig_without_eth = go.Figure(data=traces_without_eth, layout=layout)
+    fig_without_eth.update_layout(title=f"{df_without_eth['Model'].iloc[0]} - Predictoor Profit Benchmarks (Trained with BTC-USDT Data) - {y_column}", yaxis_title=yaxis_title)
+    fig_without_eth.show()
+    traces_with_eth = generate_traces(df_with_eth, selected_calibrations, selected_autoregressives, y_column)
+    fig_with_eth = go.Figure(data=traces_with_eth, layout=layout)
+    fig_with_eth.update_layout(title=f"{df_with_eth['Model'].iloc[0]} - Predictoor Profit Benchmarks (Trained with BTC-USDT & ETH-USDT Data) - {y_column}", yaxis_title=yaxis_title)
+    fig_with_eth.show()
+
+selected_calibrations = ["None", "Isotonic", "Sigmoid"]
+selected_autoregressives = ["1", "2"]
+y_column = 'trader_profit_USD'  # Example Column to plot: 'pdr_profit_OCEAN' or 'trader_profit_USD'
+plot_data(file_path, selected_calibrations, selected_autoregressives, y_column)
diff --git a/pdr_backend/plot_model_comparison.py b/pdr_backend/plot_model_comparison.py
new file mode 100644
index 000000000..5ecbf9d1e
--- /dev/null
+++ b/pdr_backend/plot_model_comparison.py
@@ -0,0 +1,90 @@
+import pandas as pd
+import plotly.graph_objects as go
+import os
+
+file_paths = [
+    '/Users/abc/Dev/ClassifLinearLasso_Summary.csv',
+    '/Users/abc/Dev/Balanced ClassifLinearLasso_Summary.csv',
+    '/Users/abc/Dev/ClassifLinearRidge_Summary.csv',
+    '/Users/abc/Dev/Balanced ClassifLinearRidge_Summary.csv',
+    '/Users/abc/Dev/ClassifLinearElasticNet_Summary.csv',
+    '/Users/abc/Dev/Balanced ClassifLinearElasticNet_Summary.csv'
+]
+
+def load_and_process_csv(file_path):
+    df = pd.read_csv(file_path, na_values=[''])
+    df['Calibration'] = df['Calibration'].fillna('None')
+    model_name = os.path.basename(file_path).split('_')[0]
+    df['Model'] = model_name
+    print(df.dtypes)  # Check the data types to ensure they are read correctly
+    return df
+
+
+def get_top_traces_combined(df, y_column):
+    if 'Model' not in df.columns:
+        raise ValueError("Model column not found in DataFrame")
+    grouped = df.groupby(['Model', 'Calibration', 'predictoor_ss.aimodel_data_ss.autoregressive_n'])
+    max_profits = grouped[y_column].max().reset_index()
+    top_traces = max_profits.nlargest(3, y_column)
+    top_trace_indices = top_traces[['Model', 'Calibration', 'predictoor_ss.aimodel_data_ss.autoregressive_n']]
+    top_trace_full_df = df.merge(top_trace_indices, on=['Model', 'Calibration', 'predictoor_ss.aimodel_data_ss.autoregressive_n'])
+    return top_trace_full_df
+
+def generate_traces(df, green_shades, y_column):
+    traces = []
+    grouped = df.groupby(['Model', 'Calibration', 'predictoor_ss.aimodel_data_ss.autoregressive_n'])
+    sorted_groups = grouped[y_column].max().reset_index().sort_values(by=y_column, ascending=False)  # Sorting highest to lowest
+
+    temp_traces = []
+
+    for _, row in sorted_groups.iterrows():
+        group_df = grouped.get_group((row['Model'], row['Calibration'], row['predictoor_ss.aimodel_data_ss.autoregressive_n']))
+        color = green_shades.pop(0)
+        autoregressive_n = int(row['predictoor_ss.aimodel_data_ss.autoregressive_n'])  # Ensure it's formatted as an integer
+        trace = go.Scatter(
+            x=group_df['predictoor_ss.aimodel_data_ss.max_n_train'],
+            y=group_df[y_column],
+            name=f"{row['Model']}: {row['Calibration']} & Autoregressive_n = {autoregressive_n}",
+            marker=dict(color=color),
+            mode='lines+markers'
+        )
+        temp_traces.append(trace)
+
+    traces.extend(reversed(temp_traces))
+    return traces
+
+def plot_data_from_csvs(file_paths, y_column, eth_column):
+    all_data = []
+    for file_path in file_paths:
+        df = load_and_process_csv(file_path)
+        all_data.append(df)
+
+    combined_df = pd.concat(all_data, ignore_index=True)
+    df_without_eth = combined_df[~combined_df[eth_column].str.contains('ETH', na=False)]
+    df_with_eth = combined_df[combined_df[eth_column].str.contains('ETH', na=False)]
+
+    plot_data(df_without_eth, y_column, "(Trained on BTC-USDT Data)")
+    plot_data(df_with_eth, y_column, "(Trained on BTC & ETH-USDT Data)")
+
+def plot_data(df, y_column, title_suffix):
+    if 'Model' not in df.columns:
+        raise ValueError("Model column not found in DataFrame")
+    top_traces_df = get_top_traces_combined(df, y_column)
+    green_shades = ['#267326', '#66cc66', '#adebad']  # Dark to light green
+    traces = generate_traces(top_traces_df, green_shades.copy(), y_column)
+    profit_type = "Predictoor Profit (OCEAN)" if y_column == 'pdr_profit_OCEAN' else "Trader Profit (USD)"
+    layout = go.Layout(
+        title=dict(text=f"Top 3 Highest {profit_type} Scores - {title_suffix}", x=0.5),
+        xaxis=dict(title="Max_N_Train", tickvals=[1000, 2000, 5000], ticktext=["1000", "2000", "5000"]),
+        yaxis=dict(title=profit_type, tickmode='auto', showgrid=True, tickfont=dict(size=10), automargin=True),
+        margin=dict(l=70, r=20, t=60, b=40),
+        showlegend=True,
+        legend=dict(title=dict(text="Traces Sorted by Ascending Profit")),
+        hovermode='closest'
+    )
+    fig = go.Figure(data=traces, layout=layout)
+    fig.show()
+
+y_column = 'trader_profit_USD'  # Can be 'pdr_profit_OCEAN' or 'trader_profit_USD'
+eth_column = 'predictoor_ss.predict_train_feedsets'  # Adjust the column name as necessary
+plot_data_from_csvs(file_paths, y_column, eth_column)

From c8dcf339f5a70a8741f41bc0b0bba3ade6897d37 Mon Sep 17 00:00:00 2001
From: V <101472818+graceful-coder@users.noreply.github.com>
Date: Tue, 17 Sep 2024 10:25:10 -0400
Subject: [PATCH 2/7] Added to benchmarks folder

---
 pdr_backend/{ => benchmarks}/plot_each_model.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename pdr_backend/{ => benchmarks}/plot_each_model.py (100%)

diff --git a/pdr_backend/plot_each_model.py b/pdr_backend/benchmarks/plot_each_model.py
similarity index 100%
rename from pdr_backend/plot_each_model.py
rename to pdr_backend/benchmarks/plot_each_model.py

From fa3ee428a84f9971e3aaee8fcc1b9565ab941ce4 Mon Sep 17 00:00:00 2001
From: V <101472818+graceful-coder@users.noreply.github.com>
Date: Tue, 17 Sep 2024 10:25:34 -0400
Subject: [PATCH 3/7] Added to benchmarks folder

---
 pdr_backend/{ => benchmarks}/plot_model_comparison.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename pdr_backend/{ => benchmarks}/plot_model_comparison.py (100%)

diff --git a/pdr_backend/plot_model_comparison.py b/pdr_backend/benchmarks/plot_model_comparison.py
similarity index 100%
rename from pdr_backend/plot_model_comparison.py
rename to pdr_backend/benchmarks/plot_model_comparison.py

From 12eccc963acb72fe61ba4320fad91f83f5a2ebc3 Mon Sep 17 00:00:00 2001
From: V <101472818+graceful-coder@users.noreply.github.com>
Date: Tue, 17 Sep 2024 11:09:25 -0400
Subject: [PATCH 4/7] Linted plot_each_model.py

---
 pdr_backend/benchmarks/plot_each_model.py | 158 ++++++++++++++--------
 1 file changed, 103 insertions(+), 55 deletions(-)

diff --git a/pdr_backend/benchmarks/plot_each_model.py b/pdr_backend/benchmarks/plot_each_model.py
index fa22029bc..bb3a65b4c 100644
--- a/pdr_backend/benchmarks/plot_each_model.py
+++ b/pdr_backend/benchmarks/plot_each_model.py
@@ -1,82 +1,130 @@
+"""
+
+Takes a Simulation's CSV data and plots each model by calibration.
+
+"""
+
 import os
 import pandas as pd
-import plotly.graph_objects as go
+import plotly.graph_objects as go  # type: ignore
 
 # Example file path
-# file_path = "/Users/foxylady/Dev/ClassifLinearSVM_50kIterations_Summary.csv"
-file_path = "[Enter Your File Path to CSV Data]"
+FILE_PATH = (
+    "/Users/foxylady/Dev/ClassifLinearElasticNet Balanced_50kIterations_Summary.csv"
+)
+
 
 def load_data_from_csv(file_path):
-    df = pd.read_csv(file_path, na_values=[''])
-    df['Calibration'] = df['Calibration'].fillna('None')
-    model_name = os.path.basename(file_path).split('_')[0]
-    df['Model'] = model_name
-    df_without_eth = df[~df['predictoor_ss.predict_train_feedsets'].str.contains('ETH')].copy()
-    df_with_eth = df[df['predictoor_ss.predict_train_feedsets'].str.contains('ETH')].copy()
-    color_mapping = {
-        'Sigmoid': 'orange',
-        'Isotonic': 'blue',
-        'None': 'fuchsia'
-    }
-    df_without_eth['Color'] = df_without_eth['Calibration'].map(color_mapping)
-    df_with_eth['Color'] = df_with_eth['Calibration'].map(color_mapping)
-    print(f"Data Types:\n{df.dtypes}")  # Check the data types to ensure they are read correctly
+    """
+    Loads Sim data from a CSV file and splits it by training feedset.
+
+    Adds a "Model" column (file name up to the first "_") and a "Color" column
+    (by Calibration). Rows whose feedsets value is missing count as non-ETH.
+    Returns (df_without_eth, df_with_eth).
+    """
+    df = pd.read_csv(file_path, na_values=[""])
+    df["Calibration"] = df["Calibration"].fillna("None")
+    model_name = os.path.basename(file_path).split("_")[0]
+    df["Model"] = model_name
+    df_without_eth = df[
+        ~df["predictoor_ss.predict_train_feedsets"].str.contains("ETH", na=False)
+    ].copy()
+    df_with_eth = df[
+        df["predictoor_ss.predict_train_feedsets"].str.contains("ETH", na=False)
+    ].copy()
+    color_mapping = {"Sigmoid": "orange", "Isotonic": "blue", "None": "fuchsia"}
+    df_without_eth["Color"] = df_without_eth["Calibration"].map(color_mapping)
+    df_with_eth["Color"] = df_with_eth["Calibration"].map(color_mapping)
+    print(
+        f"Data Types:\n{df.dtypes}"
+    )  # Check the data types to ensure they are read correctly
     return df_without_eth, df_with_eth
 
-def generate_traces(df, selected_calibrations, selected_autoregressives, y_column):
+
+def generate_traces(df, calibrations, autoregressive_n, y_column):
+    """
+    Builds one Scatter trace per (calibration, autoregressive_n) pair found in df.
+
+    x is max_n_train, y is y_column; the trace color is the group's first "Color"
+    value. Pairs with no rows are reported via print and skipped. Returns a list.
+    """
     traces = []
-    for calibration in selected_calibrations:
-        for autoregressive in selected_autoregressives:
-            filtered_df = df[(df['Calibration'] == calibration) & (df['predictoor_ss.aimodel_data_ss.autoregressive_n'] == int(autoregressive))]
+    for calibration in calibrations:
+        for autoregressive in autoregressive_n:
+            filtered_df = df[
+                (df["Calibration"] == calibration)
+                & (
+                    df["predictoor_ss.aimodel_data_ss.autoregressive_n"]
+                    == int(autoregressive)
+                )
+            ]
             if not filtered_df.empty:
                 traces.append(
                     go.Scatter(
-                        x=filtered_df['predictoor_ss.aimodel_data_ss.max_n_train'],
+                        x=filtered_df["predictoor_ss.aimodel_data_ss.max_n_train"],
                         y=filtered_df[y_column],
                         name=f"{calibration} & Autoregressive_n = {autoregressive}",
-                        marker=dict(color=filtered_df['Color'].iloc[0]),
-                        customdata=[calibration, autoregressive]
+                        marker={"color": filtered_df["Color"].iloc[0]},
+                        customdata=[calibration, autoregressive],
                     )
                 )
             else:
-                print(f"No data for {calibration} with Autoregressive_n = {autoregressive}")
+                print(
+                    f"No data for {calibration} with Autoregressive_n = {autoregressive}"
+                )
     return traces
 
-layout = go.Layout(
-    title="ClassifLinearRidge_Balanced Predictoor Profit Benchmarks for Three Calibrations",
-    xaxis=dict(
-        title="Max_N_Train",
-        tickvals=[1000, 2000, 5000],
-        ticktext=["1000", "2000", "5000"]
-    ),
-    margin=dict(
-        l=70,
-        r=20,
-        t=60,
-        b=40
-    ),
-    showlegend=True,
-    legend=dict(
-        title=dict(
-            text="Traces Sorted by Ascending Predictoor Profit"
-        )
-    ),
-    hovermode='closest'
-)
 
-def plot_data(file_path, selected_calibrations, selected_autoregressives, y_column):
-    df_without_eth, df_with_eth = load_data_from_csv(file_path)
-    traces_without_eth = generate_traces(df_without_eth, selected_calibrations, selected_autoregressives, y_column)
-    yaxis_title = "Predictoor Profit (OCEAN)" if y_column == 'pdr_profit_OCEAN' else "Trader Profit (USD)"
+layout = {  # base for both figures; plot_data overrides "title" per figure
+    "title": {"text": "Traces Sorted by Ascending Predictoor Profit"},
+    "xaxis": {
+        "title": "Max_N_Train",
+        "tickvals": [1000, 2000, 5000],
+        "ticktext": ["1000", "2000", "5000"],
+    },
+    "margin": {"l": 70, "r": 20, "t": 60, "b": 40},
+    "showlegend": True,
+    "legend": {"title": {"text": "Traces Sorted by Ascending Predictoor Profit"}},
+    "hovermode": "closest",
+}
+
+
+def plot_data(filename, calibration, autoregressive_n, y_column):
+    """
+    Loads the CSV at `filename` and shows two figures of y_column vs max_n_train.
+
+    The first figure uses rows without ETH feedsets, the second rows with them;
+    `calibration` and `autoregressive_n` list the values to trace. Returns None.
+    """
+    df_without_eth, df_with_eth = load_data_from_csv(filename)
+    traces_without_eth = generate_traces(
+        df_without_eth, calibration, autoregressive_n, y_column
+    )
+    yaxis_title = (
+        "Predictoor Profit (OCEAN)"
+        if y_column == "pdr_profit_OCEAN"
+        else "Trader Profit (USD)"
+    )
     fig_without_eth = go.Figure(data=traces_without_eth, layout=layout)
-    fig_without_eth.update_layout(title=f"{df_without_eth['Model'].iloc[0]} - Predictoor Profit Benchmarks (Trained with BTC-USDT Data) - {y_column}", yaxis_title=yaxis_title)
+    fig_without_eth.update_layout(
+        title=f"{df_without_eth['Model'].iloc[0]} - "
+        + f"Predictoor Profit Benchmarks (Trained with BTC-USDT Data) - {y_column}",
+        yaxis_title=yaxis_title,
+    )
     fig_without_eth.show()
-    traces_with_eth = generate_traces(df_with_eth, selected_calibrations, selected_autoregressives, y_column)
+    traces_with_eth = generate_traces(
+        df_with_eth, calibration, autoregressive_n, y_column
+    )
     fig_with_eth = go.Figure(data=traces_with_eth, layout=layout)
-    fig_with_eth.update_layout(title=f"{df_with_eth['Model'].iloc[0]} - Predictoor Profit Benchmarks (Trained with BTC-USDT & ETH-USDT Data) - {y_column}", yaxis_title=yaxis_title)
+    fig_with_eth.update_layout(
+        title=f"{df_with_eth['Model'].iloc[0]} - "
+        + f"Predictoor Profit Benchmarks (Trained with BTC-USDT & ETH-USDT Data) - {y_column}",
+        yaxis_title=yaxis_title,
+    )
     fig_with_eth.show()
 
+
 selected_calibrations = ["None", "Isotonic", "Sigmoid"]
 selected_autoregressives = ["1", "2"]
-y_column = 'trader_profit_USD'  # Example Column to plot: 'pdr_profit_OCEAN' or 'trader_profit_USD'
-plot_data(file_path, selected_calibrations, selected_autoregressives, y_column)
+Y_COLUMN = "pdr_profit_OCEAN"  # Example Column to plot: 'pdr_profit_OCEAN' or 'trader_profit_USD'
+plot_data(FILE_PATH, selected_calibrations, selected_autoregressives, Y_COLUMN)

From f236335ad5f5ec5287b1a481738e8821633e281d Mon Sep 17 00:00:00 2001
From: V <101472818+graceful-coder@users.noreply.github.com>
Date: Tue, 17 Sep 2024 11:33:05 -0400
Subject: [PATCH 5/7] Linted plot_model_comparison.py

---
 .../benchmarks/plot_model_comparison.py       | 146 +++++++++++++-----
 1 file changed, 108 insertions(+), 38 deletions(-)

diff --git a/pdr_backend/benchmarks/plot_model_comparison.py b/pdr_backend/benchmarks/plot_model_comparison.py
index 5ecbf9d1e..20e4d53e2 100644
--- a/pdr_backend/benchmarks/plot_model_comparison.py
+++ b/pdr_backend/benchmarks/plot_model_comparison.py
@@ -1,90 +1,160 @@
-import pandas as pd
-import plotly.graph_objects as go
+"""
+
+Takes multiple Simulation CSVs for different models and plots the three most profitable traces.
+
+"""
+
 import os
+import pandas as pd
+import plotly.graph_objects as go  # type: ignore
+
 
-file_paths = [
-    '/Users/abc/Dev/ClassifLinearLasso_Summary.csv',
-    '/Users/abc/Dev/Balanced ClassifLinearLasso_Summary.csv',
-    '/Users/abc/Dev/ClassifLinearRidge_Summary.csv',
-    '/Users/abc/Dev/Balanced ClassifLinearRidge_Summary.csv',
-    '/Users/abc/Dev/ClassifLinearElasticNet_Summary.csv',
-    '/Users/abc/Dev/Balanced ClassifLinearElasticNet_Summary.csv'
+FILE_PATHS = [
+    "/Users/abc/Dev/ClassifLinearLasso_Summary.csv",
+    "/Users/abc/Dev/Balanced ClassifLinearLasso_Summary.csv",
+    "/Users/abc/Dev/ClassifLinearRidge_Summary.csv",
+    "/Users/abc/Dev/Balanced ClassifLinearRidge_Summary.csv",
+    "/Users/abc/Dev/ClassifLinearElasticNet_Summary.csv",
+    "/Users/abc/Dev/Balanced ClassifLinearElasticNet_Summary.csv",
 ]
 
+
 def load_and_process_csv(file_path):
-    df = pd.read_csv(file_path, na_values=[''])
-    df['Calibration'] = df['Calibration'].fillna('None')
-    model_name = os.path.basename(file_path).split('_')[0]
-    df['Model'] = model_name
+    """
+    Loads a Sim CSV file, fills missing Calibration values with "None", and adds
+    a "Model" column holding the file name up to its first underscore.
+    """
+    df = pd.read_csv(file_path, na_values=[""])
+    df["Calibration"] = df["Calibration"].fillna("None")
+    model_name = os.path.basename(file_path).split("_")[0]
+    df["Model"] = model_name
     print(df.dtypes)  # Check the data types to ensure they are read correctly
     return df
 
 
 def get_top_traces_combined(df, y_column):
-    if 'Model' not in df.columns:
+    """
+    Returns every row of the 3 (Model, Calibration, autoregressive_n) groups whose
+    maximum y_column value is largest. Raises ValueError if df has no "Model" column.
+    """
+    if "Model" not in df.columns:
         raise ValueError("Model column not found in DataFrame")
-    grouped = df.groupby(['Model', 'Calibration', 'predictoor_ss.aimodel_data_ss.autoregressive_n'])
+    grouped = df.groupby(
+        ["Model", "Calibration", "predictoor_ss.aimodel_data_ss.autoregressive_n"]
+    )
     max_profits = grouped[y_column].max().reset_index()
     top_traces = max_profits.nlargest(3, y_column)
-    top_trace_indices = top_traces[['Model', 'Calibration', 'predictoor_ss.aimodel_data_ss.autoregressive_n']]
-    top_trace_full_df = df.merge(top_trace_indices, on=['Model', 'Calibration', 'predictoor_ss.aimodel_data_ss.autoregressive_n'])
+    top_trace_indices = top_traces[
+        ["Model", "Calibration", "predictoor_ss.aimodel_data_ss.autoregressive_n"]
+    ]
+    top_trace_full_df = df.merge(
+        top_trace_indices,
+        on=["Model", "Calibration", "predictoor_ss.aimodel_data_ss.autoregressive_n"],
+    )
     return top_trace_full_df
 
+
 def generate_traces(df, green_shades, y_column):
+    """
+    Builds one lines+markers Scatter per group, colored in descending-max order and
+    returned in ascending order. Pops from green_shades (mutates it; one per group).
+    """
     traces = []
-    grouped = df.groupby(['Model', 'Calibration', 'predictoor_ss.aimodel_data_ss.autoregressive_n'])
-    sorted_groups = grouped[y_column].max().reset_index().sort_values(by=y_column, ascending=False)  # Sorting highest to lowest
+    grouped = df.groupby(
+        ["Model", "Calibration", "predictoor_ss.aimodel_data_ss.autoregressive_n"]
+    )
+    sorted_groups = (
+        grouped[y_column].max().reset_index().sort_values(by=y_column, ascending=False)
+    )  # Sorting highest to lowest
 
     temp_traces = []
 
     for _, row in sorted_groups.iterrows():
-        group_df = grouped.get_group((row['Model'], row['Calibration'], row['predictoor_ss.aimodel_data_ss.autoregressive_n']))
+        group_df = grouped.get_group(
+            (
+                row["Model"],
+                row["Calibration"],
+                row["predictoor_ss.aimodel_data_ss.autoregressive_n"],
+            )
+        )
         color = green_shades.pop(0)
-        autoregressive_n = int(row['predictoor_ss.aimodel_data_ss.autoregressive_n'])  # Ensure it's formatted as an integer
+        autoregressive_n = int(
+            row["predictoor_ss.aimodel_data_ss.autoregressive_n"]
+        )  # Ensure it's formatted as an integer
         trace = go.Scatter(
-            x=group_df['predictoor_ss.aimodel_data_ss.max_n_train'],
+            x=group_df["predictoor_ss.aimodel_data_ss.max_n_train"],
             y=group_df[y_column],
             name=f"{row['Model']}: {row['Calibration']} & Autoregressive_n = {autoregressive_n}",
-            marker=dict(color=color),
-            mode='lines+markers'
+            marker={"color": color},
+            mode="lines+markers",
         )
         temp_traces.append(trace)
 
     traces.extend(reversed(temp_traces))
     return traces
 
+
 def plot_data_from_csvs(file_paths, y_column, eth_column):
+    """
+    Loads every CSV in file_paths, concatenates them, and calls plot_data twice:
+    for rows whose eth_column lacks "ETH" (NaN counts as lacking), then for the rest.
+    """
     all_data = []
     for file_path in file_paths:
         df = load_and_process_csv(file_path)
         all_data.append(df)
 
     combined_df = pd.concat(all_data, ignore_index=True)
-    df_without_eth = combined_df[~combined_df[eth_column].str.contains('ETH', na=False)]
-    df_with_eth = combined_df[combined_df[eth_column].str.contains('ETH', na=False)]
+    df_without_eth = combined_df[~combined_df[eth_column].str.contains("ETH", na=False)]
+    df_with_eth = combined_df[combined_df[eth_column].str.contains("ETH", na=False)]
 
     plot_data(df_without_eth, y_column, "(Trained on BTC-USDT Data)")
     plot_data(df_with_eth, y_column, "(Trained on BTC & ETH-USDT Data)")
 
+
 def plot_data(df, y_column, title_suffix):
-    if 'Model' not in df.columns:
+    """
+    Shows a figure of the 3 groups picked by get_top_traces_combined, drawn in three
+    green shades. Raises ValueError if df lacks a "Model" column. Returns None.
+    """
+    if "Model" not in df.columns:
         raise ValueError("Model column not found in DataFrame")
     top_traces_df = get_top_traces_combined(df, y_column)
-    green_shades = ['#267326', '#66cc66', '#adebad']  # Dark to light green
+    green_shades = ["#267326", "#66cc66", "#adebad"]  # Dark to light green
     traces = generate_traces(top_traces_df, green_shades.copy(), y_column)
-    profit_type = "Predictoor Profit (OCEAN)" if y_column == 'pdr_profit_OCEAN' else "Trader Profit (USD)"
+    profit_type = (
+        "Predictoor Profit (OCEAN)"
+        if y_column == "pdr_profit_OCEAN"
+        else "Trader Profit (USD)"
+    )
     layout = go.Layout(
-        title=dict(text=f"Top 3 Highest {profit_type} Scores - {title_suffix}", x=0.5),
-        xaxis=dict(title="Max_N_Train", tickvals=[1000, 2000, 5000], ticktext=["1000", "2000", "5000"]),
-        yaxis=dict(title=profit_type, tickmode='auto', showgrid=True, tickfont=dict(size=10), automargin=True),
-        margin=dict(l=70, r=20, t=60, b=40),
+        title={
+            "text": f"Top 3 Highest {profit_type} Scores - {title_suffix}",
+            "x": 0.5,
+        },
+        xaxis={
+            "title": "Max_N_Train",
+            "tickvals": [1000, 2000, 5000],
+            "ticktext": ["1000", "2000", "5000"],
+        },
+        yaxis={
+            "title": profit_type,
+            "tickmode": "auto",
+            "showgrid": True,
+            "tickfont": {"size": 10},
+            "title_standoff": 25,
+        },
+        margin={"l": 70, "r": 20, "t": 60, "b": 40},
         showlegend=True,
-        legend=dict(title=dict(text="Traces Sorted by Ascending Profit")),
-        hovermode='closest'
+        legend={"title": {"text": "Traces Sorted by Ascending Profit"}},
+        hovermode="closest",
     )
     fig = go.Figure(data=traces, layout=layout)
     fig.show()
 
-y_column = 'trader_profit_USD'  # Can be 'pdr_profit_OCEAN' or 'trader_profit_USD'
-eth_column = 'predictoor_ss.predict_train_feedsets'  # Adjust the column name as necessary
-plot_data_from_csvs(file_paths, y_column, eth_column)
+
+Y_COLUMN = "pdr_profit_OCEAN"  # Can be 'pdr_profit_OCEAN' or 'trader_profit_USD'
+ETH_COLUMN = (
+    "predictoor_ss.predict_train_feedsets"  # Adjust the column name as necessary
+)
+plot_data_from_csvs(FILE_PATHS, Y_COLUMN, ETH_COLUMN)

From 21767cd8475e9560d025fa9cacebbb41ba7c4327 Mon Sep 17 00:00:00 2001
From: V <101472818+graceful-coder@users.noreply.github.com>
Date: Tue, 17 Sep 2024 11:33:31 -0400
Subject: [PATCH 6/7] Generalized file_path in plot_each_model.py

---
 pdr_backend/benchmarks/plot_each_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pdr_backend/benchmarks/plot_each_model.py b/pdr_backend/benchmarks/plot_each_model.py
index bb3a65b4c..c23a28ce1 100644
--- a/pdr_backend/benchmarks/plot_each_model.py
+++ b/pdr_backend/benchmarks/plot_each_model.py
@@ -10,7 +10,7 @@
 
 # Example file path
 FILE_PATH = (
-    "/Users/foxylady/Dev/ClassifLinearElasticNet Balanced_50kIterations_Summary.csv"
+    "/Users/abc/Dev/ClassifLinearElasticNet Balanced_50kIterations_Summary.csv"
 )
 
 

From 07a50269d0d8502d46cceef479e2ca4d16440e48 Mon Sep 17 00:00:00 2001
From: V <101472818+graceful-coder@users.noreply.github.com>
Date: Tue, 17 Sep 2024 11:35:50 -0400
Subject: [PATCH 7/7] Re-linted plot_each_model.py

---
 pdr_backend/benchmarks/plot_each_model.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pdr_backend/benchmarks/plot_each_model.py b/pdr_backend/benchmarks/plot_each_model.py
index c23a28ce1..0b6507c8d 100644
--- a/pdr_backend/benchmarks/plot_each_model.py
+++ b/pdr_backend/benchmarks/plot_each_model.py
@@ -9,9 +9,7 @@
 import plotly.graph_objects as go  # type: ignore
 
 # Example file path
-FILE_PATH = (
-    "/Users/abc/Dev/ClassifLinearElasticNet Balanced_50kIterations_Summary.csv"
-)
+FILE_PATH = "/Users/abc/Dev/ClassifLinearElasticNet Balanced_50kIterations_Summary.csv"
 
 
 def load_data_from_csv(file_path):