oceanprotocol · graceful-coder · Sep 17, 2024 · Sep 17, 2024 · Sep 17, 2024 · Sep 17, 2024
diff --git a/pdr_backend/benchmarks/plot_each_model.py b/pdr_backend/benchmarks/plot_each_model.py
@@ -0,0 +1,128 @@
+"""
+
+Takes a Simulation's CSV data and plots each model by calibration.
+
+"""
+
+import os
+import pandas as pd
+import plotly.graph_objects as go  # type: ignore
+
+# Example file path
+# The file-name text before the first underscore is used as the model name
+# (see load_data_from_csv), so name input files "<model>_<anything>.csv".
+FILE_PATH = "/Users/abc/Dev/ClassifLinearElasticNet Balanced_50kIterations_Summary.csv"
+
+
+def load_data_from_csv(file_path):
+    """
+    Loads Sim data from a CSV file into 2 dataframes.
+
+    Missing "Calibration" values are replaced with the string "None", a
+    "Model" column is added holding the file-name text before the first
+    underscore, and each returned dataframe gets a "Color" column mapping
+    its calibration to a plot color. The dtypes of the loaded data are
+    printed as a sanity check.
+
+    Args:
+        file_path: Path of the simulation summary CSV.
+
+    Returns:
+        Two dataframes, in this order: rows whose train feedsets do not
+        contain "ETH", then rows whose train feedsets do. Rows with a
+        missing feedsets value are treated as not containing "ETH".
+    """
+    df = pd.read_csv(file_path, na_values=[""])
+    df["Calibration"] = df["Calibration"].fillna("None")
+    df["Model"] = os.path.basename(file_path).split("_")[0]
+
+    # na=False: a missing feedsets cell counts as "no ETH". Without it the
+    # mask would contain NaN and could not be used for boolean indexing.
+    has_eth = df["predictoor_ss.predict_train_feedsets"].str.contains(
+        "ETH", na=False
+    )
+    df_without_eth = df[~has_eth].copy()
+    df_with_eth = df[has_eth].copy()
+
+    # Calibrations that are not listed here end up with a missing color.
+    color_mapping = {"Sigmoid": "orange", "Isotonic": "blue", "None": "fuchsia"}
+    df_without_eth["Color"] = df_without_eth["Calibration"].map(color_mapping)
+    df_with_eth["Color"] = df_with_eth["Calibration"].map(color_mapping)
+    print(
+        f"Data Types:\n{df.dtypes}"
+    )  # Check the data types to ensure they are read correctly
+    return df_without_eth, df_with_eth
+
+
+def generate_traces(df, calibrations, autoregressive_n, y_column):
+    """
+    Generates traces for the given dataframe to be plotted.
+
+    One scatter trace is built for every (calibration, autoregressive_n)
+    pair that has at least one row in df. Pairs without rows are reported
+    on stdout and skipped.
+
+    Args:
+        df: Dataframe holding the "Calibration" and "Color" columns, the
+            autoregressive_n and max_n_train columns, and y_column.
+        calibrations: Calibration names to plot.
+        autoregressive_n: Autoregressive_n values to plot. Each one is
+            converted with int() before being compared with the column.
+        y_column: Name of the column plotted on the y axis.
+
+    Returns:
+        List of traces.
+    """
+    traces = []
+    for calibration in calibrations:
+        for autoregressive in autoregressive_n:
+            wanted_n = int(autoregressive)
+            filtered_df = df[
+                (df["Calibration"] == calibration)
+                & (df["predictoor_ss.aimodel_data_ss.autoregressive_n"] == wanted_n)
+            ]
+            if filtered_df.empty:
+                print(
+                    f"No data for {calibration} with Autoregressive_n = {autoregressive}"
+                )
+                continue
+            traces.append(
+                go.Scatter(
+                    x=filtered_df["predictoor_ss.aimodel_data_ss.max_n_train"],
+                    y=filtered_df[y_column],
+                    name=f"{calibration} & Autoregressive_n = {autoregressive}",
+                    # All rows share one calibration, so the first row's
+                    # color is the color of the whole trace.
+                    marker={"color": filtered_df["Color"].iloc[0]},
+                    # customdata is per point: give every point the same
+                    # [calibration, autoregressive] row.
+                    customdata=[[calibration, autoregressive]] * len(filtered_df),
+                )
+            )
+    return traces
+
+
+# Base layout shared by the figures built in plot_data. The "title" entry is
+# only a placeholder: plot_data replaces it per figure via update_layout.
+layout = {
+    "title": {"text": "Traces Sorted by Ascending Predictoor Profit"},
+    "xaxis": {
+        "title": "Max_N_Train",
+        # Ticks are pinned to these three values; presumably the max_n_train
+        # settings used in the simulations (confirm against the CSV data).
+        "tickvals": [1000, 2000, 5000],
+        "ticktext": ["1000", "2000", "5000"],
+    },
+    "margin": {"l": 70, "r": 20, "t": 60, "b": 40},
+    "showlegend": True,
+    "legend": {"title": {"text": "Traces Sorted by Ascending Predictoor Profit"}},
+    "hovermode": "closest",
+}
+
+
+def plot_data(filename, calibration, autoregressive_n, y_column):
+    """
+    Plots the data from the given CSV file.
+
+    Shows two figures: first the rows trained without ETH data, then the
+    rows trained with it. Both figures use the calibration and
+    autoregressive_n selections passed to this function. A subset with no
+    rows is reported on stdout and its figure is skipped.
+
+    Args:
+        filename: Path of the simulation summary CSV.
+        calibration: Calibration names to plot.
+        autoregressive_n: Autoregressive_n values to plot.
+        y_column: Name of the column plotted on the y axis.
+
+    Returns:
+        None. The figures are displayed with Figure.show().
+    """
+    df_without_eth, df_with_eth = load_data_from_csv(filename)
+    yaxis_title = (
+        "Predictoor Profit (OCEAN)"
+        if y_column == "pdr_profit_OCEAN"
+        else "Trader Profit (USD)"
+    )
+    subsets = (
+        (df_without_eth, "Trained with BTC-USDT Data"),
+        (df_with_eth, "Trained with BTC-USDT & ETH-USDT Data"),
+    )
+    for subset_df, training_label in subsets:
+        _show_benchmark_figure(
+            subset_df,
+            calibration,
+            autoregressive_n,
+            y_column,
+            yaxis_title,
+            training_label,
+        )
+
+
+def _show_benchmark_figure(
+    df, calibration, autoregressive_n, y_column, yaxis_title, training_label
+):
+    """Builds and shows one benchmark figure for a single subset of rows."""
+    if df.empty:
+        # The model name for the title comes from the first row, so there
+        # is nothing meaningful to draw or label without rows.
+        print(f"No rows for '{training_label}'; skipping figure")
+        return
+    traces = generate_traces(df, calibration, autoregressive_n, y_column)
+    fig = go.Figure(data=traces, layout=layout)
+    fig.update_layout(
+        title=f"{df['Model'].iloc[0]} - "
+        + f"Predictoor Profit Benchmarks ({training_label}) - {y_column}",
+        yaxis_title=yaxis_title,
+    )
+    fig.show()
+
+
+# Selections passed to plot_data by the call at the bottom of this script.
+selected_calibrations = ["None", "Isotonic", "Sigmoid"]
+# Given as strings; generate_traces converts each with int() before filtering.
+selected_autoregressives = ["1", "2"]
+Y_COLUMN = "pdr_profit_OCEAN"  # Example Column to plot: 'pdr_profit_OCEAN' or 'trader_profit_USD'
+# NOTE: this call runs at import time, so importing this module reads
+# FILE_PATH and shows the figures.
+plot_data(FILE_PATH, selected_calibrations, selected_autoregressives, Y_COLUMN)
diff --git a/pdr_backend/benchmarks/plot_model_comparison.py b/pdr_backend/benchmarks/plot_model_comparison.py
@@ -0,0 +1,160 @@
+"""
+
+Takes multiple Simulation CSVs for different models and plots the three most profitable traces.
+
+"""
+
+import os
+import pandas as pd
+import plotly.graph_objects as go  # type: ignore
+
+
+# Example inputs: one simulation summary CSV per model. The file-name text
+# before the first underscore becomes the model name (see
+# load_and_process_csv).
+FILE_PATHS = [
+    "/Users/abc/Dev/ClassifLinearLasso_Summary.csv",
+    "/Users/abc/Dev/Balanced ClassifLinearLasso_Summary.csv",
+    "/Users/abc/Dev/ClassifLinearRidge_Summary.csv",
+    "/Users/abc/Dev/Balanced ClassifLinearRidge_Summary.csv",
+    "/Users/abc/Dev/ClassifLinearElasticNet_Summary.csv",
+    "/Users/abc/Dev/Balanced ClassifLinearElasticNet_Summary.csv",
+]
+
+
+def load_and_process_csv(file_path):
+    """
+    Reads one simulation summary CSV and returns it as a dataframe.
+
+    Missing "Calibration" values become the string "None", and a "Model"
+    column is added containing the file-name text before the first
+    underscore. The column dtypes are printed as a sanity check.
+    """
+
+    frame = pd.read_csv(file_path, na_values=[""])
+    frame["Calibration"] = frame["Calibration"].fillna("None")
+
+    # "<model>_<rest>.csv" -> "<model>"
+    file_name = os.path.basename(file_path)
+    frame["Model"] = file_name.partition("_")[0]
+
+    print(frame.dtypes)  # Check the data types to ensure they are read correctly
+    return frame
+
+
+def get_top_traces_combined(df, y_column):
+    """
+    Returns the rows of the 3 most profitable traces in df.
+
+    A trace is one (Model, Calibration, autoregressive_n) group, ranked by
+    the maximum y_column value found in it. All rows belonging to the 3
+    best-ranked groups are returned. Raises ValueError when df has no
+    "Model" column.
+    """
+
+    if "Model" not in df.columns:
+        raise ValueError("Model column not found in DataFrame")
+
+    group_keys = [
+        "Model",
+        "Calibration",
+        "predictoor_ss.aimodel_data_ss.autoregressive_n",
+    ]
+    # Best y_column value reached by each group, one row per group.
+    best_per_group = df.groupby(group_keys)[y_column].max().reset_index()
+    winning_keys = best_per_group.nlargest(3, y_column)[group_keys]
+    # The default inner merge keeps only the rows of the winning groups.
+    return df.merge(winning_keys, on=group_keys)
+
+
+def generate_traces(df, green_shades, y_column):
+    """
+    Generates plotly traces for each model, calibration, and autoregressive_n.
+
+    Groups are ranked by their maximum y_column value. The best group gets
+    green_shades[0], the next one green_shades[1], and so on; shades are
+    reused cyclically when there are more groups than shades. green_shades
+    is only read, never modified.
+
+    Args:
+        df: Dataframe holding the "Model" and "Calibration" columns, the
+            autoregressive_n and max_n_train columns, and y_column.
+        green_shades: Sequence of colors, first entry for the best group.
+        y_column: Name of the column plotted on the y axis.
+
+    Returns:
+        List of traces ordered from the lowest-ranked group to the
+        highest-ranked one.
+
+    Raises:
+        IndexError: If there are groups to plot but green_shades is empty.
+    """
+
+    group_keys = [
+        "Model",
+        "Calibration",
+        "predictoor_ss.aimodel_data_ss.autoregressive_n",
+    ]
+    grouped = df.groupby(group_keys)
+    # Index entries are (model, calibration, autoregressive_n) tuples,
+    # ordered from the most to the least profitable group.
+    ranked_keys = grouped[y_column].max().sort_values(ascending=False).index
+    if len(ranked_keys) and not green_shades:
+        raise IndexError("green_shades must contain at least one color")
+
+    traces = []
+    for rank, key in enumerate(ranked_keys):
+        model, calibration, autoregressive_n = key
+        group_df = grouped.get_group(key)
+        # int() so the legend shows "1" rather than "1.0" for float columns.
+        autoregressive_n = int(autoregressive_n)
+        traces.append(
+            go.Scatter(
+                x=group_df["predictoor_ss.aimodel_data_ss.max_n_train"],
+                y=group_df[y_column],
+                name=f"{model}: {calibration} & Autoregressive_n = {autoregressive_n}",
+                marker={"color": green_shades[rank % len(green_shades)]},
+                mode="lines+markers",
+            )
+        )
+
+    # Callers expect ascending order (the legend lists the best trace last).
+    traces.reverse()
+    return traces
+
+
+def plot_data_from_csvs(file_paths, y_column, eth_column):
+    """
+    Loads every CSV, combines them, and plots the result in two figures.
+
+    The combined rows are split on whether eth_column contains "ETH" (a
+    missing value counts as not containing it). The rows without ETH are
+    plotted first, then the rows with ETH.
+    """
+
+    frames = [load_and_process_csv(path) for path in file_paths]
+    merged = pd.concat(frames, ignore_index=True)
+
+    has_eth = merged[eth_column].str.contains("ETH", na=False)
+    btc_only_rows = merged[~has_eth]
+    btc_and_eth_rows = merged[has_eth]
+
+    plot_data(btc_only_rows, y_column, "(Trained on BTC-USDT Data)")
+    plot_data(btc_and_eth_rows, y_column, "(Trained on BTC & ETH-USDT Data)")
+
+
+def plot_data(df, y_column, title_suffix):
+    """
+    Builds and shows one figure with the top traces found in df.
+
+    The traces come from get_top_traces_combined and generate_traces; the
+    title and y-axis label depend on y_column, and title_suffix is appended
+    to the title. Raises ValueError when df has no "Model" column.
+    """
+
+    if "Model" not in df.columns:
+        raise ValueError("Model column not found in DataFrame")
+
+    if y_column == "pdr_profit_OCEAN":
+        profit_type = "Predictoor Profit (OCEAN)"
+    else:
+        profit_type = "Trader Profit (USD)"
+
+    palette = ["#267326", "#66cc66", "#adebad"]  # Dark to light green
+    best_rows = get_top_traces_combined(df, y_column)
+    traces = generate_traces(best_rows, list(palette), y_column)
+
+    figure_title = {
+        "text": f"Top 3 Highest {profit_type} Scores - {title_suffix}",
+        "x": 0.5,
+    }
+    x_axis = {
+        "title": "Max_N_Train",
+        "tickvals": [1000, 2000, 5000],
+        "ticktext": ["1000", "2000", "5000"],
+    }
+    y_axis = {
+        "title": profit_type,
+        "tickmode": "auto",
+        "showgrid": True,
+        "tickfont": {"size": 10},
+        "title_standoff": 25,
+    }
+    figure_layout = go.Layout(
+        title=figure_title,
+        xaxis=x_axis,
+        yaxis=y_axis,
+        margin={"l": 70, "r": 20, "t": 60, "b": 40},
+        showlegend=True,
+        legend={"title": {"text": "Traces Sorted by Ascending Profit"}},
+        hovermode="closest",
+    )
+    go.Figure(data=traces, layout=figure_layout).show()
+
+
+Y_COLUMN = "pdr_profit_OCEAN"  # Can be 'pdr_profit_OCEAN' or 'trader_profit_USD'
+# Column searched for the substring "ETH" to split the rows between the two
+# figures (see plot_data_from_csvs).
+ETH_COLUMN = (
+    "predictoor_ss.predict_train_feedsets"  # Adjust the column name as necessary
+)
+# NOTE: this call runs at import time, so importing this module reads every
+# file in FILE_PATHS and shows the figures.
+plot_data_from_csvs(FILE_PATHS, Y_COLUMN, ETH_COLUMN)