Skip to content

Fix #1332: Add benchmark plots #1613

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions pdr_backend/benchmarks/plot_each_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""

Takes a Simulation's CSV data and plots each model by calibration.

"""

import os
import pandas as pd
import plotly.graph_objects as go # type: ignore

# Example file path: one simulation summary CSV. load_data_from_csv() uses the
# part of the file's basename before the first underscore as the model name.
FILE_PATH = "/Users/abc/Dev/ClassifLinearElasticNet Balanced_50kIterations_Summary.csv"


def load_data_from_csv(file_path):
    """
    Loads Sim data from a CSV file into 2 dataframes.

    The model name is taken from the file's basename (the text before the
    first underscore) and stored in a "Model" column. Missing calibrations
    are labelled "None", and each returned dataframe gets a "Color" column
    derived from its calibration.

    Args:
        file_path: path to a CSV that has "Calibration" and
            "predictoor_ss.predict_train_feedsets" columns.

    Returns:
        Two dataframes, one with ETH data and one without, in the order
        (df_without_eth, df_with_eth).
    """
    df = pd.read_csv(file_path, na_values=[""])
    df["Calibration"] = df["Calibration"].fillna("None")
    df["Model"] = os.path.basename(file_path).split("_")[0]

    # na=False: a row with a missing feedsets value counts as "without ETH".
    # Without it the mask holds NaN and both `~mask` and the boolean indexing
    # below raise.
    has_eth = df["predictoor_ss.predict_train_feedsets"].str.contains(
        "ETH", na=False
    )
    df_without_eth = df[~has_eth].copy()
    df_with_eth = df[has_eth].copy()

    # Calibrations that are not in this mapping end up with NaN as their color.
    color_mapping = {"Sigmoid": "orange", "Isotonic": "blue", "None": "fuchsia"}
    for subset in (df_without_eth, df_with_eth):
        subset["Color"] = subset["Calibration"].map(color_mapping)

    print(
        f"Data Types:\n{df.dtypes}"
    )  # Check the data types to ensure they are read correctly
    return df_without_eth, df_with_eth


def generate_traces(df, calibrations, autoregressive_n, y_column):
    """
    Builds one scatter trace per (calibration, autoregressive_n) combination.

    Combinations that match no rows in `df` are reported on stdout and skipped.

    Returns:
        List of traces.
    """
    autoregressive_col = "predictoor_ss.aimodel_data_ss.autoregressive_n"
    max_n_train_col = "predictoor_ss.aimodel_data_ss.max_n_train"

    result = []
    for calibration in calibrations:
        for autoregressive in autoregressive_n:
            same_calibration = df["Calibration"] == calibration
            same_autoregressive = df[autoregressive_col] == int(autoregressive)
            subset = df[same_calibration & same_autoregressive]

            if subset.empty:
                print(
                    f"No data for {calibration} with Autoregressive_n = {autoregressive}"
                )
                continue

            # Every row of the subset shares one calibration, so the first
            # row's color is used for the whole trace.
            scatter = go.Scatter(
                x=subset[max_n_train_col],
                y=subset[y_column],
                name=f"{calibration} & Autoregressive_n = {autoregressive}",
                marker={"color": subset["Color"].iloc[0]},
                customdata=[calibration, autoregressive],
            )
            result.append(scatter)
    return result


# Base layout passed to every figure created in plot_data(). The "title" set
# here is replaced per figure through fig.update_layout(); the x-axis ticks are
# pinned to the three Max_N_Train values listed below.
layout = {
"title": {"text": "Traces Sorted by Ascending Predictoor Profit"},
"xaxis": {
"title": "Max_N_Train",
"tickvals": [1000, 2000, 5000],
"ticktext": ["1000", "2000", "5000"],
},
"margin": {"l": 70, "r": 20, "t": 60, "b": 40},
"showlegend": True,
"legend": {"title": {"text": "Traces Sorted by Ascending Predictoor Profit"}},
"hovermode": "closest",
}


def plot_data(filename, calibration, autoregressive_n, y_column):
    """
    Plots the data from the given CSV file.

    Args:
        filename: path of the simulation CSV to load.
        calibration: calibrations to draw traces for.
        autoregressive_n: autoregressive_n values to draw traces for.
        y_column: column plotted on the y axis, e.g. "pdr_profit_OCEAN".

    Returns:
        Nothing. Shows two plots, one with ETH data and one without.
    """
    df_without_eth, df_with_eth = load_data_from_csv(filename)

    # Both figures use the caller's selections. The ETH figure previously read
    # the module-level selected_calibrations / selected_autoregressives and so
    # ignored the arguments passed in.
    _create_plot(
        df_without_eth,
        calibration,
        autoregressive_n,
        y_column,
        "Predictoor Profit Benchmarks (Trained with BTC-USDT Data)",
    )
    _create_plot(
        df_with_eth,
        calibration,
        autoregressive_n,
        y_column,
        "Predictoor Profit Benchmarks (Trained with BTC-USDT & ETH-USDT Data)",
    )


def _create_plot(df, calibration, autoregressive_n, y_column, title):
    """Builds and shows one figure for `df`; an empty `df` is reported and skipped."""
    if df.empty:
        # df["Model"].iloc[0] below would raise IndexError on an empty frame.
        print(f"No data to plot for: {title}")
        return

    traces = generate_traces(df, calibration, autoregressive_n, y_column)
    yaxis_title = (
        "Predictoor Profit (OCEAN)"
        if y_column == "pdr_profit_OCEAN"
        else "Trader Profit (USD)"
    )
    fig = go.Figure(data=traces, layout=layout)
    fig.update_layout(
        title=f"{df['Model'].iloc[0]} - {title} - {y_column}",
        yaxis_title=yaxis_title,
    )
    fig.show()


# Selections used when this file is run as a script.
selected_calibrations = ["None", "Isotonic", "Sigmoid"]
selected_autoregressives = ["1", "2"]
Y_COLUMN = "pdr_profit_OCEAN"  # Example Column to plot: 'pdr_profit_OCEAN' or 'trader_profit_USD'

if __name__ == "__main__":
    # Guarded so that importing this module (tests, linters, other scripts)
    # does not read FILE_PATH or open plots as a side effect.
    plot_data(FILE_PATH, selected_calibrations, selected_autoregressives, Y_COLUMN)
160 changes: 160 additions & 0 deletions pdr_backend/benchmarks/plot_model_comparison.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""

Takes multiple Simulation CSVs for different models and plots the three most profitable traces.

"""

import os
import pandas as pd
import plotly.graph_objects as go # type: ignore


# Example input files, one simulation summary CSV per model.
# load_and_process_csv() uses the part of each basename before the first
# underscore as the model name.
FILE_PATHS = [
"/Users/abc/Dev/ClassifLinearLasso_Summary.csv",
"/Users/abc/Dev/Balanced ClassifLinearLasso_Summary.csv",
"/Users/abc/Dev/ClassifLinearRidge_Summary.csv",
"/Users/abc/Dev/Balanced ClassifLinearRidge_Summary.csv",
"/Users/abc/Dev/ClassifLinearElasticNet_Summary.csv",
"/Users/abc/Dev/Balanced ClassifLinearElasticNet_Summary.csv",
]


def load_and_process_csv(file_path):
    """
    Reads one simulation CSV and tags its rows with calibration and model.

    Missing "Calibration" values become the string "None"; the "Model" column
    holds the part of the file's basename that precedes the first underscore.
    """
    base_name = os.path.basename(file_path)
    raw = pd.read_csv(file_path, na_values=[""])
    frame = raw.assign(
        Calibration=raw["Calibration"].fillna("None"),
        Model=base_name.partition("_")[0],
    )
    print(frame.dtypes)  # Check the data types to ensure they are read correctly
    return frame


def get_top_traces_combined(df, y_column, top_n=3):
    """
    Keeps only the rows that belong to the `top_n` most profitable traces.

    A trace is one (Model, Calibration, autoregressive_n) combination. Traces
    are ranked across all models together by their highest `y_column` value;
    the ranking is not done per model.

    Args:
        df: dataframe with "Model", "Calibration",
            "predictoor_ss.aimodel_data_ss.autoregressive_n" and `y_column`.
        y_column: name of the profit column used for ranking.
        top_n: how many traces to keep (defaults to 3).

    Returns:
        Dataframe with every row of `df` that belongs to a kept trace.

    Raises:
        ValueError: if `df` has no "Model" column.
    """
    if "Model" not in df.columns:
        raise ValueError("Model column not found in DataFrame")

    group_keys = [
        "Model",
        "Calibration",
        "predictoor_ss.aimodel_data_ss.autoregressive_n",
    ]
    max_profits = df.groupby(group_keys)[y_column].max().reset_index()
    top_keys = max_profits.nlargest(top_n, y_column)[group_keys]

    # Inner merge on the group keys keeps all rows of the winning traces.
    return df.merge(top_keys, on=group_keys)


def generate_traces(df, green_shades, y_column):
    """
    Generates plotly traces for each model, calibration, and autoregressive_n.

    Groups are ranked by their highest `y_column` value. The best group gets
    the first color in `green_shades`, the second best the next one, and so
    on. The returned list is reversed, so the best group comes last.

    Args:
        df: dataframe with "Model", "Calibration",
            "predictoor_ss.aimodel_data_ss.autoregressive_n",
            "predictoor_ss.aimodel_data_ss.max_n_train" and `y_column`.
        green_shades: list of colors, one per group; it is only read.
        y_column: name of the column plotted on the y axis.

    Returns:
        List of traces, lowest-ranked group first.

    Raises:
        IndexError: if `df` has more groups than `green_shades` has colors.
    """
    grouped = df.groupby(
        ["Model", "Calibration", "predictoor_ss.aimodel_data_ss.autoregressive_n"]
    )
    sorted_groups = (
        grouped[y_column].max().reset_index().sort_values(by=y_column, ascending=False)
    )  # Sorting highest to lowest

    traces = []
    for position, (_, row) in enumerate(sorted_groups.iterrows()):
        group_df = _get_group_data(grouped, row)
        # Indexing by rank instead of pop(0) leaves the caller's list intact.
        color = green_shades[position]
        traces.append(_create_trace(group_df, row, color, y_column))

    return traces[::-1]


def _get_group_data(grouped, row):
    """Returns the rows of the group identified by the key columns of `row`."""
    return grouped.get_group(
        (
            row["Model"],
            row["Calibration"],
            row["predictoor_ss.aimodel_data_ss.autoregressive_n"],
        )
    )


def _create_trace(group_df, row, color, y_column):
    """Builds the lines+markers scatter trace for one group."""
    autoregressive_n = int(
        row["predictoor_ss.aimodel_data_ss.autoregressive_n"]
    )  # Ensure it's formatted as an integer
    return go.Scatter(
        x=group_df["predictoor_ss.aimodel_data_ss.max_n_train"],
        y=group_df[y_column],
        name=f"{row['Model']}: {row['Calibration']} & Autoregressive_n = {autoregressive_n}",
        marker={"color": color},
        mode="lines+markers",
    )


def plot_data_from_csvs(file_paths, y_column, eth_column):
    """
    Reads every CSV, stacks them into one dataframe and plots it in two parts.

    Rows whose `eth_column` text contains "ETH" go to the second plot; all
    other rows, including those with a missing value, go to the first plot.
    """
    frames = [load_and_process_csv(path) for path in file_paths]
    combined = pd.concat(frames, ignore_index=True)

    has_eth = combined[eth_column].str.contains("ETH", na=False)
    rows_without_eth = combined[~has_eth]
    rows_with_eth = combined[has_eth]

    plot_data(rows_without_eth, y_column, "(Trained on BTC-USDT Data)")
    plot_data(rows_with_eth, y_column, "(Trained on BTC & ETH-USDT Data)")


def plot_data(df, y_column, title_suffix):
    """
    Shows a figure with the top traces found in `df`.

    The traces come from get_top_traces_combined() and are colored from dark
    to light green by generate_traces(). Raises ValueError when `df` has no
    "Model" column.
    """
    if "Model" not in df.columns:
        raise ValueError("Model column not found in DataFrame")

    if y_column == "pdr_profit_OCEAN":
        profit_type = "Predictoor Profit (OCEAN)"
    else:
        profit_type = "Trader Profit (USD)"

    palette = ["#267326", "#66cc66", "#adebad"]  # Dark to light green
    top_rows = get_top_traces_combined(df, y_column)
    # A fresh list is handed over so the palette above stays unchanged.
    scatter_traces = generate_traces(top_rows, list(palette), y_column)

    figure_title = {
        "text": f"Top 3 Highest {profit_type} Scores - {title_suffix}",
        "x": 0.5,
    }
    x_axis = {
        "title": "Max_N_Train",
        "tickvals": [1000, 2000, 5000],
        "ticktext": ["1000", "2000", "5000"],
    }
    y_axis = {
        "title": profit_type,
        "tickmode": "auto",
        "showgrid": True,
        "tickfont": {"size": 10},
        "title_standoff": 25,
    }
    figure_layout = go.Layout(
        title=figure_title,
        xaxis=x_axis,
        yaxis=y_axis,
        margin={"l": 70, "r": 20, "t": 60, "b": 40},
        showlegend=True,
        legend={"title": {"text": "Traces Sorted by Ascending Profit"}},
        hovermode="closest",
    )

    figure = go.Figure(data=scatter_traces, layout=figure_layout)
    figure.show()


Y_COLUMN = "pdr_profit_OCEAN"  # Can be 'pdr_profit_OCEAN' or 'trader_profit_USD'
ETH_COLUMN = (
    "predictoor_ss.predict_train_feedsets"  # Adjust the column name as necessary
)

if __name__ == "__main__":
    # Guarded so that importing this module (tests, linters, other scripts)
    # does not read FILE_PATHS or open plots as a side effect.
    plot_data_from_csvs(FILE_PATHS, Y_COLUMN, ETH_COLUMN)
Loading