Improve plotting: separate subplots per benchmark + add gate ratio metric #174
Changes from all commits: 78b0983, 4ced7f1, e1c51b3, 298c86d, e27638c, a3ce8c4, e5ac54a, f68b089, da1e16e
```diff
@@ -3,21 +3,23 @@
 import sys
 import pandas as pd
 import matplotlib.pyplot as plt
+import numpy as np
 from ucc_bench.results import SuiteResultsDatabase, to_df_timing, to_df_simulation

 from shared import calculate_abs_relative_error, get_compiler_colormap

 BAR_WIDTH = 0.2


-def generate_plot(
+def generate_subplots(
     df: pd.DataFrame,
     plot_configs: list[dict],
     latest_date: str,
     out_path: Path,
     use_pdf: bool = False,
 ):
-    """Generic plotting function to create bar charts for benchmark data."""
+    """Generate subplots with separate subplot per benchmark.
+
+    Generic function for creating subplot layouts for both compilation and simulation benchmarks.
+    """
     # Configure matplotlib for LaTeX output if PDF export is requested
     if use_pdf:
         plt.rcParams.update(
@@ -27,62 +29,102 @@ def generate_plot(
             }
         )

-    circuit_names = sorted(df["benchmark_id"].unique())
-    x_positions = range(len(circuit_names))
-    circuit_name_to_index = {name: i for i, name in enumerate(circuit_names)}
-    color_map = get_compiler_colormap()
-
-    num_plots = len(plot_configs)
-    fig, axes = plt.subplots(1, num_plots, figsize=(7 * num_plots, 7), squeeze=False)
-    axes = axes.flatten()
-
-    compilers = df["compiler"].unique()
-    for i, compiler_name in enumerate(compilers):
-        grp = df[df["compiler"] == compiler_name]
-        grp_indices = grp["benchmark_id"].map(circuit_name_to_index)
-        bar_positions = [idx + i * BAR_WIDTH for idx in grp_indices]
-
-        for ax, config in zip(axes, plot_configs):
-            ax.bar(
-                bar_positions,
-                grp[config["y_col"]],
-                width=BAR_WIDTH,
-                label=compiler_name,
-                color=color_map.get(compiler_name),
-            )
-
-    for ax, config in zip(axes, plot_configs):
-        ax.set_title(f"{config['title']} (Date: {latest_date})")
-        ax.set_xlabel("Circuit Name")
-        ax.set_ylabel(config["ylabel"])
-        ax.set_xticks(x_positions)
-        ax.set_xticklabels(circuit_names, rotation=75, ha="right")
-        ax.set_yscale("log")
-        ax.legend(title="Compiler")
-
-    plt.tight_layout()
-    print(f"Saving plot to {out_path}")
-    fig.savefig(out_path, dpi=300, bbox_inches="tight")
-    plt.close(fig)
+    benchmarks = sorted(df["benchmark_id"].unique())
+    n_benchmarks = len(benchmarks)
+    ncols = 3
+    nrows = 2
+
+    # Ensure we don't have more benchmarks than subplots
+    if n_benchmarks > nrows * ncols:
+        raise ValueError(
+            f"Too many benchmarks ({n_benchmarks}) for fixed {nrows}x{ncols} grid. "
+            f"Maximum supported: {nrows * ncols}"
+        )
+
+    # Create separate figures for each metric
+    for config in plot_configs:
+        fig, axes = plt.subplots(
+            nrows, ncols, figsize=(5 * ncols, 4 * nrows), squeeze=False
+        )
+        axes = axes.flatten()
+        color_map = get_compiler_colormap()
+
+        for i, ax in enumerate(axes):
+            if i < n_benchmarks:
+                benchmark = benchmarks[i]
+                sub = df[df["benchmark_id"] == benchmark]
+
+                # Extract values for each compiler
+                values = sub[config["y_col"]].values
+                compiler_names = sub["compiler"].values
+
+                # Create bars
+                x_positions = np.arange(len(compiler_names))
+                ax.bar(
+                    x_positions,
+                    values,
+                    color=[
+                        color_map.get(compiler, "#4C72B0")
+                        for compiler in compiler_names
+                    ],
+                    width=0.5,
+                )
+
+                ax.set_xticks(x_positions)
+                ax.set_xticklabels(compiler_names, rotation=30, ha="right")
+                ax.set_title(f"Benchmark: {benchmark}")
+                ax.set_ylabel(config["ylabel"])
+                # Use log scale only if specified in config (default to True for backwards compatibility)
+                if config.get("use_log_scale", True):
+                    ax.set_yscale("log")
+            else:
+                ax.set_visible(False)
+
+        plt.suptitle(f"{config['title']} (Date: {latest_date})", fontsize=16)
+        plt.tight_layout(rect=[0, 0, 1, 0.96])
+
+        # Save with metric-specific filename
+        metric_name = config["y_col"].replace("_", "-")
+        metric_out_path = (
+            out_path.parent / f"{out_path.stem}_{metric_name}{out_path.suffix}"
+        )
+        print(f"Saving plot to {metric_out_path}")
+        fig.savefig(metric_out_path, dpi=300, bbox_inches="tight")
+        plt.close(fig)


 def plot_compilation(
     df: pd.DataFrame, latest_date: str, out_path: Path, use_pdf: bool = False
 ):
     """Generates and saves plots for compilation benchmark data."""
+    df_comp = df.copy()
+    df_comp["compiled_ratio"] = (
+        df_comp["compiled_multiq_gates"] / df_comp["raw_multiq_gates"]
+    )
+
     plot_configs = [
         {
             "y_col": "compile_time",
             "title": "Compiler Performance",
             "ylabel": "Compile Time (s)",
+            "use_log_scale": True,
         },
         {
             "y_col": "compiled_multiq_gates",
             "title": "Gate Counts",
-            "ylabel": "Compiled Gate Count",
+            "ylabel": "Compiled Multi-Qubit Gate Count",
+            "use_log_scale": True,
```
Collaborator: It might be better to have

Collaborator: Also noting this is a gap in the existing code already!
```diff
         },
+        {
+            "y_col": "compiled_ratio",
+            "title": "Compiled Gate Ratio",
+            "ylabel": "Compiled Gates / Raw Gates",
+            "use_log_scale": False,
+        },
     ]
-    generate_plot(df, plot_configs, latest_date, out_path, use_pdf)
+    generate_subplots(df_comp, plot_configs, latest_date, out_path, use_pdf)


 def plot_simulation(
```
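For context on the new metric: `compiled_ratio` is simply the compiled multi-qubit gate count divided by the raw count, so values below 1.0 mean the compiler reduced the circuit. A minimal sketch on invented data (the benchmark and compiler names are placeholders; real rows come from `to_df_timing`):

```python
import pandas as pd

# Hypothetical rows standing in for real ucc-bench timing results.
df = pd.DataFrame(
    {
        "benchmark_id": ["qft", "qft"],
        "compiler": ["ucc", "qiskit"],
        "raw_multiq_gates": [480, 480],
        "compiled_multiq_gates": [240, 360],
    }
)
# Same computation as plot_compilation: ratio < 1.0 means fewer gates after compiling.
df["compiled_ratio"] = df["compiled_multiq_gates"] / df["raw_multiq_gates"]
print(df[["compiler", "compiled_ratio"]])  # ucc -> 0.50, qiskit -> 0.75
```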
```diff
@@ -109,7 +151,7 @@ def plot_simulation(
             "ylabel": "Absolute Relative Error",
         },
     ]
-    generate_plot(df_sim, plot_configs, latest_date, out_path, use_pdf)
+    generate_subplots(df_sim, plot_configs, latest_date, out_path, use_pdf)


 def main():
```
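One consequence worth calling out for downstream consumers: because each metric now gets its own figure, `generate_subplots` writes one file per `y_col` instead of a single combined image. A quick sketch of the naming scheme (the output path here is hypothetical):

```python
from pathlib import Path

out_path = Path("plots/compilation.png")  # illustrative output path
for y_col in ["compile_time", "compiled_multiq_gates", "compiled_ratio"]:
    metric_name = y_col.replace("_", "-")
    # Mirrors the filename derivation in generate_subplots.
    print(out_path.parent / f"{out_path.stem}_{metric_name}{out_path.suffix}")
# plots/compilation_compile-time.png
# plots/compilation_compiled-multiq-gates.png
# plots/compilation_compiled-ratio.png
```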
Collaborator: This is more a style nit, but the logscale for the prep_select and qv benchmarks has way more hashes labeled. Is there a nice way to make it less "busy"?
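One possible direction for the busy ticks, sketched below and not tested against this PR: cap the number of labeled major ticks on the log axis and silence the minor-tick labels matplotlib adds between decades. The toy bar data is only there to make the snippet self-contained.

```python
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker

fig, ax = plt.subplots()
ax.bar([0, 1], [1e2, 1e6])  # stand-in for a per-benchmark subplot
ax.set_yscale("log")
# Show at most ~4 labeled decades on the log axis...
ax.yaxis.set_major_locator(mticker.LogLocator(base=10, numticks=4))
# ...and hide the labels on the minor ticks between decades.
ax.yaxis.set_minor_formatter(mticker.NullFormatter())
```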
If there are more than 6 benchmark results, does this silently just not show the additional plots? I'd consider either supporting an arbitrary number of benchmark results, or at least asserting/erroring if it's not the hard-coded 6 results.
That's a good point. An error could perhaps be raised when the number of benchmarks exceeds the 6 available subplots.