From 9d344668109a7db664f283117ae9035d982f08af Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Sun, 14 Jul 2024 13:09:58 -0700 Subject: [PATCH] Adds performance annotations using the new Hatchet annotation interface --- thicket/ensemble.py | 13 ++++-- thicket/external/console.py | 8 ++++ thicket/groupby.py | 8 ++++ thicket/helpers.py | 9 +++++ thicket/model_extrap.py | 51 +++++++++++++++--------- thicket/ncu.py | 18 +++++++++ thicket/stats/calc_boxplot_statistics.py | 3 ++ thicket/stats/check_normality.py | 3 ++ thicket/stats/correlation_nodewise.py | 9 +++-- thicket/stats/display_boxplot.py | 3 ++ thicket/stats/display_heatmap.py | 4 ++ thicket/stats/display_histogram.py | 2 + thicket/stats/display_violinplot.py | 6 +++ thicket/stats/maximum.py | 3 ++ thicket/stats/mean.py | 3 ++ thicket/stats/median.py | 3 ++ thicket/stats/minimum.py | 3 ++ thicket/stats/percentiles.py | 3 ++ thicket/stats/preference.py | 15 ++++--- thicket/stats/scoring.py | 11 +++++ thicket/stats/std.py | 3 ++ thicket/stats/ttest.py | 3 ++ thicket/stats/variance.py | 3 ++ thicket/thicket.py | 39 ++++++++++++++++++ thicket/utils.py | 10 +++++ 25 files changed, 206 insertions(+), 30 deletions(-) diff --git a/thicket/ensemble.py b/thicket/ensemble.py index a3960569..66e0dcb6 100644 --- a/thicket/ensemble.py +++ b/thicket/ensemble.py @@ -6,6 +6,7 @@ from collections import OrderedDict from hatchet import GraphFrame +from hatchet.util.perf_measure import annotate import pandas as pd import thicket.helpers as helpers @@ -18,10 +19,14 @@ ) +_ensemble_annotate = annotate(fmt="Ensemble.{}") + + class Ensemble: """Operations pertaining to ensembling.""" @staticmethod + @_ensemble_annotate def _unify(thickets, inplace=False, disable_tqdm=False): """Create union graph from list of thickets and sync their DataFrames. @@ -83,6 +88,7 @@ def _unify(thickets, inplace=False, disable_tqdm=False): return union_graph, _thickets @staticmethod + @_ensemble_annotate def _columns( thickets, headers=None, @@ -186,9 +192,9 @@ def _handle_misc(): combined_th.profile = [new_mappings[prf] for prf in combined_th.profile] profile_mapping_cp = combined_th.profile_mapping.copy() for k, v in profile_mapping_cp.items(): - combined_th.profile_mapping[ - new_mappings[k] - ] = combined_th.profile_mapping.pop(k) + combined_th.profile_mapping[new_mappings[k]] = ( + combined_th.profile_mapping.pop(k) + ) combined_th.performance_cols = helpers._get_perf_columns( combined_th.dataframe ) @@ -331,6 +337,7 @@ def _handle_statsframe(): return combined_th @staticmethod + @_ensemble_annotate def _index( thickets, from_statsframes=False, fill_perfdata=True, disable_tqdm=False ): diff --git a/thicket/external/console.py b/thicket/external/console.py index f21bcc2c..a6e07723 100644 --- a/thicket/external/console.py +++ b/thicket/external/console.py @@ -4,14 +4,19 @@ import pandas as pd from hatchet.external.console import ConsoleRenderer from hatchet.util.colormaps import ColorMaps +from hatchet.util.perf_measure import annotate from ..version import __version__ +_thicket_console_renderer_annotate = annotate(fmt="ThicketRenderer.{}") + + class ThicketRenderer(ConsoleRenderer): """Extends the Hatchet ConsoleRenderer to support multi-dimensional Thicket data.""" # pylint: disable=W1401 + @_thicket_console_renderer_annotate def render_preamble(self): lines = [ r" _____ _ _ _ _ ", @@ -25,6 +30,7 @@ def render_preamble(self): return "\n".join(lines) + @_thicket_console_renderer_annotate def render(self, roots, dataframe, **kwargs): self.render_header = kwargs["render_header"] @@ -143,6 +149,7 @@ def render(self, roots, dataframe, **kwargs): else: return result.encode("utf-8") + @_thicket_console_renderer_annotate def render_legend(self): def render_label(index, low, high): metric_range = self.max_metric - self.min_metric @@ -218,6 +225,7 @@ def render_label(index, low, high): return legend + @_thicket_console_renderer_annotate def render_frame(self, node, dataframe, indent="", child_indent=""): node_depth = node._depth if node_depth < self.depth: diff --git a/thicket/groupby.py b/thicket/groupby.py index 56b3c1ed..a694021f 100644 --- a/thicket/groupby.py +++ b/thicket/groupby.py @@ -7,12 +7,19 @@ import pandas as pd +from hatchet.util.perf_measure import annotate + + +_groupby_annotate = annotate(fmt="GroupBy.{}") + class GroupBy(dict): + @_groupby_annotate def __init__(self, by=None, *args, **kwargs): super(GroupBy, self).__init__(*args, **kwargs) self.by = by + @_groupby_annotate def agg(self, func, disable_tqdm=False): """Aggregate the Thickets' PerfData numerical columns in a GroupBy object. @@ -32,6 +39,7 @@ def agg(self, func, disable_tqdm=False): return agg_tk + @_groupby_annotate def aggregate_thicket(self, tk, func): """Aggregate a Thicket's numerical columns given a statistical function. diff --git a/thicket/helpers.py b/thicket/helpers.py index c6e7314c..47b75f8b 100644 --- a/thicket/helpers.py +++ b/thicket/helpers.py @@ -5,8 +5,10 @@ from more_itertools import powerset import pandas as pd +from hatchet.util.perf_measure import annotate +@annotate() def _are_synced(gh, df): """Check if node objects are equal in graph and dataframe id(graph_node) == id(df_node). @@ -22,6 +24,7 @@ def _are_synced(gh, df): return True +@annotate() def _missing_nodes_to_list(a_df, b_df): """Get a list of node differences between two dataframes. Mainly used for "tree" function. @@ -65,6 +68,7 @@ def _missing_nodes_to_list(a_df, b_df): return missing_nodes +@annotate() def _new_statsframe_df(df, multiindex=False): """Generate new aggregated statistics table from a dataframe. This is most commonly needed when changes are made to the performance data table's index. @@ -92,6 +96,7 @@ def _new_statsframe_df(df, multiindex=False): return new_df +@annotate() def _print_graph(graph): """Print the nodes in a hatchet graph""" i = 0 @@ -101,6 +106,7 @@ def _print_graph(graph): return i +@annotate() def _resolve_missing_indicies(th_list): """Resolve indices if at least 1 profile has an index that another doesn't. @@ -124,6 +130,7 @@ def _resolve_missing_indicies(th_list): th.dataframe.set_index(idx, append=True, inplace=True) +@annotate() def _set_node_ordering(thickets): """Set node ordering for each thicket in a list. All thickets must have node ordering on, otherwise it will be set to False. @@ -139,6 +146,7 @@ def _set_node_ordering(thickets): tk.graph.enumerate_traverse() +@annotate() def _get_perf_columns(df): """Get list of performance dataframe columns that are numeric. @@ -165,6 +173,7 @@ def _get_perf_columns(df): return [x for x in numeric_columns if "nid" not in x] +@annotate() def _powerset_from_tuple(tup): pset = [y for y in powerset(tup)] return {x[0] if len(x) == 1 else x for x in pset} diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 9f779fb3..fe08a2ca 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -16,11 +16,16 @@ ) # For some reason it errors if "Experiment" is not explicitly imported from extrap.fileio import io_helper from extrap.modelers.model_generator import ModelGenerator +from hatchet.util.perf_measure import annotate MODEL_TAG = "_extrap-model" +_extrap_model_wrapper_annotate = annotate(fmt="ExtrapModelWrapper.{}") +_extrap_modelling_annotate = annotate(fmt="ExtrapModelling.{}") + + class ModelWrapper: """Wrapper for an Extra-P model. @@ -29,18 +34,22 @@ class ModelWrapper: the model. """ + @_extrap_model_wrapper_annotate def __init__(self, mdl, param_name): self.mdl = mdl self.param_name = param_name # Needed for plotting / displaying the model + @_extrap_model_wrapper_annotate def __str__(self): """Display self as a function""" return str(self.mdl.hypothesis.function) + @_extrap_model_wrapper_annotate def eval(self, val): """Evaluate function (self) at val. f(val) = result""" return self.mdl.hypothesis.function.evaluate(val) + @_extrap_model_wrapper_annotate def display(self, RSS): """Display function @@ -89,6 +98,7 @@ def display(self, RSS): class Modeling: """Produce models for all the metrics across the given graphframes.""" + @_extrap_modelling_annotate def __init__(self, tht, param_name, params=None, chosen_metrics=None): """Create a new model object. @@ -128,15 +138,16 @@ def __init__(self, tht, param_name, params=None, chosen_metrics=None): v: k for k, v in self.tht.profile_mapping.items() } for file_name, value in params.items(): - self.tht.metadata.at[ - profile_mapping_flipped[file_name], param_name - ] = value + self.tht.metadata.at[profile_mapping_flipped[file_name], param_name] = ( + value + ) self.params = tht.metadata[param_name].tolist() if not chosen_metrics: self.chosen_metrics = self.tht.exc_metrics + self.tht.inc_metrics else: self.chosen_metrics = chosen_metrics + @_extrap_modelling_annotate def to_html(self, RSS=False): def model_to_img_html(model_obj): fig, ax = model_obj.display(RSS) @@ -155,6 +166,7 @@ def model_to_img_html(model_obj): [met + MODEL_TAG for met in self.chosen_metrics] ].to_html(escape=False, formatters=frm_dict) + @_extrap_modelling_annotate def _add_extrap_statistics(self, node, metric): """Insert the Extra-P hypothesis function statistics into the aggregated statistics table. Has to be called after "produce_models". @@ -167,22 +179,23 @@ def _add_extrap_statistics(self, node, metric): node, metric + MODEL_TAG ].mdl.hypothesis - self.tht.statsframe.dataframe.at[ - node, metric + "_RSS" + MODEL_TAG - ] = hypothesis_fn.RSS - self.tht.statsframe.dataframe.at[ - node, metric + "_rRSS" + MODEL_TAG - ] = hypothesis_fn.rRSS - self.tht.statsframe.dataframe.at[ - node, metric + "_SMAPE" + MODEL_TAG - ] = hypothesis_fn.SMAPE - self.tht.statsframe.dataframe.at[ - node, metric + "_AR2" + MODEL_TAG - ] = hypothesis_fn.AR2 - self.tht.statsframe.dataframe.at[ - node, metric + "_RE" + MODEL_TAG - ] = hypothesis_fn.RE + self.tht.statsframe.dataframe.at[node, metric + "_RSS" + MODEL_TAG] = ( + hypothesis_fn.RSS + ) + self.tht.statsframe.dataframe.at[node, metric + "_rRSS" + MODEL_TAG] = ( + hypothesis_fn.rRSS + ) + self.tht.statsframe.dataframe.at[node, metric + "_SMAPE" + MODEL_TAG] = ( + hypothesis_fn.SMAPE + ) + self.tht.statsframe.dataframe.at[node, metric + "_AR2" + MODEL_TAG] = ( + hypothesis_fn.AR2 + ) + self.tht.statsframe.dataframe.at[node, metric + "_RE" + MODEL_TAG] = ( + hypothesis_fn.RE + ) + @_extrap_modelling_annotate def produce_models(self, agg_func=mean, add_stats=True): """Produces an Extra-P model. Models are generated by calling Extra-P's ModelGenerator. @@ -282,6 +295,7 @@ def produce_models(self, agg_func=mean, add_stats=True): if add_stats: self._add_extrap_statistics(node, met) + @_extrap_modelling_annotate def _componentize_function(model_object): """Componentize one Extra-P modeling object into a dictionary of its parts @@ -307,6 +321,7 @@ def _componentize_function(model_object): return term_dict + @_extrap_modelling_annotate def componentize_statsframe(self, columns=None): """Componentize multiple Extra-P modeling objects in the aggregated statistics table diff --git a/thicket/ncu.py b/thicket/ncu.py index 0e8440c5..8416553b 100644 --- a/thicket/ncu.py +++ b/thicket/ncu.py @@ -6,12 +6,16 @@ from collections import defaultdict from hatchet import QueryMatcher +from hatchet.util.perf_measure import annotate, begin_code_region, end_code_region import pandas as pd from tqdm import tqdm import ncu_report +_ncu_reader_annotate = annotate(fmt="NCUReader.{}") + + class NCUReader: """Object to interface and pull NCU report data into Thicket""" @@ -24,6 +28,7 @@ class NCUReader: } @staticmethod + @_ncu_reader_annotate def _build_query_from_ncu_trace(kernel_call_trace): """Build QueryLanguage query from an NCU kernel call trace @@ -63,6 +68,7 @@ def _predicate_builder(kernel, is_regex=False): return query @staticmethod + @_ncu_reader_annotate def _read_ncu(thicket, ncu_report_mapping): """Read NCU report files and return dictionary of data. @@ -80,6 +86,7 @@ def _read_ncu(thicket, ncu_report_mapping): # Kernel mapping from NCU kernel to thicket node to save re-querying kernel_map = {} + begin_code_region("iterate_over_ncu_reports") # Loop through NCU files for ncu_report_file in ncu_report_mapping: # NCU hash @@ -87,7 +94,9 @@ def _read_ncu(thicket, ncu_report_mapping): ncu_hash = profile_mapping_flipped[ncu_report_mapping[ncu_report_file]] # Load file + begin_code_region("load_ncu_report") report = ncu_report.load_report(ncu_report_file) + end_code_region("load_ncu_report") # Error check if report.num_ranges() > 1: @@ -96,6 +105,7 @@ def _read_ncu(thicket, ncu_report_mapping): + ncu_report_file + " has multiple ranges. Not supported yet." ) + begin_code_region("iterate_through_single_report") # Loop through ranges in report for range in report: # Grab first action @@ -111,14 +121,17 @@ def _read_ncu(thicket, ncu_report_mapping): # Query action in range pbar = tqdm(range) + begin_code_region("iterate_over_kernel_names") for i, action in enumerate(pbar): pbar.set_description(f"Processing action {i}/{len(range)}") # Name of kernel kernel_name = action.name() # Get NCU-side kernel trace + begin_code_region("get_kernel_trace") kernel_call_trace = list( action.nvtx_state().domain_by_id(0).push_pop_ranges() ) + end_code_region("get_kernel_trace") # Skip warmup kernels if len(kernel_call_trace) == 0: @@ -133,6 +146,7 @@ def _read_ncu(thicket, ncu_report_mapping): # Skip query building matched_node = kernel_map[kernel_name] else: # kernel hasn't been seen yet + begin_code_region("process_new_kernel_with_query") # Build query query = NCUReader._build_query_from_ncu_trace( kernel_call_trace @@ -143,6 +157,7 @@ def _read_ncu(thicket, ncu_report_mapping): matched_node = [ n for n in node_set if kernel_name in n.frame["name"] ][0] + end_code_region("process_new_kernel_with_query") # matched_node should always exist at this point assert matched_node is not None @@ -154,5 +169,8 @@ def _read_ncu(thicket, ncu_report_mapping): data_dict[(matched_node, ncu_hash)].append( dict(zip(metric_names, metric_values)) ) + end_code_region("iterate_over_kernel_names") + end_code_region("iterate_through_single_report") + end_code_region("iterate_over_ncu_reports") return data_dict, rollup_dict diff --git a/thicket/stats/calc_boxplot_statistics.py b/thicket/stats/calc_boxplot_statistics.py index 982afc95..73020b95 100644 --- a/thicket/stats/calc_boxplot_statistics.py +++ b/thicket/stats/calc_boxplot_statistics.py @@ -6,11 +6,14 @@ import pandas as pd import numpy as np +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op @cache_stats_op +@annotate() def calc_boxplot_statistics(thicket, columns=[], quartiles=[0.25, 0.5, 0.75], **kwargs): """Calculate boxplots lowerfence, q1, q2, q3, iqr, upperfence, and outliers for each node in the performance data table. diff --git a/thicket/stats/check_normality.py b/thicket/stats/check_normality.py index a5166109..14aabaab 100644 --- a/thicket/stats/check_normality.py +++ b/thicket/stats/check_normality.py @@ -6,11 +6,14 @@ import pandas as pd from scipy import stats +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op @cache_stats_op +@annotate() def check_normality(thicket, columns=None): """Determine if the data is normal or non-normal for each node in the performance data table. diff --git a/thicket/stats/correlation_nodewise.py b/thicket/stats/correlation_nodewise.py index a700d5d1..070925bd 100644 --- a/thicket/stats/correlation_nodewise.py +++ b/thicket/stats/correlation_nodewise.py @@ -5,11 +5,14 @@ from scipy import stats +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op @cache_stats_op +@annotate() def correlation_nodewise(thicket, column1=None, column2=None, correlation="pearson"): """Calculate the nodewise correlation for each node in the performance data table. @@ -74,9 +77,9 @@ def correlation_nodewise(thicket, column1=None, column2=None, correlation="pears raise ValueError( "Invalid correlation, options are pearson, spearman, and kendall." ) - thicket.statsframe.dataframe[ - column1 + "_vs_" + column2 + " " + correlation - ] = correlated + thicket.statsframe.dataframe[column1 + "_vs_" + column2 + " " + correlation] = ( + correlated + ) output_column_names.append(column1 + "_vs_" + column2 + " " + correlation) # columnar joined thicket object else: diff --git a/thicket/stats/display_boxplot.py b/thicket/stats/display_boxplot.py index ba9bd139..ecc72358 100644 --- a/thicket/stats/display_boxplot.py +++ b/thicket/stats/display_boxplot.py @@ -7,10 +7,13 @@ import seaborn as sns import hatchet as ht +from hatchet.util.perf_measure import annotate + import thicket as th from ..utils import verify_thicket_structures +@annotate() def display_boxplot(thicket, nodes=None, columns=None, **kwargs): """Display a boxplot for each user passed node(s) and column(s). The passed nodes and columns must be from the performance data table. diff --git a/thicket/stats/display_heatmap.py b/thicket/stats/display_heatmap.py index 54b774cf..ea7df031 100644 --- a/thicket/stats/display_heatmap.py +++ b/thicket/stats/display_heatmap.py @@ -6,9 +6,13 @@ import seaborn as sns import thicket as th + +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures +@annotate() def display_heatmap(thicket, columns=None, **kwargs): """Display a heatmap which contains a full list of nodes and user passed columns. Columns must be from the aggregated statistics table. diff --git a/thicket/stats/display_histogram.py b/thicket/stats/display_histogram.py index 06358f96..bee175d2 100644 --- a/thicket/stats/display_histogram.py +++ b/thicket/stats/display_histogram.py @@ -4,11 +4,13 @@ # SPDX-License-Identifier: MIT import hatchet as ht +from hatchet.util.perf_measure import annotate import thicket as th from ..utils import verify_thicket_structures +@annotate() def display_histogram(thicket, node=None, column=None, **kwargs): """Display a histogram for a user passed node and column. Node and column must come from the performance data table. diff --git a/thicket/stats/display_violinplot.py b/thicket/stats/display_violinplot.py index 542e2cf8..a5b90e70 100644 --- a/thicket/stats/display_violinplot.py +++ b/thicket/stats/display_violinplot.py @@ -8,11 +8,14 @@ import matplotlib as mpl import hatchet as ht +from hatchet.util.perf_measure import annotate + import thicket as th from .percentiles import percentiles from ..utils import verify_thicket_structures +@annotate() def _column_name_mapper(current_cols): """ Internal function that returns a string representation of 'current_cols'. @@ -31,6 +34,7 @@ def _column_name_mapper(current_cols): return str(current_cols) +@annotate() def _add_percentile_lines( graph, graphType, @@ -164,6 +168,7 @@ def _add_percentile_lines( return graph +@annotate() def display_violinplot( thicket, nodes=None, @@ -303,6 +308,7 @@ def display_violinplot( return sns.violinplot(data=filtered_df, x="node", y=" ", **kwargs) +@annotate() def display_violinplot_thicket( thickets, nodes=None, diff --git a/thicket/stats/maximum.py b/thicket/stats/maximum.py index 7c30e408..d8303eff 100644 --- a/thicket/stats/maximum.py +++ b/thicket/stats/maximum.py @@ -3,11 +3,14 @@ # # SPDX-License-Identifier: MIT +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op @cache_stats_op +@annotate() def maximum(thicket, columns=None): """Determine the maximum for each node in the performance data table. diff --git a/thicket/stats/mean.py b/thicket/stats/mean.py index 6f43b155..2f9c0235 100644 --- a/thicket/stats/mean.py +++ b/thicket/stats/mean.py @@ -5,11 +5,14 @@ import numpy as np +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op @cache_stats_op +@annotate() def mean(thicket, columns=None): """Calculate the mean for each node in the performance data table. diff --git a/thicket/stats/median.py b/thicket/stats/median.py index ce9fbd7c..85726568 100644 --- a/thicket/stats/median.py +++ b/thicket/stats/median.py @@ -5,11 +5,14 @@ import numpy as np +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op @cache_stats_op +@annotate() def median(thicket, columns=None): """Calculate the median for each node in the performance data table. diff --git a/thicket/stats/minimum.py b/thicket/stats/minimum.py index 8e4a27bd..c8561ad1 100644 --- a/thicket/stats/minimum.py +++ b/thicket/stats/minimum.py @@ -3,11 +3,14 @@ # # SPDX-License-Identifier: MIT +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op @cache_stats_op +@annotate() def minimum(thicket, columns=None): """Determine the minimum for each node in the performance data table. diff --git a/thicket/stats/percentiles.py b/thicket/stats/percentiles.py index 22b037f4..d2eeb24f 100644 --- a/thicket/stats/percentiles.py +++ b/thicket/stats/percentiles.py @@ -5,11 +5,14 @@ import pandas as pd +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op @cache_stats_op +@annotate() def percentiles(thicket, columns=None, percentiles=[0.25, 0.50, 0.75]): """Calculate the q-th percentile for each node in the performance data table. diff --git a/thicket/stats/preference.py b/thicket/stats/preference.py index 91b47d7c..c4c13231 100644 --- a/thicket/stats/preference.py +++ b/thicket/stats/preference.py @@ -3,6 +3,8 @@ # # SPDX-License-Identifier: MIT +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op from .ttest import __ttest @@ -11,6 +13,7 @@ @cache_stats_op +@annotate() def preference(thicket, columns, comparison_func, *args, test="ttest", **kwargs): """Determine a preference between compilers, architecture, platform, etc. @@ -108,12 +111,12 @@ def preference(thicket, columns, comparison_func, *args, test="ttest", **kwargs) ) col_name = ["Preference", aggregated_cols] - thicket.statsframe.dataframe[ - (col_name[0], col_name[1] + "_std_preferred") - ] = pref_std - thicket.statsframe.dataframe[ - (col_name[0], col_name[1] + "_mean_preferred") - ] = pref_mean + thicket.statsframe.dataframe[(col_name[0], col_name[1] + "_std_preferred")] = ( + pref_std + ) + thicket.statsframe.dataframe[(col_name[0], col_name[1] + "_mean_preferred")] = ( + pref_mean + ) output_column_names.append((col_name[0], col_name[1] + "_std_preferred")) output_column_names.append((col_name[0], col_name[1] + "_mean_preferred")) diff --git a/thicket/stats/scoring.py b/thicket/stats/scoring.py index eea4d0be..3d63d64f 100644 --- a/thicket/stats/scoring.py +++ b/thicket/stats/scoring.py @@ -2,11 +2,14 @@ import numpy as np +from hatchet.util.perf_measure import annotate + import thicket as th from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op +@annotate() def _calc_score_delta_mean_delta_stdnorm(means_1, means_2, stds_1, stds_2, num_nodes): results = [] @@ -23,6 +26,7 @@ def _calc_score_delta_mean_delta_stdnorm(means_1, means_2, stds_1, stds_2, num_n return results +@annotate() def _calc_score_delta_mean_delta_coefficient_of_variation( means_1, means_2, stds_1, stds_2, num_nodes ): @@ -38,6 +42,7 @@ def _calc_score_delta_mean_delta_coefficient_of_variation( return results +@annotate() def _calc_score_bhattacharyya(means_1, means_2, stds_1, stds_2, num_nodes): results = [] @@ -60,6 +65,7 @@ def _calc_score_bhattacharyya(means_1, means_2, stds_1, stds_2, num_nodes): return results +@annotate() def _calc_score_hellinger(means_1, means_2, stds_1, stds_2, num_nodes): results = [] @@ -79,6 +85,7 @@ def _calc_score_hellinger(means_1, means_2, stds_1, stds_2, num_nodes): return results +@annotate() def score(thicket, columns, output_column_name, scoring_function): if isinstance(columns, list) is False: raise ValueError("Value passed to 'columns' must be of type list.") @@ -158,6 +165,7 @@ def score(thicket, columns, output_column_name, scoring_function): @cache_stats_op +@annotate() def score_delta_mean_delta_stdnorm(thicket, columns, output_column_name=None): r""" Apply a mean difference with standard deviation difference algorithm on two @@ -192,6 +200,7 @@ def score_delta_mean_delta_stdnorm(thicket, columns, output_column_name=None): @cache_stats_op +@annotate() def score_delta_mean_delta_coefficient_of_variation( thicket, columns, output_column_name=None ): @@ -230,6 +239,7 @@ def score_delta_mean_delta_coefficient_of_variation( @cache_stats_op +@annotate() def score_bhattacharyya(thicket, columns, output_column_name=None): r""" Apply the Bhattacharrya distance algorithm on two passed columns. The passed columns @@ -263,6 +273,7 @@ def score_bhattacharyya(thicket, columns, output_column_name=None): @cache_stats_op +@annotate() def score_hellinger(thicket, columns, output_column_name=None): r""" Apply the Hellinger's distance algorithm on two passed columns. The passed columns diff --git a/thicket/stats/std.py b/thicket/stats/std.py index f432c90e..4b4dc730 100644 --- a/thicket/stats/std.py +++ b/thicket/stats/std.py @@ -5,11 +5,14 @@ import numpy as np +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op @cache_stats_op +@annotate() def std(thicket, columns=None): """Calculate the standard deviation for each node in the performance data table. diff --git a/thicket/stats/ttest.py b/thicket/stats/ttest.py index aadbd760..93847b3a 100644 --- a/thicket/stats/ttest.py +++ b/thicket/stats/ttest.py @@ -7,9 +7,12 @@ from scipy.stats import ttest_ind_from_stats from scipy.stats import t +from hatchet.util.perf_measure import annotate + import thicket as th +@annotate() def __ttest(thicket, columns, alpha=0.05, *args, **kwargs): """Perform a ttest on a user-selected thicket and columns. diff --git a/thicket/stats/variance.py b/thicket/stats/variance.py index 73b0c5bf..fb120bcf 100644 --- a/thicket/stats/variance.py +++ b/thicket/stats/variance.py @@ -5,11 +5,14 @@ import numpy as np +from hatchet.util.perf_measure import annotate + from ..utils import verify_thicket_structures from .stats_utils import cache_stats_op @cache_stats_op +@annotate() def variance(thicket, columns=None): """Calculate the variance for each node in the performance data table. diff --git a/thicket/thicket.py b/thicket/thicket.py index 8d0e2a33..472b1d38 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -18,6 +18,7 @@ from hatchet import GraphFrame from hatchet.graph import Graph from hatchet.query import QueryEngine +from hatchet.util.perf_measure import annotate from thicket.query import ( Query, ObjectQuery, @@ -45,11 +46,15 @@ from .external.console import ThicketRenderer +_thicket_annotate = annotate(fmt="Thicket.{}") + + class Thicket(GraphFrame): """Ensemble of profiles, includes a graph and three dataframes, performance data, metadata, and aggregated statistics. """ + @_thicket_annotate def __init__( self, graph, @@ -101,6 +106,7 @@ def __init__( else: self.statsframe_ops_cache = statsframe_ops_cache + @_thicket_annotate def __eq__(self, other): """Compare two thicket objects. @@ -125,6 +131,7 @@ def __eq__(self, other): and self.statsframe.dataframe.equals(other.statsframe.dataframe) ) + @_thicket_annotate def __str__(self): s = ( "graph: " @@ -154,6 +161,7 @@ def __str__(self): return s @staticmethod + @_thicket_annotate def profile_hasher(obj, hex_len=8): """Convert an object to a profile hash for Thicket. @@ -168,6 +176,7 @@ def profile_hasher(obj, hex_len=8): return int(md5(obj.encode("utf-8")).hexdigest()[:hex_len], 16) @staticmethod + @_thicket_annotate def thicketize_graphframe(gf, prf): """Necessary function to handle output from using GraphFrame readers. @@ -211,15 +220,18 @@ def thicketize_graphframe(gf, prf): return th @staticmethod + @_thicket_annotate def from_pickle(filename, **kwargs): """Read in a Thicket from a pickle file.""" return pickle.load(open(filename, "rb"), **kwargs) + @_thicket_annotate def to_pickle(self, filename, **kwargs): """Write a Thicket to a pickle file.""" pickle.dump(self, open(filename, "wb"), **kwargs) @staticmethod + @_thicket_annotate def from_caliper( filename_or_stream, query=None, @@ -247,6 +259,7 @@ def from_caliper( ) @staticmethod + @_thicket_annotate def from_hpctoolkit( dirname, intersection=False, fill_perfdata=True, disable_tqdm=False ): @@ -271,6 +284,7 @@ def from_hpctoolkit( ) @staticmethod + @_thicket_annotate def from_caliperreader( filename_or_caliperreader, intersection=False, @@ -295,6 +309,7 @@ def from_caliperreader( ) @staticmethod + @_thicket_annotate def from_literal(graph_dict): """Create a Thicket from a list of dictionarires. @@ -331,6 +346,7 @@ def from_literal(graph_dict): return tk @staticmethod + @_thicket_annotate def reader_dispatch( func, intersection, fill_perfdata, disable_tqdm, *args, **kwargs ): @@ -414,6 +430,7 @@ def reader_dispatch( return ens_list.pop(0) @staticmethod + @_thicket_annotate def concat_thickets( thickets, axis="index", calltree="union", disable_tqdm=False, **kwargs ): @@ -492,18 +509,21 @@ def _columns( return ct @staticmethod + @_thicket_annotate def columnar_join(thicket_list, header_list=None, metadata_key=None): raise ValueError( "columnar_join is deprecated. Use 'concat_thickets(axis='columns'...)' instead." ) @staticmethod + @_thicket_annotate def unify_ensemble(th_list, from_statsframes=False): raise ValueError( "unify_ensemble is deprecated. Use 'concat_thickets(axis='index'...)' instead." ) @staticmethod + @_thicket_annotate def from_json(json_thicket): # deserialize the json thicket_dict = json.loads(json_thicket) @@ -548,6 +568,7 @@ def from_json(json_thicket): # make and return thicket? return th + @_thicket_annotate def add_ncu(self, ncu_report_mapping, chosen_metrics=None, overwrite=False): """Add NCU data into the PerformanceDataFrame @@ -611,6 +632,7 @@ def _rep_agg_func(col): rsuffix="_right", ) + @_thicket_annotate def metadata_column_to_perfdata(self, metadata_key, overwrite=False, drop=False): """Add a column from the metadata table to the performance data table. @@ -641,6 +663,7 @@ def metadata_column_to_perfdata(self, metadata_key, overwrite=False, drop=False) if drop: self.metadata.drop(metadata_key, axis=1, inplace=True) + @_thicket_annotate def squash(self, update_inc_cols=True, new_statsframe=True): """Rewrite the Graph to include only nodes present in the performance data table's rows. @@ -788,6 +811,7 @@ def rewire(node, new_parent, visited): return new_tk + @_thicket_annotate def copy(self): """Return a partially shallow copy of the Thicket. @@ -819,6 +843,7 @@ def copy(self): statsframe_ops_cache=self.statsframe_ops_cache.copy(), ) + @_thicket_annotate def deepcopy(self): """Return a deep copy of the Thicket. @@ -852,6 +877,7 @@ def deepcopy(self): statsframe_ops_cache=self.statsframe_ops_cache.copy(), ) + @_thicket_annotate def tree( self, metric_column=None, @@ -1003,6 +1029,7 @@ def tree( ) @staticmethod + @_thicket_annotate def from_statsframes(tk_list, metadata_key=None, disable_tqdm=False): """Compose a list of Thickets with data in their statsframes. @@ -1095,6 +1122,7 @@ def _agg_to_set(obj): tk_copy_list, from_statsframes=True, disable_tqdm=disable_tqdm ) + @_thicket_annotate def to_json(self, ensemble=True, metadata=True, stats=True): jsonified_thicket = {} @@ -1138,6 +1166,7 @@ def to_json(self, ensemble=True, metadata=True, stats=True): return json.dumps(jsonified_thicket) + @_thicket_annotate def intersection(self): """Perform an intersection operation on a thicket. @@ -1161,6 +1190,7 @@ def intersection(self): return intersected_th + @_thicket_annotate def filter_metadata(self, select_function): """Filter thicket object based on a metadata key. @@ -1225,6 +1255,7 @@ def filter_metadata(self, select_function): return new_thicket + @_thicket_annotate def filter_profile(self, profile_list): """Filter thicket object based on a list of profiles. @@ -1246,6 +1277,7 @@ def filter_profile(self, profile_list): return new_thicket + @_thicket_annotate def filter(self, filter_func): """Overloaded generic filter function. @@ -1255,6 +1287,7 @@ def filter(self, filter_func): "Invalid function: thicket.filter(), please use thicket.filter_metadata() or thicket.filter_stats()" ) + @_thicket_annotate def query( self, query_obj, squash=True, update_inc_cols=True, multi_index_mode="off" ): @@ -1305,6 +1338,7 @@ def query( return filtered_th.squash(update_inc_cols=update_inc_cols) return filtered_th + @_thicket_annotate def query_stats(self, query_obj, squash=True, update_inc_cols=True): """Apply a Hatchet query to the Thicket object. @@ -1378,6 +1412,7 @@ def query_stats(self, query_obj, squash=True, update_inc_cols=True): return filtered_th + @_thicket_annotate def reapply_stats_operations(self): """Reapply most recent stats operations.""" @@ -1395,6 +1430,7 @@ def reapply_stats_operations(self): validate_nodes(self) + @_thicket_annotate def groupby(self, by): """Create sub-thickets based on unique values in metadata column(s). @@ -1445,6 +1481,7 @@ def groupby(self, by): return GroupBy(by, sub_thickets) + @_thicket_annotate def filter_stats(self, filter_function): """Filter thicket object based on a stats column. @@ -1477,6 +1514,7 @@ def filter_stats(self, filter_function): return new_thicket + @_thicket_annotate def get_unique_metadata(self): """Get unique values per column in metadata. @@ -1514,6 +1552,7 @@ def get_unique_metadata(self): return sorted_meta + @_thicket_annotate def _sync_profile_components(self, component): """Synchronize the Performance DataFrame, Metadata Dataframe, profile and profile mapping objects based on the component's index or a list of profiles. diff --git a/thicket/utils.py b/thicket/utils.py index 4373cf01..2f0a36d6 100644 --- a/thicket/utils.py +++ b/thicket/utils.py @@ -9,6 +9,8 @@ import numpy as np import pandas as pd +from hatchet.util.perf_measure import annotate + from thicket import helpers @@ -29,6 +31,7 @@ class InvalidNameError(ValueError): pass +@annotate() def check_same_frame(n1, n2): if n1.frame != n2.frame: raise ValueError( @@ -36,6 +39,7 @@ def check_same_frame(n1, n2): ) +@annotate() def check_duplicate_metadata_key(thickets, metadata_key): """Check for duplicate values in the metadata of a list of Thickets for column 'metadata_key'. @@ -65,6 +69,7 @@ def check_duplicate_metadata_key(thickets, metadata_key): ) +@annotate() def validate_dataframe(df): """Check validity of a Thicket DataFrame.""" @@ -105,6 +110,7 @@ def _validate_name_column(df): _validate_name_column(df) +@annotate() def validate_profile(tk): """Check validity of Thicket objects that rely on profiles. Thicket.dataframe, Thicket.metadata, Thicket.profile, Thicket.profile_mapping.""" @@ -163,6 +169,7 @@ def _validate_no_duplicates(tk): _validate_no_duplicates(tk) +@annotate() def verify_sorted_profile(thicket_component): """Assertion to check if profiles are sorted in a thicket dataframe @@ -182,6 +189,7 @@ def verify_sorted_profile(thicket_component): ) +@annotate() def verify_thicket_structures(thicket_component, columns=[], index=[]): """Assertion for missing input requirements to execute thicket functions. @@ -229,6 +237,7 @@ def verify_thicket_structures(thicket_component, columns=[], index=[]): ) +@annotate() def validate_nodes(tk): """Check if node objects match between Thicket.graph, Thicket.dataframe, and Thicket.statsframe.dataframe.""" @@ -260,6 +269,7 @@ def validate_nodes(tk): ) +@annotate() def _fill_perfdata(df): """Create full index for DataFrame and fill created rows with NaN's or Nones where applicable.