From cad10c5595eca5c61f95af1d23bdb80898298a77 Mon Sep 17 00:00:00 2001 From: 260147169 <260147169@qq.com> Date: Sat, 8 Oct 2022 00:20:15 +0800 Subject: [PATCH 1/2] Add the statistics of hypothesis testing Add an option 'test_stat' to display statistics of hypothesis testing (default: False). The statistics are already computed; this option only controls whether they are displayed. --- tableone/tableone.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/tableone/tableone.py b/tableone/tableone.py index a78b255..4ba7a99 100644 --- a/tableone/tableone.py +++ b/tableone/tableone.py @@ -108,7 +108,8 @@ class TableOne(object): `holm-sidak` : step down method using Sidak adjustments `simes-hochberg` : step-up method (independent) `hommel` : closed method based on Simes tests (non-negative) - + test_stat : bool, optional + Display statistics of hypothesis testing (default: False). htest_name : bool, optional Display a column with the names of hypothesis tests (default: False). htest : dict, optional @@ -206,7 +207,8 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None, nonnormal: Optional[list] = None, min_max: Optional[list] = None, pval: Optional[bool] = False, pval_adjust: Optional[str] = None, htest_name: bool = False, - pval_test_name: bool = False, htest: Optional[dict] = None, + pval_test_name: bool = False, test_stat: bool = False, + htest: Optional[dict] = None, isnull: Optional[bool] = None, missing: bool = True, ddof: int = 1, labels: Optional[dict] = None, rename: Optional[dict] = None, sort: Union[bool, str] = False, @@ -345,6 +347,7 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None, self._min_max = min_max self._pval = pval self._pval_adjust = pval_adjust + self._test_stat = test_stat self._htest = htest self._sort = sort self._groupby = groupby @@ -1072,7 +1075,7 @@ def _create_htest_table(self, data): # list features of the variable e.g. 
matched, paired, n_expected df = pd.DataFrame(index=self._continuous+self._categorical, columns=['continuous', 'nonnormal', - 'min_observed', 'P-Value', 'Test']) + 'min_observed', 'Test_stat', 'P-Value', 'Test']) df.index = df.index.rename('variable') df['continuous'] = np.where(df.index.isin(self._continuous), @@ -1111,7 +1114,7 @@ def _create_htest_table(self, data): df.loc[v, 'min_observed'] = min_observed # compute pvalues - (df.loc[v, 'P-Value'], + (df.loc[v,'Test-stat'],df.loc[v, 'P-Value'], df.loc[v, 'Test']) = self._p_test(v, grouped_data, is_continuous, is_categorical, is_normal, @@ -1204,6 +1207,8 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical, Returns ---------- + test_stat : float + The statistics of hypothesis testing pval : float The computed P-Value. ptest : str @@ -1247,7 +1252,7 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical, # default to chi-squared ptest = 'Chi-squared' grouped_val_list = [x for x in grouped_data.values()] - _, pval, _, expected = stats.chi2_contingency( + test_stat, pval, _, expected = stats.chi2_contingency( grouped_val_list) # if any expected cell counts are < 5, chi2 may not be valid # if this is a 2x2, switch to fisher exact @@ -1265,7 +1270,7 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical, except KeyError: self._warnings[chi_warn] = [v] - return pval, ptest + return test_stat, pval, ptest def _create_cont_table(self, data, overall): """ @@ -1294,6 +1299,10 @@ def _create_cont_table(self, data, overall): table['value'] = '' table = table.set_index([table.index, 'value']) + # add test_stat column + if self._test_stat: + table = table.join(self._htest_table[['Test-stat']]) + # add pval column if self._pval and self._pval_adjust: table = table.join(self._htest_table[['P-Value (adjusted)', @@ -1334,6 +1343,10 @@ def _create_cat_table(self, data, overall): table.columns = table.columns.astype(str) table = table.join(isnull) + # add test_stat column + if 
self._test_stat: + table = table.join(self._htest_table[['Test-stat']]) + # add pval column if self._pval and self._pval_adjust: table = table.join(self._htest_table[['P-Value (adjusted)', @@ -1378,7 +1391,7 @@ def _create_tableone(self, data): table.columns = table.columns.values.astype(str) # sort the table rows - sort_columns = ['Missing', 'P-Value', 'P-Value (adjusted)', 'Test'] + sort_columns = ['Missing', 'Test_stat','P-Value', 'P-Value (adjusted)', 'Test'] if self._smd: sort_columns = sort_columns + list(self.smd_table.columns) @@ -1415,6 +1428,11 @@ def _create_tableone(self, data): '{:.3f}'.format).astype(str) table.loc[table['P-Value'] == '0.000', 'P-Value'] = '<0.001' + # round test-stat column and convert to string + if self._test_stat: + table['Test-stat'] = table['Test-stat'].apply( + '{:.3f}'.format).astype(str) + # round smd columns and convert to string if self._smd: for c in list(self.smd_table.columns): @@ -1510,7 +1528,7 @@ def _create_tableone(self, data): # only display data in first level row dupe_mask = table.groupby(level=[0]).cumcount().ne(0) dupe_columns = ['Missing'] - optional_columns = ['P-Value', 'P-Value (adjusted)', 'Test'] + optional_columns = ['Test_stat', 'P-Value', 'P-Value (adjusted)', 'Test'] if self._smd: optional_columns = optional_columns + list(self.smd_table.columns) for col in optional_columns: From 849f56f98823a339c087aa927812d804dd54597b Mon Sep 17 00:00:00 2001 From: 260147169 <260147169@qq.com> Date: Sat, 22 Oct 2022 19:36:15 +0800 Subject: [PATCH 2/2] Update test_stat --- tableone/tableone.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tableone/tableone.py b/tableone/tableone.py index 4ba7a99..828d08f 100644 --- a/tableone/tableone.py +++ b/tableone/tableone.py @@ -389,6 +389,8 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None, if self._pval: self._htest_table = self._create_htest_table(data) + if test_stat and not pval: + self._htest_table = 
self._create_htest_table(data) # correct for multiple testing if self._pval and self._pval_adjust: alpha = 0.05 @@ -1260,6 +1262,14 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical, if np.shape(grouped_val_list) == (2, 2): ptest = "Fisher's exact" odds_ratio, pval = stats.fisher_exact(grouped_val_list) + test_stat = np.nan + fisher_stat_warn = ("Fisher's test did not caompute " + "statistics of hypothesis testing. " + "The following variables are affected") + try: + self._warnings[fisher_stat_warn].append(v) + except KeyError: + self._warnings[fisher_stat_warn] = [v] else: ptest = "Chi-squared (warning: expected count < 5)" chi_warn = ("Chi-squared tests for the following " @@ -1528,7 +1538,7 @@ def _create_tableone(self, data): # only display data in first level row dupe_mask = table.groupby(level=[0]).cumcount().ne(0) dupe_columns = ['Missing'] - optional_columns = ['Test_stat', 'P-Value', 'P-Value (adjusted)', 'Test'] + optional_columns = ['Test-stat', 'P-Value', 'P-Value (adjusted)', 'Test'] if self._smd: optional_columns = optional_columns + list(self.smd_table.columns) for col in optional_columns: