tompollard · 260147169 · Oct 7, 2022 · Oct 22, 2022
diff --git a/tableone/tableone.py b/tableone/tableone.py
@@ -108,7 +108,8 @@ class TableOne(object):
         `holm-sidak` : step down method using Sidak adjustments
         `simes-hochberg` : step-up method (independent)
         `hommel` : closed method based on Simes tests (non-negative)
-
+    test_stat : bool, optional
+        Display a column with hypothesis test statistics (default: False).
     htest_name : bool, optional
         Display a column with the names of hypothesis tests (default: False).
     htest : dict, optional
@@ -206,7 +207,8 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None,
                  nonnormal: Optional[list] = None,
                  min_max: Optional[list] = None, pval: Optional[bool] = False,
                  pval_adjust: Optional[str] = None, htest_name: bool = False,
-                 pval_test_name: bool = False, htest: Optional[dict] = None,
+                 pval_test_name: bool = False, test_stat: bool = False,
+                 htest: Optional[dict] = None,
                  isnull: Optional[bool] = None, missing: bool = True,
                  ddof: int = 1, labels: Optional[dict] = None,
                  rename: Optional[dict] = None, sort: Union[bool, str] = False,
@@ -345,6 +347,7 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None,
         self._min_max = min_max
         self._pval = pval
         self._pval_adjust = pval_adjust
+        self._test_stat = test_stat
         self._htest = htest
         self._sort = sort
         self._groupby = groupby
@@ -386,6 +389,8 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None,
         if self._pval:
             self._htest_table = self._create_htest_table(data)
 
+        if test_stat and not pval:
+            self._htest_table = self._create_htest_table(data)
         # correct for multiple testing
         if self._pval and self._pval_adjust:
             alpha = 0.05
@@ -1072,7 +1077,7 @@ def _create_htest_table(self, data):
         # list features of the variable e.g. matched, paired, n_expected
         df = pd.DataFrame(index=self._continuous+self._categorical,
                           columns=['continuous', 'nonnormal',
-                                   'min_observed', 'P-Value', 'Test'])
+                                   'min_observed', 'Test-stat', 'P-Value', 'Test'])
 
         df.index = df.index.rename('variable')
         df['continuous'] = np.where(df.index.isin(self._continuous),
@@ -1111,7 +1116,7 @@ def _create_htest_table(self, data):
             df.loc[v, 'min_observed'] = min_observed
 
             # compute pvalues
-            (df.loc[v, 'P-Value'],
+            (df.loc[v,'Test-stat'],df.loc[v, 'P-Value'],
                 df.loc[v, 'Test']) = self._p_test(v, grouped_data,
                                                   is_continuous,
                                                   is_categorical, is_normal,
@@ -1204,6 +1209,8 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical,
 
         Returns
         ----------
+            test_stat : float
+                The test statistic (NaN for Fisher's exact test).
             pval : float
                 The computed P-Value.
             ptest : str
@@ -1247,14 +1254,22 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical,
             # default to chi-squared
             ptest = 'Chi-squared'
             grouped_val_list = [x for x in grouped_data.values()]
-            _, pval, _, expected = stats.chi2_contingency(
+            test_stat, pval, _, expected = stats.chi2_contingency(
                 grouped_val_list)
             # if any expected cell counts are < 5, chi2 may not be valid
             # if this is a 2x2, switch to fisher exact
             if expected.min() < 5 or min_observed < 5:
                 if np.shape(grouped_val_list) == (2, 2):
                     ptest = "Fisher's exact"
                     odds_ratio, pval = stats.fisher_exact(grouped_val_list)
+                    test_stat = np.nan
+                    fisher_stat_warn = ("Fisher's test did not compute "
+                                        "statistics of hypothesis testing. "
+                                        "The following variables are affected")
+                    try:
+                        self._warnings[fisher_stat_warn].append(v)
+                    except KeyError:
+                        self._warnings[fisher_stat_warn] = [v]
                 else:
                     ptest = "Chi-squared (warning: expected count < 5)"
                     chi_warn = ("Chi-squared tests for the following "
@@ -1265,7 +1280,7 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical,
                     except KeyError:
                         self._warnings[chi_warn] = [v]
 
-        return pval, ptest
+        return test_stat, pval, ptest
 
     def _create_cont_table(self, data, overall):
         """
@@ -1294,6 +1309,10 @@ def _create_cont_table(self, data, overall):
         table['value'] = ''
         table = table.set_index([table.index, 'value'])
 
+        # add test_stat column
+        if self._test_stat:
+            table = table.join(self._htest_table[['Test-stat']])
+
         # add pval column
         if self._pval and self._pval_adjust:
             table = table.join(self._htest_table[['P-Value (adjusted)',
@@ -1334,6 +1353,10 @@ def _create_cat_table(self, data, overall):
             table.columns = table.columns.astype(str)
             table = table.join(isnull)
 
+        # add test_stat column
+        if self._test_stat:
+            table = table.join(self._htest_table[['Test-stat']])
+
         # add pval column
         if self._pval and self._pval_adjust:
             table = table.join(self._htest_table[['P-Value (adjusted)',
@@ -1378,7 +1401,7 @@ def _create_tableone(self, data):
         table.columns = table.columns.values.astype(str)
 
         # sort the table rows
-        sort_columns = ['Missing', 'P-Value', 'P-Value (adjusted)', 'Test']
+        sort_columns = ['Missing', 'Test-stat', 'P-Value', 'P-Value (adjusted)', 'Test']
         if self._smd:
             sort_columns = sort_columns + list(self.smd_table.columns)
 
@@ -1415,6 +1438,11 @@ def _create_tableone(self, data):
                                      '{:.3f}'.format).astype(str)
             table.loc[table['P-Value'] == '0.000', 'P-Value'] = '<0.001'
 
+        # round test-stat column and convert to string
+        if self._test_stat:
+            table['Test-stat'] = table['Test-stat'].apply(
+                                                '{:.3f}'.format).astype(str)
+
         # round smd columns and convert to string
         if self._smd:
             for c in list(self.smd_table.columns):
@@ -1510,7 +1538,7 @@ def _create_tableone(self, data):
         # only display data in first level row
         dupe_mask = table.groupby(level=[0]).cumcount().ne(0)
         dupe_columns = ['Missing']
-        optional_columns = ['P-Value', 'P-Value (adjusted)', 'Test']
+        optional_columns = ['Test-stat', 'P-Value', 'P-Value (adjusted)', 'Test']
         if self._smd:
             optional_columns = optional_columns + list(self.smd_table.columns)
         for col in optional_columns: