From cad10c5595eca5c61f95af1d23bdb80898298a77 Mon Sep 17 00:00:00 2001
From: 260147169 <260147169@qq.com>
Date: Sat, 8 Oct 2022 00:20:15 +0800
Subject: [PATCH 1/2] Add the statistics of hypothesis testing

Add a 'test_stat' option to display the test statistic of each hypothesis test (default: False). The statistics are already computed; this option only controls whether they are displayed.
---
 tableone/tableone.py | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/tableone/tableone.py b/tableone/tableone.py
index a78b255..4ba7a99 100644
--- a/tableone/tableone.py
+++ b/tableone/tableone.py
@@ -108,7 +108,8 @@ class TableOne(object):
         `holm-sidak` : step down method using Sidak adjustments
         `simes-hochberg` : step-up method (independent)
         `hommel` : closed method based on Simes tests (non-negative)
-
+    test_stat : bool, optional
+        Display the test statistic of each hypothesis test (default: False).
     htest_name : bool, optional
         Display a column with the names of hypothesis tests (default: False).
     htest : dict, optional
@@ -206,7 +207,8 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None,
                  nonnormal: Optional[list] = None,
                  min_max: Optional[list] = None, pval: Optional[bool] = False,
                  pval_adjust: Optional[str] = None, htest_name: bool = False,
-                 pval_test_name: bool = False, htest: Optional[dict] = None,
+                 pval_test_name: bool = False, test_stat: bool = False,
+                 htest: Optional[dict] = None,
                  isnull: Optional[bool] = None, missing: bool = True,
                  ddof: int = 1, labels: Optional[dict] = None,
                  rename: Optional[dict] = None, sort: Union[bool, str] = False,
@@ -345,6 +347,7 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None,
         self._min_max = min_max
         self._pval = pval
         self._pval_adjust = pval_adjust
+        self._test_stat = test_stat
         self._htest = htest
         self._sort = sort
         self._groupby = groupby
@@ -1072,7 +1075,7 @@ def _create_htest_table(self, data):
         # list features of the variable e.g. matched, paired, n_expected
         df = pd.DataFrame(index=self._continuous+self._categorical,
                           columns=['continuous', 'nonnormal',
-                                   'min_observed', 'P-Value', 'Test'])
+                                   'min_observed', 'Test_stat', 'P-Value', 'Test'])
 
         df.index = df.index.rename('variable')
         df['continuous'] = np.where(df.index.isin(self._continuous),
@@ -1111,7 +1114,7 @@ def _create_htest_table(self, data):
             df.loc[v, 'min_observed'] = min_observed
 
             # compute pvalues
-            (df.loc[v, 'P-Value'],
+            (df.loc[v,'Test-stat'],df.loc[v, 'P-Value'],
                 df.loc[v, 'Test']) = self._p_test(v, grouped_data,
                                                   is_continuous,
                                                   is_categorical, is_normal,
@@ -1204,6 +1207,8 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical,
 
         Returns
         ----------
+            test_stat : float
+                The computed test statistic.
             pval : float
                 The computed P-Value.
             ptest : str
@@ -1247,7 +1252,7 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical,
             # default to chi-squared
             ptest = 'Chi-squared'
             grouped_val_list = [x for x in grouped_data.values()]
-            _, pval, _, expected = stats.chi2_contingency(
+            test_stat, pval, _, expected = stats.chi2_contingency(
                 grouped_val_list)
             # if any expected cell counts are < 5, chi2 may not be valid
             # if this is a 2x2, switch to fisher exact
@@ -1265,7 +1270,7 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical,
                     except KeyError:
                         self._warnings[chi_warn] = [v]
 
-        return pval, ptest
+        return test_stat, pval, ptest
 
     def _create_cont_table(self, data, overall):
         """
@@ -1294,6 +1299,10 @@ def _create_cont_table(self, data, overall):
         table['value'] = ''
         table = table.set_index([table.index, 'value'])
 
+        # add test_stat column
+        if self._test_stat:
+            table = table.join(self._htest_table[['Test-stat']])
+
         # add pval column
         if self._pval and self._pval_adjust:
             table = table.join(self._htest_table[['P-Value (adjusted)',
@@ -1334,6 +1343,10 @@ def _create_cat_table(self, data, overall):
             table.columns = table.columns.astype(str)
             table = table.join(isnull)
 
+        # add test_stat column
+        if self._test_stat:
+            table = table.join(self._htest_table[['Test-stat']])
+
         # add pval column
         if self._pval and self._pval_adjust:
             table = table.join(self._htest_table[['P-Value (adjusted)',
@@ -1378,7 +1391,7 @@ def _create_tableone(self, data):
         table.columns = table.columns.values.astype(str)
 
         # sort the table rows
-        sort_columns = ['Missing', 'P-Value', 'P-Value (adjusted)', 'Test']
+        sort_columns = ['Missing', 'Test_stat','P-Value', 'P-Value (adjusted)', 'Test']
         if self._smd:
             sort_columns = sort_columns + list(self.smd_table.columns)
 
@@ -1415,6 +1428,11 @@ def _create_tableone(self, data):
                                      '{:.3f}'.format).astype(str)
             table.loc[table['P-Value'] == '0.000', 'P-Value'] = '<0.001'
 
+        # round test-stat column and convert to string
+        if self._test_stat:
+            table['Test-stat'] = table['Test-stat'].apply(
+                                                '{:.3f}'.format).astype(str)
+
         # round smd columns and convert to string
         if self._smd:
             for c in list(self.smd_table.columns):
@@ -1510,7 +1528,7 @@ def _create_tableone(self, data):
         # only display data in first level row
         dupe_mask = table.groupby(level=[0]).cumcount().ne(0)
         dupe_columns = ['Missing']
-        optional_columns = ['P-Value', 'P-Value (adjusted)', 'Test']
+        optional_columns = ['Test_stat', 'P-Value', 'P-Value (adjusted)', 'Test']
         if self._smd:
             optional_columns = optional_columns + list(self.smd_table.columns)
         for col in optional_columns:

From 849f56f98823a339c087aa927812d804dd54597b Mon Sep 17 00:00:00 2001
From: 260147169 <260147169@qq.com>
Date: Sat, 22 Oct 2022 19:36:15 +0800
Subject: [PATCH 2/2] Update test_stat

---
 tableone/tableone.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tableone/tableone.py b/tableone/tableone.py
index 4ba7a99..828d08f 100644
--- a/tableone/tableone.py
+++ b/tableone/tableone.py
@@ -389,6 +389,8 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None,
         if self._pval:
             self._htest_table = self._create_htest_table(data)
 
+        if test_stat and not pval:
+            self._htest_table = self._create_htest_table(data)
         # correct for multiple testing
         if self._pval and self._pval_adjust:
             alpha = 0.05
@@ -1260,6 +1262,14 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical,
                 if np.shape(grouped_val_list) == (2, 2):
                     ptest = "Fisher's exact"
                     odds_ratio, pval = stats.fisher_exact(grouped_val_list)
+                    test_stat = np.nan
+                    fisher_stat_warn = ("Fisher's test did not compute "
+                                        "statistics of hypothesis testing. "
+                                        "The following variables are affected")
+                    try:
+                        self._warnings[fisher_stat_warn].append(v)
+                    except KeyError:
+                        self._warnings[fisher_stat_warn] = [v]
                 else:
                     ptest = "Chi-squared (warning: expected count < 5)"
                     chi_warn = ("Chi-squared tests for the following "
@@ -1528,7 +1538,7 @@ def _create_tableone(self, data):
         # only display data in first level row
         dupe_mask = table.groupby(level=[0]).cumcount().ne(0)
         dupe_columns = ['Missing']
-        optional_columns = ['Test_stat', 'P-Value', 'P-Value (adjusted)', 'Test']
+        optional_columns = ['Test-stat', 'P-Value', 'P-Value (adjusted)', 'Test']
         if self._smd:
             optional_columns = optional_columns + list(self.smd_table.columns)
         for col in optional_columns: