Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 36 additions & 8 deletions tableone/tableone.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ class TableOne(object):
`holm-sidak` : step down method using Sidak adjustments
`simes-hochberg` : step-up method (independent)
`hommel` : closed method based on Simes tests (non-negative)

test_stat : bool, optional
Display a column with the test statistic of each hypothesis test (default: False).
htest_name : bool, optional
Display a column with the names of hypothesis tests (default: False).
htest : dict, optional
Expand Down Expand Up @@ -206,7 +207,8 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None,
nonnormal: Optional[list] = None,
min_max: Optional[list] = None, pval: Optional[bool] = False,
pval_adjust: Optional[str] = None, htest_name: bool = False,
pval_test_name: bool = False, htest: Optional[dict] = None,
pval_test_name: bool = False, test_stat: bool = False,
htest: Optional[dict] = None,
isnull: Optional[bool] = None, missing: bool = True,
ddof: int = 1, labels: Optional[dict] = None,
rename: Optional[dict] = None, sort: Union[bool, str] = False,
Expand Down Expand Up @@ -345,6 +347,7 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None,
self._min_max = min_max
self._pval = pval
self._pval_adjust = pval_adjust
self._test_stat = test_stat
self._htest = htest
self._sort = sort
self._groupby = groupby
Expand Down Expand Up @@ -386,6 +389,8 @@ def __init__(self, data: pd.DataFrame, columns: Optional[list] = None,
if self._pval:
self._htest_table = self._create_htest_table(data)

if test_stat and not pval:
self._htest_table = self._create_htest_table(data)
# correct for multiple testing
if self._pval and self._pval_adjust:
alpha = 0.05
Expand Down Expand Up @@ -1072,7 +1077,7 @@ def _create_htest_table(self, data):
# list features of the variable e.g. matched, paired, n_expected
df = pd.DataFrame(index=self._continuous+self._categorical,
columns=['continuous', 'nonnormal',
'min_observed', 'P-Value', 'Test'])
'min_observed', 'Test_stat', 'P-Value', 'Test'])

df.index = df.index.rename('variable')
df['continuous'] = np.where(df.index.isin(self._continuous),
Expand Down Expand Up @@ -1111,7 +1116,7 @@ def _create_htest_table(self, data):
df.loc[v, 'min_observed'] = min_observed

# compute the test statistic, p-value and test name for this variable
(df.loc[v, 'P-Value'],
(df.loc[v,'Test-stat'],df.loc[v, 'P-Value'],
df.loc[v, 'Test']) = self._p_test(v, grouped_data,
is_continuous,
is_categorical, is_normal,
Expand Down Expand Up @@ -1204,6 +1209,8 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical,

Returns
----------
test_stat : float
The test statistic of the hypothesis test (NaN when Fisher's exact test is used).
pval : float
The computed P-Value.
ptest : str
Expand Down Expand Up @@ -1247,14 +1254,22 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical,
# default to chi-squared
ptest = 'Chi-squared'
grouped_val_list = [x for x in grouped_data.values()]
_, pval, _, expected = stats.chi2_contingency(
test_stat, pval, _, expected = stats.chi2_contingency(
grouped_val_list)
# if any expected cell counts are < 5, chi2 may not be valid
# if this is a 2x2, switch to fisher exact
if expected.min() < 5 or min_observed < 5:
if np.shape(grouped_val_list) == (2, 2):
ptest = "Fisher's exact"
odds_ratio, pval = stats.fisher_exact(grouped_val_list)
test_stat = np.nan
fisher_stat_warn = ("Fisher's test did not caompute "
"statistics of hypothesis testing. "
"The following variables are affected")
try:
self._warnings[fisher_stat_warn].append(v)
except KeyError:
self._warnings[fisher_stat_warn] = [v]
else:
ptest = "Chi-squared (warning: expected count < 5)"
chi_warn = ("Chi-squared tests for the following "
Expand All @@ -1265,7 +1280,7 @@ def _p_test(self, v, grouped_data, is_continuous, is_categorical,
except KeyError:
self._warnings[chi_warn] = [v]

return pval, ptest
return test_stat, pval, ptest

def _create_cont_table(self, data, overall):
"""
Expand Down Expand Up @@ -1294,6 +1309,10 @@ def _create_cont_table(self, data, overall):
table['value'] = ''
table = table.set_index([table.index, 'value'])

# add test_stat column
if self._test_stat:
table = table.join(self._htest_table[['Test-stat']])

# add pval column
if self._pval and self._pval_adjust:
table = table.join(self._htest_table[['P-Value (adjusted)',
Expand Down Expand Up @@ -1334,6 +1353,10 @@ def _create_cat_table(self, data, overall):
table.columns = table.columns.astype(str)
table = table.join(isnull)

# add test_stat column
if self._test_stat:
table = table.join(self._htest_table[['Test-stat']])

# add pval column
if self._pval and self._pval_adjust:
table = table.join(self._htest_table[['P-Value (adjusted)',
Expand Down Expand Up @@ -1378,7 +1401,7 @@ def _create_tableone(self, data):
table.columns = table.columns.values.astype(str)

# sort the table rows
sort_columns = ['Missing', 'P-Value', 'P-Value (adjusted)', 'Test']
sort_columns = ['Missing', 'Test_stat','P-Value', 'P-Value (adjusted)', 'Test']
if self._smd:
sort_columns = sort_columns + list(self.smd_table.columns)

Expand Down Expand Up @@ -1415,6 +1438,11 @@ def _create_tableone(self, data):
'{:.3f}'.format).astype(str)
table.loc[table['P-Value'] == '0.000', 'P-Value'] = '<0.001'

# round test-stat column and convert to string
if self._test_stat:
table['Test-stat'] = table['Test-stat'].apply(
'{:.3f}'.format).astype(str)

# round smd columns and convert to string
if self._smd:
for c in list(self.smd_table.columns):
Expand Down Expand Up @@ -1510,7 +1538,7 @@ def _create_tableone(self, data):
# only display data in first level row
dupe_mask = table.groupby(level=[0]).cumcount().ne(0)
dupe_columns = ['Missing']
optional_columns = ['P-Value', 'P-Value (adjusted)', 'Test']
optional_columns = ['Test-stat', 'P-Value', 'P-Value (adjusted)', 'Test']
if self._smd:
optional_columns = optional_columns + list(self.smd_table.columns)
for col in optional_columns:
Expand Down