initial add

poldrack · poldrack · commit ac3334a1e610 · 2025-03-19T07:23:21.000-07:00
diff --git a/src/rtanalysis/__init__.py b/src/rtanalysis/__init__.py
@@ -0,0 +1 @@
+"""Analysis of response data to estimate accuracy from response time (RT)."""
diff --git a/src/rtanalysis/generate_testdata.py b/src/rtanalysis/generate_testdata.py
@@ -0,0 +1,65 @@
+"""Utility module for handling the generation of test data."""
+
+import numpy as np
+import pandas as pd
+import scipy.stats
+
+
+def generate_test_df(mean_rt, sd_rt, mean_accuracy, n=100):
+    """Build a mock data frame of response times and trial accuracy.
+
+    Raw RTs are drawn from a Weibull distribution; the RTs of correct trials
+    are then rescaled to the requested mean/SD, while the RTs of incorrect
+    trials keep their raw values.
+
+    Parameters
+    ----------
+    mean_rt : float
+        Target mean RT of the correct trials
+    sd_rt : float
+        Target RT standard deviation of the correct trials
+    mean_accuracy : float
+        Intended proportion of correct trials (between 0 and 1)
+    n : int, optional
+        Number of trials to simulate, by default 100
+
+    Returns
+    -------
+    pd.DataFrame
+        One row per trial with columns ``rt`` and ``accuracy``
+    """
+    raw_rt = pd.Series(scipy.stats.weibull_min.rvs(2, loc=1, size=n))
+
+    # uniform draws, cut at the percentile matching the intended accuracy
+    draws = np.random.rand(n)
+    threshold = scipy.stats.scoreatpercentile(draws, 100 * mean_accuracy)
+    is_correct = pd.Series(draws < threshold)
+
+    # rescale correct trials only (incorrect ones are NaN while scaling) ...
+    scaled = scale_values(raw_rt.where(is_correct), mean_rt, sd_rt)
+
+    # ... then restore the raw RTs on the incorrect trials
+    rt = scaled.mask(~is_correct, raw_rt)
+    return pd.DataFrame({"rt": rt, "accuracy": is_correct})
+
+
+def scale_values(values, mean, sd):
+    """Linearly rescale values to a target mean and standard deviation.
+
+    Parameters
+    ----------
+    values : array-like
+        Values to be scaled; lists and tuples are converted to NumPy arrays
+    mean : float
+        Target mean
+    sd : float
+        Target standard deviation (as measured by ``np.std``)
+
+    Returns
+    -------
+    array-like
+        Scaled values
+    """
+    arr = np.asarray(values) if isinstance(values, (list, tuple)) else values
+    arr = arr * (sd / np.std(arr))
+    return arr - np.mean(arr) + mean
diff --git a/src/rtanalysis/rtanalysis.py b/src/rtanalysis/rtanalysis.py
@@ -0,0 +1,118 @@
+"""Example class to analyze reaction times.
+
+Given a data frame with RT and accuracy, compute mean RT for correct trials and
+mean accuracy.
+"""
+
+import pandas as pd
+
+
+class RTAnalysis:
+    """Response time (RT) analysis."""
+
+    def __init__(self, outlier_cutoff_sd=None):
+        """Initialize a new RTAnalysis instance.
+
+        Parameters
+        ----------
+        outlier_cutoff_sd : float, optional
+            Cutoff for long RT outliers in SDs above the mean, by default None
+        """
+        self.outlier_cutoff_sd = outlier_cutoff_sd
+        self.mean_rt_ = None
+        self.mean_accuracy_ = None
+
+    def fit(self, rt, accuracy, verbose=True):
+        """Compute mean accuracy and the mean RT of correct trials.
+
+        Parameters
+        ----------
+        rt : pd.Series
+            Response time per trial
+        accuracy : pd.Series
+            Accuracy per trial (boolean)
+        verbose : bool, optional
+            Whether to print verbose output or not, by default True
+
+        Raises
+        ------
+        ValueError
+            Length mismatch, zero accuracy, or a non-positive minimum RT
+        AssertionError
+            Accuracy values are not boolean
+        """
+        rt = self._ensure_series_type(rt)
+        accuracy = self._ensure_series_type(accuracy)
+        self._validate_length(rt, accuracy)
+        # Explicit raises instead of ``assert``: checks survive ``python -O``.
+        if not accuracy.dtype == bool:
+            raise AssertionError("accuracy values must be boolean")
+
+        rt = self.reject_outlier_rt(rt, verbose=verbose)
+        self.mean_accuracy_ = accuracy.mean()
+        if not (self.mean_accuracy_ > 0):
+            raise ValueError("Accuracy is zero!")
+        rt = rt.mask(~accuracy)  # only correct trials enter the mean RT
+        self.mean_rt_ = rt.mean()
+        if not (rt.min() > 0):  # NaN (no usable RT) fails this check too
+            raise ValueError("negative response times found")
+        if verbose:
+            print(f"mean RT: {self.mean_rt_}")
+            print(f"mean accuracy: {self.mean_accuracy_}")
+
+    @staticmethod
+    def _validate_length(rt, accuracy):
+        """Validate response time and accuracy series lengths.
+
+        Parameters
+        ----------
+        rt : pd.Series
+            Response time values
+        accuracy : pd.Series
+            Accuracy values
+
+        Raises
+        ------
+        ValueError
+            Length mismatch
+        """
+        if rt.shape[0] != accuracy.shape[0]:
+            raise ValueError("RT and accuracy must be the same length!")
+
+    @staticmethod
+    def _ensure_series_type(var):
+        """Return variable as a pandas Series.
+
+        Parameters
+        ----------
+        var : Iterable
+            Variable to be converted
+
+        Returns
+        -------
+        pd.Series
+            ``var`` itself if already a Series, else a new Series of its values
+        """
+        return var if isinstance(var, pd.Series) else pd.Series(var)
+
+    def reject_outlier_rt(self, rt, verbose=True):
+        """Mask RTs longer than mean + ``outlier_cutoff_sd`` * SD as NaN.
+
+        Parameters
+        ----------
+        rt : pd.Series
+            Response time per trial
+        verbose : bool, optional
+            Whether to print the number of excluded trials, by default True
+
+        Returns
+        -------
+        pd.Series
+            ``rt`` unchanged if no cutoff is set, else with outliers masked
+        """
+        if self.outlier_cutoff_sd is None:
+            return rt
+        is_outlier = rt > rt.mean() + self.outlier_cutoff_sd * rt.std()
+        if verbose:
+            print(f"Outlier rejection excluded {is_outlier.sum()} trials.")
+        return rt.mask(is_outlier)

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+"""Analysis of response data to estimate accuracy from response time (RT)."""`