From 08ae309cc0bbfe1105e12cc09de7bd32f14a876e Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 30 Jun 2021 12:04:25 -0400 Subject: [PATCH 01/21] Add n option to get_studies_by_coordinate --- nimare/dataset.py | 42 ++++++++++++++++++++++++++++-------- nimare/tests/test_dataset.py | 11 +++++++++- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/nimare/dataset.py b/nimare/dataset.py index dc7d20f5e..f9d61fb85 100755 --- a/nimare/dataset.py +++ b/nimare/dataset.py @@ -548,7 +548,8 @@ def get_studies_by_label(self, labels=None, label_threshold=0.001): the labels above the threshold, it will be returned. Default is None. label_threshold : :obj:`float`, optional - Default is 0.5. + Default is 0.001, which corresponds to terms appearing at least once in abstracts + for the commonly-used Neurosynth TF-IDF feature set. Returns ------- @@ -600,27 +601,50 @@ def get_studies_by_mask(self, mask): found_ids = list(self.coordinates.loc[distances, "id"].unique()) return found_ids - def get_studies_by_coordinate(self, xyz, r=20): + def get_studies_by_coordinate(self, xyz, r=None, n=None): """Extract list of studies with at least one focus within radius of requested coordinates. + .. versionchanged:: 0.0.9 + + New option (n) supported to identify closest n studies to coordinate. + Default value for r changed to None. + Parameters ---------- xyz : (X x 3) array_like List of coordinates against which to find studies. r : :obj:`float`, optional - Radius (in mm) within which to find studies. Default is 20mm. + Radius (in mm) within which to find studies. Mutually exclusive with ``n``. + Default is None. + n : :obj:`int`, optional + Number of closest studies to identify. Mutually exclusive with ``r``. + Default is None. Returns ------- found_ids : :obj:`list` - A list of IDs from the Dataset with at least one focus within - radius r of requested coordinates. + A list of IDs from the Dataset matching the search criterion. 
""" + + if n and r: + raise ValueError("Only one of 'r' and 'n' may be provided.") + elif not n and not r: + raise ValueError("Either 'r' or 'n' must be provided.") + from scipy.spatial.distance import cdist - xyz = np.array(xyz) + temp_coords = self.coordinates.copy() + + xyz = np.atleast_2d(xyz) assert xyz.shape[1] == 3 and xyz.ndim == 2 - distances = cdist(xyz, self.coordinates[["x", "y", "z"]].values) - distances = np.any(distances <= r, axis=0) - found_ids = list(self.coordinates.loc[distances, "id"].unique()) + distances = cdist(xyz, temp_coords[["x", "y", "z"]].values) + distances = np.min(distances, axis=0) + temp_coords["distance"] = distances + min_dist_ser = temp_coords.groupby("id")["distance"].min() + + if r: + found_ids = min_dist_ser.loc[min_dist_ser <= r].index.tolist() + elif n: + found_ids = min_dist_ser.nsmallest(n).index.tolist() + return found_ids diff --git a/nimare/tests/test_dataset.py b/nimare/tests/test_dataset.py index c41073f93..8195916f5 100644 --- a/nimare/tests/test_dataset.py +++ b/nimare/tests/test_dataset.py @@ -3,6 +3,7 @@ import nibabel as nib import numpy as np +import pytest import nimare from nimare import dataset @@ -23,7 +24,15 @@ def test_dataset_smoke(): assert isinstance(dset.get_images(imtype="beta"), list) assert isinstance(dset.get_metadata(field="sample_sizes"), list) assert isinstance(dset.get_studies_by_label("cogat_cognitive_control"), list) - assert isinstance(dset.get_studies_by_coordinate(np.array([[20, 20, 20]])), list) + assert isinstance(dset.get_studies_by_coordinate([20, 20, 20], r=20), list) + assert len(dset.get_studies_by_coordinate([20, 20, 20], n=2)) == 2 + assert len(dset.get_studies_by_coordinate([20, 20, 20], n=3)) == 3 + with pytest.raises(ValueError): + dset.get_studies_by_coordinate([20, 20, 20]) + + with pytest.raises(ValueError): + dset.get_studies_by_coordinate([20, 20, 20], r=20, n=4) + mask_data = np.zeros(dset.masker.mask_img.shape, int) mask_data[40, 40, 40] = 1 mask_img = nib.Nifti1Image(mask_data, dset.masker.mask_img.affine) From 88d5c05f4f9436f2fe8f4b6e079084adfc05af7b Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 30 Jun 2021 12:44:48 -0400 Subject: [PATCH 02/21] Mock up maybe passable CoordCBP class. --- nimare/parcellate.py | 134 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 nimare/parcellate.py diff --git a/nimare/parcellate.py b/nimare/parcellate.py new file mode 100644 index 000000000..3eb553f87 --- /dev/null +++ b/nimare/parcellate.py @@ -0,0 +1,134 @@ +"""Parcellation tools.""" +import inspect +import logging + +import numpy as np +from sklearn.cluster import KMeans + +from .base import NiMAREBase +from .meta.base import CBMAEstimator +from .results import MetaResult +from .utils import add_metadata_to_dataframe, check_type, listify, use_memmap, vox2mm + +LGR = logging.getLogger(__name__) + + +class CoordCBP(NiMAREBase): + """Base class for parcellators in :mod:`nimare.parcellate`. + + .. 
versionadded:: 0.0.10 + + """ + + _required_inputs = {"coordinates": ("coordinates", None)} + + def __init__( + self, + target_mask, + meta_estimator, + target_image, + n_clusters, + r=None, + n=None, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + + # Get kernel transformer + meta_estimator = check_type(meta_estimator, CBMAEstimator) + self.meta_estimator = meta_estimator + self.target_image = target_image + self.n_clusters = listify(n_clusters) + + if r and n: + raise ValueError("Only one of 'r' and 'n' may be provided.") + elif not r and not n: + raise ValueError("Either 'r' or 'n' must be provided.") + self.r = r + self.n = n + + def _preprocess_input(self, dataset): + """Mask required input images using either the dataset's mask or the estimator's. + + Also, insert required metadata into coordinates DataFrame. + """ + super()._preprocess_input(dataset) + + # All extra (non-ijk) parameters for a kernel should be overrideable as + # parameters to __init__, so we can access them with get_params() + kt_args = list(self.kernel_transformer.get_params().keys()) + + # Integrate "sample_size" from metadata into DataFrame so that + # kernel_transformer can access it. + if "sample_size" in kt_args: + self.inputs_["coordinates"] = add_metadata_to_dataframe( + dataset, + self.inputs_["coordinates"], + metadata_field="sample_sizes", + target_column="sample_size", + filter_func=np.mean, + ) + + @use_memmap(LGR, n_files=1) + def _fit(self, dataset): + """ + Perform coordinate-based meta-analysis on dataset. + + Parameters + ---------- + dataset : :obj:`nimare.dataset.Dataset` + Dataset to analyze. + """ + self.dataset = dataset + self.masker = self.masker or dataset.masker + self.null_distributions_ = {} + + # Loop through voxels in target_mask, selecting studies for each and running MACMs (no MCC) + target_ijk = np.vstack(np.where(self.target_mask.get_fdata())) + target_xyz = vox2mm(target_ijk, self.masker.mask_img.affine) + for i_coord in target_xyz.shape[1]: + xyz = target_xyz[:, i_coord] + macm_ids = dataset.get_studies_by_coordinate(xyz, r=self.r, n=self.n) + coord_dset = dataset.slice(macm_ids) + + # This seems like a somewhat inelegant solution + # Check if the meta method is a pairwise estimator + if "dataset2" in inspect.getfullargspec(self.meta_estimator.fit).args: + unselected_ids = sorted(list(set(dataset.ids) - set(macm_ids))) + unselected_dset = dataset.slice(unselected_ids) + self.meta_estimator.fit(coord_dset, unselected_dset) + else: + self.meta_estimator.fit(coord_dset) + macm_data = self.meta_estimator.results.get_map(self.target_image, return_type="array") + if i_coord == 0: + data = np.zeros((target_xyz.shape[1], len(macm_data))) + data[i_coord, :] = macm_data + + # Correlate voxel-wise MACM results + data = np.corrcoef(data) + + # Convert voxel-wise correlation matrix to distances + data = 1 - data + + # Perform clustering + labels = np.zeros((len(self.n_clusters), len(macm_data)), dtype=int) + for i_cluster, cluster_count in enumerate(self.n_clusters): + kmeans = KMeans(n_clusters=cluster_count, random_state=0).fit(data) + labels[i_cluster, :] = kmeans.labels_ + + images = {"labels": labels} + return images + + def fit(self, dataset, drop_invalid=True): + self._validate_input(dataset, drop_invalid=drop_invalid) + self._preprocess_input(dataset) + maps = self._fit(dataset) + + if hasattr(self, "masker") and self.masker is not None: + masker = self.masker + else: + masker = dataset.masker + + self.results = MetaResult(self, masker, maps) + return self.results From 
3ce90f1dfb9c7ed4a4dd5434808476390df049f8 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 30 Jun 2021 16:18:50 -0400 Subject: [PATCH 03/21] Fix docstrings. --- nimare/parcellate.py | 52 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 3eb553f87..88a9049db 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -7,6 +7,7 @@ from .base import NiMAREBase from .meta.base import CBMAEstimator +from .meta.cbma.ale import ALE from .results import MetaResult from .utils import add_metadata_to_dataframe, check_type, listify, use_memmap, vox2mm @@ -14,10 +15,29 @@ class CoordCBP(NiMAREBase): - """Base class for parcellators in :mod:`nimare.parcellate`. + """Perform coordinate-based coactivation-based parcellation. .. versionadded:: 0.0.10 + Parameters + ---------- + target_mask : :obj:`nibabel.Nifti1.Nifti1Image` + Mask of target of parcellation. + Currently must be in same space/resolution as Dataset mask. + n_clusters : :obj:`list` of :obj:`int` + Number of clusters to evaluate in clustering. + r : :obj:`float` or None, optional + Radius (in mm) within which to find studies. Mutually exclusive with ``n``. + Default is None. + n : :obj:`int` or None, optional + Number of closest studies to identify. Mutually exclusive with ``r``. + Default is None. + meta_estimator : :obj:`nimare.meta.base.CBMAEstimator`, optional + CBMA Estimator with which to run the MACMs. + Default is :obj:`nimare.meta.cbma.ale.ALE`. + target_image : :obj:`str`, optional + Name of meta-analysis results image to use for clustering. + Default is "ale", which is specific to the ALE estimator. """ _required_inputs = {"coordinates": ("coordinates", None)} @@ -25,26 +45,29 @@ class CoordCBP(NiMAREBase): def __init__( self, target_mask, - meta_estimator, - target_image, n_clusters, r=None, n=None, + meta_estimator=None, + target_image="ale", *args, **kwargs, ): super().__init__(*args, **kwargs) - # Get kernel transformer - meta_estimator = check_type(meta_estimator, CBMAEstimator) - self.meta_estimator = meta_estimator - self.target_image = target_image - self.n_clusters = listify(n_clusters) + if meta_estimator is None: + meta_estimator = ALE() + else: + meta_estimator = check_type(meta_estimator, CBMAEstimator) if r and n: raise ValueError("Only one of 'r' and 'n' may be provided.") elif not r and not n: raise ValueError("Either 'r' or 'n' must be provided.") + + self.meta_estimator = meta_estimator + self.target_image = target_image + self.n_clusters = listify(n_clusters) self.r = r self.n = n @@ -72,8 +95,7 @@ def _preprocess_input(self, dataset): @use_memmap(LGR, n_files=1) def _fit(self, dataset): - """ - Perform coordinate-based meta-analysis on dataset. + """Perform coordinate-based coactivation-based parcellation on dataset. Parameters ---------- @@ -121,6 +143,16 @@ def _fit(self, dataset): return images def fit(self, dataset, drop_invalid=True): + """Perform coordinate-based coactivation-based parcellation on dataset. + + Parameters + ---------- + dataset : :obj:`nimare.dataset.Dataset` + Dataset to analyze. + drop_invalid : :obj:`bool`, optional + Whether to automatically ignore any studies without the required data or not. + Default is True. 
+ """ self._validate_input(dataset, drop_invalid=drop_invalid) self._preprocess_input(dataset) maps = self._fit(dataset) From 40f4585f4d0a0d6a44bbb101ab0f2022f79c5517 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 1 Jul 2021 10:58:36 -0400 Subject: [PATCH 04/21] Don't calculate distances. --- nimare/parcellate.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 88a9049db..02ad4291f 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -122,16 +122,12 @@ def _fit(self, dataset): self.meta_estimator.fit(coord_dset, unselected_dset) else: self.meta_estimator.fit(coord_dset) + macm_data = self.meta_estimator.results.get_map(self.target_image, return_type="array") if i_coord == 0: data = np.zeros((target_xyz.shape[1], len(macm_data))) - data[i_coord, :] = macm_data - # Correlate voxel-wise MACM results - data = np.corrcoef(data) - - # Convert voxel-wise correlation matrix to distances - data = 1 - data + data[i_coord, :] = macm_data # Perform clustering labels = np.zeros((len(self.n_clusters), len(macm_data)), dtype=int) From 530048f66eb09c6f6f5cbae56774324a5d8de57d Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 1 Jul 2021 10:59:55 -0400 Subject: [PATCH 05/21] Remove extra newline. --- nimare/dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nimare/dataset.py b/nimare/dataset.py index f9d61fb85..704cc0ec1 100755 --- a/nimare/dataset.py +++ b/nimare/dataset.py @@ -625,7 +625,6 @@ def get_studies_by_coordinate(self, xyz, r=None, n=None): found_ids : :obj:`list` A list of IDs from the Dataset matching the search criterion. """ - if n and r: raise ValueError("Only one of 'r' and 'n' may be provided.") elif not n and not r: From 838270439e1b546f32d1a58f8c15a74ba41efeb7 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 1 Jul 2021 12:35:23 -0400 Subject: [PATCH 06/21] Clustering should be fairly good. Still need to implement metrics. --- nimare/parcellate.py | 113 +++++++++++++++++++++++++++++-------------- 1 file changed, 78 insertions(+), 35 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 02ad4291f..13425e4cb 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -1,11 +1,15 @@ """Parcellation tools.""" +import datetime import inspect import logging +import os +from tempfile import mkstemp import numpy as np from sklearn.cluster import KMeans from .base import NiMAREBase +from .extract.utils import _get_dataset_dir from .meta.base import CBMAEstimator from .meta.cbma.ale import ALE from .results import MetaResult @@ -26,12 +30,18 @@ class CoordCBP(NiMAREBase): Currently must be in same space/resolution as Dataset mask. n_clusters : :obj:`list` of :obj:`int` Number of clusters to evaluate in clustering. - r : :obj:`float` or None, optional - Radius (in mm) within which to find studies. Mutually exclusive with ``n``. - Default is None. - n : :obj:`int` or None, optional - Number of closest studies to identify. Mutually exclusive with ``r``. - Default is None. + Metrics will be calculated for each cluster-count in the list, to allow users to select + the optimal cluster solution. + r : :obj:`float` or :obj:`list` of :obj:`float` or None, optional + Radius (in mm) within which to find studies. + If a list of values is provided, then MACMs and clustering will be performed across all + values, and a selection procedure will be performed to identify the optimal ``r``. + Mutually exclusive with ``n``. Default is None. 
+ n : :obj:`int` or :obj:`list` of :obj:`int` or None, optional + Number of closest studies to identify. + If a list of values is provided, then MACMs and clustering will be performed across all + values, and a selection procedure will be performed to identify the optimal ``n``. + Mutually exclusive with ``r``. Default is None. meta_estimator : :obj:`nimare.meta.base.CBMAEstimator`, optional CBMA Estimator with which to run the MACMs. Default is :obj:`nimare.meta.cbma.ale.ALE`. @@ -68,8 +78,10 @@ def __init__( self.meta_estimator = meta_estimator self.target_image = target_image self.n_clusters = listify(n_clusters) - self.r = r - self.n = n + self.filter_selection = isinstance(r, list) or isinstance(n, list) + self.filter_type = "r" if r else "n" + self.r = listify(r) + self.n = listify(n) def _preprocess_input(self, dataset): """Mask required input images using either the dataset's mask or the estimator's. @@ -80,7 +92,7 @@ def _preprocess_input(self, dataset): # All extra (non-ijk) parameters for a kernel should be overrideable as # parameters to __init__, so we can access them with get_params() - kt_args = list(self.kernel_transformer.get_params().keys()) + kt_args = list(self.meta_estimator.kernel_transformer.get_params().keys()) # Integrate "sample_size" from metadata into DataFrame so that # kernel_transformer can access it. @@ -104,36 +116,67 @@ def _fit(self, dataset): """ self.dataset = dataset self.masker = self.masker or dataset.masker - self.null_distributions_ = {} # Loop through voxels in target_mask, selecting studies for each and running MACMs (no MCC) target_ijk = np.vstack(np.where(self.target_mask.get_fdata())) target_xyz = vox2mm(target_ijk, self.masker.mask_img.affine) - for i_coord in target_xyz.shape[1]: - xyz = target_xyz[:, i_coord] - macm_ids = dataset.get_studies_by_coordinate(xyz, r=self.r, n=self.n) - coord_dset = dataset.slice(macm_ids) - - # This seems like a somewhat inelegant solution - # Check if the meta method is a pairwise estimator - if "dataset2" in inspect.getfullargspec(self.meta_estimator.fit).args: - unselected_ids = sorted(list(set(dataset.ids) - set(macm_ids))) - unselected_dset = dataset.slice(unselected_ids) - self.meta_estimator.fit(coord_dset, unselected_dset) - else: - self.meta_estimator.fit(coord_dset) - - macm_data = self.meta_estimator.results.get_map(self.target_image, return_type="array") - if i_coord == 0: - data = np.zeros((target_xyz.shape[1], len(macm_data))) - - data[i_coord, :] = macm_data - - # Perform clustering - labels = np.zeros((len(self.n_clusters), len(macm_data)), dtype=int) - for i_cluster, cluster_count in enumerate(self.n_clusters): - kmeans = KMeans(n_clusters=cluster_count, random_state=0).fit(data) - labels[i_cluster, :] = kmeans.labels_ + n_target_voxels = target_xyz.shape[1] + n_mask_voxels = self.masker.transform(self.masker.mask_img).shape[0] + + n_filters = len(getattr(self, self.filter_type)) + labels = np.zeros((n_filters, len(self.n_clusters), n_target_voxels), dtype=int) + kwargs = {"r": None, "n": None} + + # Use a memmapped 2D array + start_time = datetime.datetime.now().strftime("%Y%m%dT%H%M%S") + dataset_dir = _get_dataset_dir("temporary_files", data_dir=None) + _, memmap_filename = mkstemp( + prefix=self.__class__.__name__, + suffix=start_time, + dir=dataset_dir, + ) + data = np.memmap( + memmap_filename, + dtype=float, + mode="w+", + shape=(n_target_voxels, n_mask_voxels), + ) + + for i_filter in range(n_filters): + kwargs[self.filter_type] = getattr(self, self.filter_type)[i_filter] + for j_coord 
in n_target_voxels: + xyz = target_xyz[:, j_coord] + macm_ids = dataset.get_studies_by_coordinate(xyz, **kwargs) + coord_dset = dataset.slice(macm_ids) + + # This seems like a somewhat inelegant solution + # Check if the meta method is a pairwise estimator + if "dataset2" in inspect.getfullargspec(self.meta_estimator.fit).args: + unselected_ids = sorted(list(set(dataset.ids) - set(macm_ids))) + unselected_dset = dataset.slice(unselected_ids) + self.meta_estimator.fit(coord_dset, unselected_dset) + else: + self.meta_estimator.fit(coord_dset) + + data[j_coord, :] = self.meta_estimator.results.get_map( + self.target_image, + return_type="array", + ) # data is overwritten across filters + + # Perform clustering + for j_cluster, cluster_count in enumerate(self.n_clusters): + kmeans = KMeans( + n_clusters=cluster_count, + init="k-means++", + n_init=10, + random_state=0, + algorithm="elkan", + ).fit(data) + labels[i_filter, j_cluster, :] = kmeans.labels_ + + # Clean up MACM data memmap + LGR.info(f"Removing temporary file: {memmap_filename}") + os.remove(memmap_filename) images = {"labels": labels} return images From c79b251e83b54a22327d7a63ab1f916a456e2f10 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 1 Jul 2021 12:54:03 -0400 Subject: [PATCH 07/21] Add empty methods for the different metrics. --- nimare/parcellate.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 13425e4cb..091c04c78 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -181,6 +181,24 @@ def _fit(self, dataset): images = {"labels": labels} return images + def _filter_selection(self): + pass + + def _silhouette(self): + pass + + def _voxel_misclassification(self): + pass + + def _variation_of_information(self): + pass + + def _nondominant_voxel_percentage(self): + pass + + def _cluster_distance_ratio(self): + pass + def fit(self, dataset, drop_invalid=True): """Perform coordinate-based coactivation-based parcellation on dataset. From 3e2d580fac55859c3ef55ae8fef15db436a9a8cb Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 1 Jul 2021 13:41:56 -0400 Subject: [PATCH 08/21] Work on filter-selection metric. Far from done. --- nimare/parcellate.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 091c04c78..fe13ad9b4 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -181,8 +181,21 @@ def _fit(self, dataset): images = {"labels": labels} return images - def _filter_selection(self): - pass + def _filter_selection(self, labels): + from scipy import stats + n_filters, n_clusters, n_voxels = labels.shape + deviant_proportions = np.zeros((n_filters, n_clusters)) + for i_cluster in range(n_clusters): + cluster_labels = labels[:, i_cluster, :] + cluster_labels_mode = stats.mode(cluster_labels, axis=0)[0] + is_deviant = cluster_labels != cluster_labels_mode + deviant_proportion = is_deviant.mean(axis=1) + assert deviant_proportion.size == n_filters + deviant_proportions[:, i_cluster] = deviant_proportion + # Z-score within each cluster solution + deviant_z = stats.zscore(deviant_proportions, axis=0) + filter_deviant_z = deviant_z.sum(axis=1) + min_deviants_filter = np.where(filter_deviant_z == np.min(filter_deviant_z))[0] def _silhouette(self): pass From 3365d5c4a998f4a0ef85c8412a9b5e5652ba9b6c Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 1 Jul 2021 15:22:57 -0400 Subject: [PATCH 09/21] Add a couple of metrics. 
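Two metrics land here: the average silhouette and variation of information. The VI function carries its own doctest; for the silhouette, here is a minimal standalone sketch of how the metric behaves (the toy data and variable names are illustrative only, not NiMARE code; note that sklearn's silhouette_score already returns the mean over samples, which matches the "averaged across voxels" criterion):

    import numpy as np
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score

    rng = np.random.default_rng(0)
    # Toy stand-in for the (n_target_voxels, n_mask_voxels) MACM data matrix
    data = np.vstack([rng.normal(0, 1, (50, 10)), rng.normal(5, 1, (50, 10))])

    kmeans = KMeans(n_clusters=2, init="k-means++", n_init=10, random_state=0).fit(data)
    # silhouette_score averages the per-voxel silhouette values internally
    print(silhouette_score(data, kmeans.labels_, metric="euclidean"))  # close to 1 for well-separated blobs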
--- nimare/parcellate.py | 50 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index fe13ad9b4..41f844237 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -48,6 +48,12 @@ class CoordCBP(NiMAREBase): target_image : :obj:`str`, optional Name of meta-analysis results image to use for clustering. Default is "ale", which is specific to the ALE estimator. + + Notes + ----- + This approach deviates in a number of respects from the method described in + Chase et al. (2020), including the fact that the scikit-learn implementation of the K-means + clustering algorithm only supports Euclidean distance, so correlation distance is not used. """ _required_inputs = {"coordinates": ("coordinates", None)} @@ -59,7 +65,7 @@ def __init__( r=None, n=None, meta_estimator=None, - target_image="ale", + target_image="stat", *args, **kwargs, ): @@ -125,6 +131,7 @@ def _fit(self, dataset): n_filters = len(getattr(self, self.filter_type)) labels = np.zeros((n_filters, len(self.n_clusters), n_target_voxels), dtype=int) + silhouettes = np.zeros((n_filters, len(self.n_clusters)), dtype=float) kwargs = {"r": None, "n": None} # Use a memmapped 2D array @@ -173,6 +180,7 @@ def _fit(self, dataset): algorithm="elkan", ).fit(data) labels[i_filter, j_cluster, :] = kmeans.labels_ + silhouettes[i_filter, j_cluster] = self._silhouette(data, kmeans.labels_) # Clean up MACM data memmap LGR.info(f"Removing temporary file: {memmap_filename}") @@ -197,14 +205,46 @@ def _filter_selection(self, labels): filter_deviant_z = deviant_z.sum(axis=1) min_deviants_filter = np.where(filter_deviant_z == np.min(filter_deviant_z))[0] - def _silhouette(self): - pass + def _silhouette(self, data, labels): + """Calculate silhouette score.""" + from sklearn.metrics import silhouette_score + silhouette = silhouette_score(data, labels, metric="euclidean", random_state=None) + return silhouette def _voxel_misclassification(self): pass - def _variation_of_information(self): - pass + def _variation_of_information(self, X, Y): + """Calculate variation of information metric. + + Parameters + ---------- + X, Y : :obj:`list` of :obj:`list` + Partitions to compare + + Notes + ----- + From https://gist.github.com/jwcarr/626cbc80e0006b526688 + + Examples + -------- + >>> X = [[1, 2, 3], [4, 5, 6, 7]] + >>> Y = [[1, 2, 3, 4], [5, 6, 7]] + >>> variation_of_information(X, Y) + 0.9271749993818661 + """ + from math import log + + n = float(sum([len(x) for x in X])) + sigma = 0.0 + for x in X: + p = len(x) / n + for y in Y: + q = len(y) / n + r = len(set(x) & set(y)) / n + if r > 0.0: + sigma += r * (log(r / p, 2) + log(r / q, 2)) + return abs(sigma) def _nondominant_voxel_percentage(self): pass From 708160433750e9d73fc27deb95424dad65005c42 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 2 Jul 2021 11:19:54 -0400 Subject: [PATCH 10/21] Improve metric documentation. --- nimare/parcellate.py | 68 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 6 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 41f844237..8b86652b6 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -190,6 +190,13 @@ def _fit(self, dataset): return images def _filter_selection(self, labels): + """Select a range of optimal filter values based on consistency of cluster assignment. 
+ + Parameters + ---------- + labels : :obj:`numpy.ndarray` of shape (n_filters, nclusters, n_voxels) + Labeling results from a range of KMeans clustering runs. + """ from scipy import stats n_filters, n_clusters, n_voxels = labels.shape deviant_proportions = np.zeros((n_filters, n_clusters)) @@ -205,13 +212,23 @@ def _filter_selection(self, labels): filter_deviant_z = deviant_z.sum(axis=1) min_deviants_filter = np.where(filter_deviant_z == np.min(filter_deviant_z))[0] - def _silhouette(self, data, labels): - """Calculate silhouette score.""" - from sklearn.metrics import silhouette_score - silhouette = silhouette_score(data, labels, metric="euclidean", random_state=None) - return silhouette + # This is not the end + return min_deviants_filter def _voxel_misclassification(self): + """Calculate voxel misclassification metric. + + Notes + ----- + From Chase et al. (2020): + First, misclassified voxels (deviants) were examined as a topological criterion, + with optimal K parcellations being those in which the percentage of deviants was not + significantly increased compared to the K − 1 solution and but where the K + 1 was + associated with significantly increased deviants. + + TS: Deviants are presumably calculated only from the range of filters selected in the + filter selection step. + """ pass def _variation_of_information(self, X, Y): @@ -224,7 +241,12 @@ def _variation_of_information(self, X, Y): Notes ----- - From https://gist.github.com/jwcarr/626cbc80e0006b526688 + Implementation adapted from https://gist.github.com/jwcarr/626cbc80e0006b526688. + + From Chase et al. (2020): + Second, the variation of information (VI) metric was employed as an information-theoretic + criterion to assess the similarity of cluster assignments for each filter size between + the current solution and the neighboring (K − 1 and K + 1) solutions. Examples -------- @@ -246,10 +268,44 @@ def _variation_of_information(self, X, Y): sigma += r * (log(r / p, 2) + log(r / q, 2)) return abs(sigma) + def _average_silhouette(self, data, labels): + """Calculate average silhouette score. + + Notes + ----- + From Chase et al. (2020): + Third, the silhouette value averaged across voxels for each filter size was considered a + cluster separation criterion. + """ + from sklearn.metrics import silhouette_score + silhouette = silhouette_score(data, labels, metric="euclidean", random_state=None) + # Average across voxels + return silhouette + def _nondominant_voxel_percentage(self): + """Calculate percentage-of-voxels-not-with-parent metric. + + Notes + ----- + From Chase et al. (2020): + Fourth, we assessed the percentage of voxels not related to the dominant parent cluster + compared with the K − 1 solution as a second topological criterion. + This measure corresponds to the percentage voxels that are not present in hierarchy, K, + compared with the previous K − 1 solution, and is related to the hierarchy index. + """ pass def _cluster_distance_ratio(self): + """Calculate change-in-inter/intra-cluster-distance metric. + + Notes + ----- + From Chase et al. (2020): + Finally, the change in inter- versus intra-cluster distance ratio was computed as a second + cluster separation criterion. + This ratio is the first derivative of the ratio between the average distance of a given + voxel to its own cluster center and the average distance between the cluster centers. 
+ """ pass def fit(self, dataset, drop_invalid=True): From 12cddec571d62d97baaf54cc2ba82e66ede70223 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 2 Jul 2021 11:54:56 -0400 Subject: [PATCH 11/21] VI should be fairly good now. --- nimare/parcellate.py | 77 ++++++++++++++++++++++++++++++-------------- nimare/utils.py | 33 +++++++++++++++++++ 2 files changed, 86 insertions(+), 24 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 8b86652b6..39ddfcff8 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -198,6 +198,7 @@ def _filter_selection(self, labels): Labeling results from a range of KMeans clustering runs. """ from scipy import stats + n_filters, n_clusters, n_voxels = labels.shape deviant_proportions = np.zeros((n_filters, n_clusters)) for i_cluster in range(n_clusters): @@ -231,42 +232,69 @@ def _voxel_misclassification(self): """ pass - def _variation_of_information(self, X, Y): + def _variation_of_information(self, labels, cluster_counts): """Calculate variation of information metric. Parameters ---------- - X, Y : :obj:`list` of :obj:`list` - Partitions to compare + labels : :obj:`numpy.ndarray` of shape (n_filters, n_clusters, n_voxels) + cluster_counts : :obj:`list` of :obj:`int` + The set of K values tested. Must be n_clusters items long. + + Returns + ------- + vi_values : :obj:`numpy.ndarray` of shape (n_filters, n_clusters, 2) + Variation of information values for each filter and each cluster, where the last + dimension has two values: the VI value for K vs. K - 1 and the value for K vs. K + 1. + K values with no K - 1 or K + 1 have a NaN in the associated cell. Notes ----- - Implementation adapted from https://gist.github.com/jwcarr/626cbc80e0006b526688. - From Chase et al. (2020): Second, the variation of information (VI) metric was employed as an information-theoretic criterion to assess the similarity of cluster assignments for each filter size between the current solution and the neighboring (K − 1 and K + 1) solutions. 
- - Examples - -------- - >>> X = [[1, 2, 3], [4, 5, 6, 7]] - >>> Y = [[1, 2, 3, 4], [5, 6, 7]] - >>> variation_of_information(X, Y) - 0.9271749993818661 """ - from math import log - - n = float(sum([len(x) for x in X])) - sigma = 0.0 - for x in X: - p = len(x) / n - for y in Y: - q = len(y) / n - r = len(set(x) & set(y)) / n - if r > 0.0: - sigma += r * (log(r / p, 2) + log(r / q, 2)) - return abs(sigma) + from .utils import variation_of_information + + n_filters, n_clusters, _ = labels.shape + assert len(cluster_counts) == n_clusters + + vi_values = np.empty((n_filters, n_clusters, 2)) + for i_filter in range(n_filters): + filter_labels = labels[i_filter, :, :] + for j_cluster, cluster_count in enumerate(cluster_counts): + cluster_partition = [ + np.where(filter_labels[j_cluster, :] == k_cluster_num)[0] + for k_cluster_num in range(cluster_count) + ] + if j_cluster > 0: + cluster_m1_partition = [ + np.where(filter_labels[j_cluster - 1, :] == k_cluster_num)[0] + for k_cluster_num in range(cluster_counts[j_cluster - 1]) + ] + # Calculate VI between K and K - 1 + vi_values[i_filter, j_cluster, 0] = variation_of_information( + cluster_partition, + cluster_m1_partition, + ) + else: + vi_values[i_filter, j_cluster, 0] = np.nan + + if j_cluster < (labels.shape[1] - 1): + cluster_p1_partition = [ + np.where(filter_labels[j_cluster + 1, :] == k_cluster_num)[0] + for k_cluster_num in range(cluster_counts[j_cluster + 1]) + ] + # Calculate VI between K and K + 1 + vi_values[i_filter, j_cluster, 1] = variation_of_information( + cluster_partition, + cluster_p1_partition, + ) + else: + vi_values[i_filter, j_cluster, 1] = np.nan + + return vi_values def _average_silhouette(self, data, labels): """Calculate average silhouette score. @@ -278,6 +306,7 @@ def _average_silhouette(self, data, labels): cluster separation criterion. """ from sklearn.metrics import silhouette_score + silhouette = silhouette_score(data, labels, metric="euclidean", random_state=None) # Average across voxels return silhouette diff --git a/nimare/utils.py b/nimare/utils.py index 6d4f4a6f1..7592f9e63 100755 --- a/nimare/utils.py +++ b/nimare/utils.py @@ -789,3 +789,36 @@ def mni2tal(coords): if use_dim == 1: out_coords = out_coords.transpose() return out_coords + + +def variation_of_information(X, Y): + """Calculate variation of information metric. + + Parameters + ---------- + X, Y : :obj:`list` of :obj:`list` + Partitions to compare + + Notes + ----- + Implementation adapted from https://gist.github.com/jwcarr/626cbc80e0006b526688. + + Examples + -------- + >>> X = [[1, 2, 3], [4, 5, 6, 7]] + >>> Y = [[1, 2, 3, 4], [5, 6, 7]] + >>> variation_of_information(X, Y) + 0.9271749993818661 + """ + from math import log + + n = float(sum([len(x) for x in X])) + sigma = 0.0 + for x in X: + p = len(x) / n + for y in Y: + q = len(y) / n + r = len(set(x) & set(y)) / n + if r > 0.0: + sigma += r * (log(r / p, 2) + log(r / q, 2)) + return abs(sigma) From f37751aa18d6ed5b90509806a2cf1bfd54802afd Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 9 Jul 2021 17:16:05 -0400 Subject: [PATCH 12/21] Draft voxel misclassification metric. 
---
 nimare/parcellate.py | 90 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 85 insertions(+), 5 deletions(-)

diff --git a/nimare/parcellate.py b/nimare/parcellate.py
index 39ddfcff8..7342fb923 100644
--- a/nimare/parcellate.py
+++ b/nimare/parcellate.py
@@ -196,6 +196,18 @@ def _filter_selection(self, labels):
         ----------
         labels : :obj:`numpy.ndarray` of shape (n_filters, n_clusters, n_voxels)
             Labeling results from a range of KMeans clustering runs.
+
+        Notes
+        -----
+        From Chase et al. (2020):
+        We implemented a two-step procedure that involved a decision on those filter sizes to be
+        included in the final analysis and subsequently a decision on the optimal cluster solution.
+        In the first step, we examined the consistency of the cluster assignment for the individual
+        voxels across the cluster solutions of the co-occurrence maps performed at different filter
+        sizes. We selected a filter range with the lowest number of deviants, that is, number of
+        voxels that were assigned differently compared with the solution from the majority of
+        filters. In other words, we identified those filter sizes which produced solutions most
+        similar to the consensus-solution across all filter sizes.
         """
         from scipy import stats

@@ -213,12 +225,18 @@ def _filter_selection(self, labels):
         filter_deviant_z = deviant_z.sum(axis=1)
         min_deviants_filter = np.where(filter_deviant_z == np.min(filter_deviant_z))[0]

-        # This is not the end
+        # NOTE: This is not the end, but I don't know how filters were selected based on this array
         return min_deviants_filter

-    def _voxel_misclassification(self):
+    def _voxel_misclassification(self, selected_filter_labels, alpha=0.05):
         """Calculate voxel misclassification metric.

+        Parameters
+        ----------
+        selected_filter_labels : :obj:`numpy.ndarray` of shape (n_selected_filters, n_clusters, n_voxels)
+            Labeling results from a range of KMeans clustering runs, limited to those filters
+            that survived the filter selection procedure.
+
         Notes
         -----
         From Chase et al. (2020):
         First, misclassified voxels (deviants) were examined as a topological criterion,
         with optimal K parcellations being those in which the percentage of deviants was not
         significantly increased compared to the K − 1 solution but where the K + 1 was
         associated with significantly increased deviants.

         TS: Deviants are presumably calculated only from the range of filters selected in the
         filter selection step.
+ + TS: To measure significance, we could do a series of pair-wise proportion z-tests + Left side null hypothesis: proportion from K is <= K - 1 + Left side alternative: K > K -1 + Accept on null + z, p = proportions_ztest([k, k-1], [n, n], alternative="larger") + p >= alpha --> YAY + Right side null hypothesis: proportion from K is >= K + 1 + Right side alternative: K < K + 1 + Accept on rejected null + z, p = proportions_ztest([k, k+1], [n, n], alternative="smaller") + p < alpha --> YAY """ - pass + from scipy import stats + from statsmodels.stats.proportion import proportions_ztest + + n_filters, n_clusters, n_voxels = selected_filter_labels.shape + count = n_voxels * n_filters + deviant_counts = np.zeros((n_filters, n_clusters)) + for i_cluster in range(n_clusters): + cluster_labels = selected_filter_labels[:, i_cluster, :] + cluster_labels_mode = stats.mode(cluster_labels, axis=0)[0] + is_deviant = cluster_labels != cluster_labels_mode + deviant_count = is_deviant.sum(axis=1) + assert deviant_count.size == n_filters + deviant_counts[:, i_cluster] = deviant_count + + decision_criteria = np.empty((n_clusters, 2), dtype=bool) + + for i_cluster in range(n_clusters): + cluster_deviant_count = np.sum(deviant_counts[:, i_cluster]) + + # Compare to k - 1 + if i_cluster > 0: + m1_cluster_deviant_count = np.sum(deviant_counts[:, i_cluster - 1]) + _, p = proportions_ztest( + count=[cluster_deviant_count, m1_cluster_deviant_count], + nobs=[count, count], + alternative="larger", + ) + decision_criteria[i_cluster, 0] = p >= alpha + else: + decision_criteria[i_cluster, 0] = np.nan + + if i_cluster < (n_clusters - 1): + p1_cluster_deviant_count = np.sum(deviant_counts[:, i_cluster + 1]) + _, p = proportions_ztest( + count=[cluster_deviant_count, p1_cluster_deviant_count], + nobs=[count, count], + alternative="smaller", + ) + decision_criteria[i_cluster, 1] = p < alpha + else: + decision_criteria[i_cluster, 1] = np.nan + + decision_results = np.all(decision_criteria, axis=1) + good_cluster_idx = np.where(decision_results)[0] + if not good_cluster_idx: + LGR.warning("No cluster solution found according to VM metric.") + + return good_cluster_idx def _variation_of_information(self, labels, cluster_counts): """Calculate variation of information metric. @@ -268,12 +345,14 @@ def _variation_of_information(self, labels, cluster_counts): np.where(filter_labels[j_cluster, :] == k_cluster_num)[0] for k_cluster_num in range(cluster_count) ] + + # Calculate VI between K and K - 1 if j_cluster > 0: cluster_m1_partition = [ np.where(filter_labels[j_cluster - 1, :] == k_cluster_num)[0] for k_cluster_num in range(cluster_counts[j_cluster - 1]) ] - # Calculate VI between K and K - 1 + vi_values[i_filter, j_cluster, 0] = variation_of_information( cluster_partition, cluster_m1_partition, @@ -281,12 +360,13 @@ def _variation_of_information(self, labels, cluster_counts): else: vi_values[i_filter, j_cluster, 0] = np.nan + # Calculate VI between K and K + 1 if j_cluster < (labels.shape[1] - 1): cluster_p1_partition = [ np.where(filter_labels[j_cluster + 1, :] == k_cluster_num)[0] for k_cluster_num in range(cluster_counts[j_cluster + 1]) ] - # Calculate VI between K and K + 1 + vi_values[i_filter, j_cluster, 1] = variation_of_information( cluster_partition, cluster_p1_partition, From 0896745f258dbc38a8143c95792d3a3cd537ee29 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 14 Jul 2021 12:52:03 -0400 Subject: [PATCH 13/21] Draft another metric. 
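The quantity being drafted is the ratio between the average distance of each voxel to its own cluster center and the average distance between cluster centers. A rough standalone sketch with toy data (illustrative only, not the final implementation; note that scipy's cdist requires 2-D inputs, so each center must be reshaped):

    import numpy as np
    from scipy.spatial import distance
    from sklearn.cluster import KMeans

    rng = np.random.default_rng(0)
    data = rng.normal(size=(100, 10))  # hypothetical (n_voxels, n_features) matrix
    n_clusters = 3
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(data)

    # Average inter-center distance: mean of pairwise distances between centers
    inter = distance.pdist(kmeans.cluster_centers_, metric="euclidean").mean()

    # Average distance of each voxel to its own cluster center
    total = 0.0
    for k in range(n_clusters):
        members = data[kmeans.labels_ == k]
        center = kmeans.cluster_centers_[k][None, :]  # cdist needs 2-D arrays
        total += distance.cdist(center, members, metric="euclidean").sum()
    intra = total / data.shape[0]

    print(intra / inter)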
Will move most of the work to the main fit loop, because this metric requires too much information. --- nimare/parcellate.py | 47 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 7342fb923..89299851b 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -131,6 +131,10 @@ def _fit(self, dataset): n_filters = len(getattr(self, self.filter_type)) labels = np.zeros((n_filters, len(self.n_clusters), n_target_voxels), dtype=int) + cluster_centers = np.zeros( + (n_filters, len(self.n_clusters), np.max(self.n_clusters), n_mask_voxels), + dtype=float, + ) silhouettes = np.zeros((n_filters, len(self.n_clusters)), dtype=float) kwargs = {"r": None, "n": None} @@ -180,6 +184,7 @@ def _fit(self, dataset): algorithm="elkan", ).fit(data) labels[i_filter, j_cluster, :] = kmeans.labels_ + cluster_centers[i_filter, j_cluster, :cluster_count, :] = kmeans.cluster_centers_ silhouettes[i_filter, j_cluster] = self._silhouette(data, kmeans.labels_) # Clean up MACM data memmap @@ -404,9 +409,18 @@ def _nondominant_voxel_percentage(self): """ pass - def _cluster_distance_ratio(self): + def _cluster_distance_ratio(self, labels, cluster_centers, cluster_counts, data): """Calculate change-in-inter/intra-cluster-distance metric. + Parameters + ---------- + labels : :obj:`numpy.ndarray` of shape (n_filters, n_clusters, n_target_voxels) + cluster_centers : :obj:`numpy.ndarray` of shape + (n_filters, n_clusters, max_cluster_size, n_feature_voxels) + Cluster centers. + cluster_counts : :obj:`list` of :obj:`int` + The set of K values tested. Must be n_clusters items long. + Notes ----- From Chase et al. (2020): @@ -415,7 +429,36 @@ def _cluster_distance_ratio(self): This ratio is the first derivative of the ratio between the average distance of a given voxel to its own cluster center and the average distance between the cluster centers. """ - pass + from scipy.spatial import distance + + n_filters, n_clusters, n_target_voxels = labels.shape + _, _, max_cluster_size, n_feature_voxels = cluster_centers.shape + assert n_filters == cluster_centers.shape[0] + assert n_clusters == cluster_centers.shape[1] + ratios = np.zeros((n_filters, n_clusters), dtype=float) + for i_filter in range(n_filters): + for j_cluster in range(n_clusters): + cluster_count = cluster_counts[j_cluster] + solution_labels = labels[i_filter, j_cluster, :] + solution_centers = cluster_centers[i_filter, j_cluster, :cluster_count, :] + avg_center_distance = np.mean(distance.pdist(solution_centers)) + total_voxel_center_distance = 0 + for k_cluster_val in range(cluster_count): + cluster_val_voxels = np.where(solution_labels == k_cluster_val)[0] + cluster_val_center = solution_centers[k_cluster_val, :] + cluster_val_features = data[cluster_val_voxels, :] + cluster_val_distances = distance.cdist( + cluster_val_center, + cluster_val_features, + ) + total_voxel_center_distance += np.sum(cluster_val_distances) + avg_voxel_center_distance = total_voxel_center_distance / n_target_voxels + ratio = avg_voxel_center_distance / avg_center_distance + ratios[i_filter, j_cluster] = ratio + + # TODO: Calculate first derivative? + + return ratios def fit(self, dataset, drop_invalid=True): """Perform coordinate-based coactivation-based parcellation on dataset. From f9611dbedbfcd5861f89cc59c7c7420beef3f57d Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 14 Jul 2021 12:53:44 -0400 Subject: [PATCH 14/21] Little cleanup. 
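One detail still open after this cleanup: the Chase et al. quote calls for the first derivative of the ratio across neighboring cluster solutions. Assuming a (n_filters, n_cluster_solutions) ratios array, that step could be as simple as a discrete difference along the solutions axis (a sketch only, not yet wired in):

    import numpy as np

    # Hypothetical ratios for 2 filters x 4 cluster solutions
    ratios = np.array(
        [[1.00, 0.80, 0.70, 0.65],
         [1.10, 0.90, 0.70, 0.60]]
    )
    ratio_derivs = np.diff(ratios, n=1, axis=1)  # change from K to K + 1
    print(ratio_derivs.shape)  # (2, 3)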
--- nimare/parcellate.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 89299851b..97ccafa8d 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -441,7 +441,12 @@ def _cluster_distance_ratio(self, labels, cluster_centers, cluster_counts, data) cluster_count = cluster_counts[j_cluster] solution_labels = labels[i_filter, j_cluster, :] solution_centers = cluster_centers[i_filter, j_cluster, :cluster_count, :] - avg_center_distance = np.mean(distance.pdist(solution_centers)) + avg_intracenter_distance = np.mean( + distance.pdist( + solution_centers, + metric="euclidean", + ) + ) total_voxel_center_distance = 0 for k_cluster_val in range(cluster_count): cluster_val_voxels = np.where(solution_labels == k_cluster_val)[0] @@ -450,10 +455,11 @@ def _cluster_distance_ratio(self, labels, cluster_centers, cluster_counts, data) cluster_val_distances = distance.cdist( cluster_val_center, cluster_val_features, + metric="euclidean", ) total_voxel_center_distance += np.sum(cluster_val_distances) avg_voxel_center_distance = total_voxel_center_distance / n_target_voxels - ratio = avg_voxel_center_distance / avg_center_distance + ratio = avg_voxel_center_distance / avg_intracenter_distance ratios[i_filter, j_cluster] = ratio # TODO: Calculate first derivative? From 909c82243f04f7bd59eff405f92dbd8e97f91784 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 14 Jul 2021 13:26:59 -0400 Subject: [PATCH 15/21] Move ratio calculations to main fit method. --- nimare/parcellate.py | 65 ++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 97ccafa8d..00a0fd194 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -6,6 +6,7 @@ from tempfile import mkstemp import numpy as np +from scipy.spatial import distance from sklearn.cluster import KMeans from .base import NiMAREBase @@ -131,10 +132,6 @@ def _fit(self, dataset): n_filters = len(getattr(self, self.filter_type)) labels = np.zeros((n_filters, len(self.n_clusters), n_target_voxels), dtype=int) - cluster_centers = np.zeros( - (n_filters, len(self.n_clusters), np.max(self.n_clusters), n_mask_voxels), - dtype=float, - ) silhouettes = np.zeros((n_filters, len(self.n_clusters)), dtype=float) kwargs = {"r": None, "n": None} @@ -152,6 +149,7 @@ def _fit(self, dataset): mode="w+", shape=(n_target_voxels, n_mask_voxels), ) + ratios = np.zeros((n_filters, len(self.n_clusters)), dtype=float) for i_filter in range(n_filters): kwargs[self.filter_type] = getattr(self, self.filter_type)[i_filter] @@ -184,9 +182,31 @@ def _fit(self, dataset): algorithm="elkan", ).fit(data) labels[i_filter, j_cluster, :] = kmeans.labels_ - cluster_centers[i_filter, j_cluster, :cluster_count, :] = kmeans.cluster_centers_ + silhouettes[i_filter, j_cluster] = self._silhouette(data, kmeans.labels_) + # Necessary calculations for ratios metric + avg_intracenter_distance = np.mean( + distance.pdist( + kmeans.cluster_centers_, + metric="euclidean", + ) + ) + total_voxel_center_distance = 0 + for k_cluster_val in range(cluster_count): + cluster_val_voxels = np.where(kmeans.labels_ == k_cluster_val)[0] + cluster_val_center = kmeans.cluster_centers_[k_cluster_val, :] + cluster_val_features = data[cluster_val_voxels, :] + cluster_val_distances = distance.cdist( + cluster_val_center, + cluster_val_features, + metric="euclidean", + ) + total_voxel_center_distance += np.sum(cluster_val_distances) + 
avg_voxel_center_distance = total_voxel_center_distance / n_target_voxels + ratio = avg_voxel_center_distance / avg_intracenter_distance + ratios[i_filter, j_cluster] = ratio + # Clean up MACM data memmap LGR.info(f"Removing temporary file: {memmap_filename}") os.remove(memmap_filename) @@ -409,7 +429,7 @@ def _nondominant_voxel_percentage(self): """ pass - def _cluster_distance_ratio(self, labels, cluster_centers, cluster_counts, data): + def _cluster_distance_ratio(self, ratios): """Calculate change-in-inter/intra-cluster-distance metric. Parameters @@ -429,39 +449,6 @@ def _cluster_distance_ratio(self, labels, cluster_centers, cluster_counts, data) This ratio is the first derivative of the ratio between the average distance of a given voxel to its own cluster center and the average distance between the cluster centers. """ - from scipy.spatial import distance - - n_filters, n_clusters, n_target_voxels = labels.shape - _, _, max_cluster_size, n_feature_voxels = cluster_centers.shape - assert n_filters == cluster_centers.shape[0] - assert n_clusters == cluster_centers.shape[1] - ratios = np.zeros((n_filters, n_clusters), dtype=float) - for i_filter in range(n_filters): - for j_cluster in range(n_clusters): - cluster_count = cluster_counts[j_cluster] - solution_labels = labels[i_filter, j_cluster, :] - solution_centers = cluster_centers[i_filter, j_cluster, :cluster_count, :] - avg_intracenter_distance = np.mean( - distance.pdist( - solution_centers, - metric="euclidean", - ) - ) - total_voxel_center_distance = 0 - for k_cluster_val in range(cluster_count): - cluster_val_voxels = np.where(solution_labels == k_cluster_val)[0] - cluster_val_center = solution_centers[k_cluster_val, :] - cluster_val_features = data[cluster_val_voxels, :] - cluster_val_distances = distance.cdist( - cluster_val_center, - cluster_val_features, - metric="euclidean", - ) - total_voxel_center_distance += np.sum(cluster_val_distances) - avg_voxel_center_distance = total_voxel_center_distance / n_target_voxels - ratio = avg_voxel_center_distance / avg_intracenter_distance - ratios[i_filter, j_cluster] = ratio - # TODO: Calculate first derivative? return ratios From 65857d1d718d9d2704ea90fffb07c0f4eb2cf022 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 14 Jul 2021 13:47:28 -0400 Subject: [PATCH 16/21] More cleanup. --- nimare/parcellate.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 00a0fd194..8d12e17be 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -87,6 +87,7 @@ def __init__( self.n_clusters = listify(n_clusters) self.filter_selection = isinstance(r, list) or isinstance(n, list) self.filter_type = "r" if r else "n" + self.alpha = 0.05 self.r = listify(r) self.n = listify(n) @@ -207,6 +208,24 @@ def _fit(self, dataset): ratio = avg_voxel_center_distance / avg_intracenter_distance ratios[i_filter, j_cluster] = ratio + # Identify range of optimal filters + retained_filter_idx = self._filter_selection(labels) + labels = labels[retained_filter_idx, ...] + silhouettes = silhouettes[retained_filter_idx, ...] + ratios = ratios[retained_filter_idx, ...] + + # Calculate metrics + vm_results = self._voxel_misclassification(labels, alpha=self.alpha) # boolx2 + vi_results = self._variation_of_information(labels, self.n_clusters) # floatx2 + s_results = silhouettes # float + nvp_results = self._nondominant_voxel_percentage() # ? 
+ cdr_results = self._cluster_distance_ratio(ratios) # float + vote = np.nansum( + np.dstack((vm_results, vi_results, s_results, nvp_results, cdr_results)), + axis=2, + ) + assert vote.shape == (n_filters, len(self.n_clusters)) + # Clean up MACM data memmap LGR.info(f"Removing temporary file: {memmap_filename}") os.remove(memmap_filename) @@ -434,12 +453,7 @@ def _cluster_distance_ratio(self, ratios): Parameters ---------- - labels : :obj:`numpy.ndarray` of shape (n_filters, n_clusters, n_target_voxels) - cluster_centers : :obj:`numpy.ndarray` of shape - (n_filters, n_clusters, max_cluster_size, n_feature_voxels) - Cluster centers. - cluster_counts : :obj:`list` of :obj:`int` - The set of K values tested. Must be n_clusters items long. + ratios Notes ----- From 65b967513fbd89466475590418ba101c48e5e8df Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 14 Jul 2021 15:58:39 -0400 Subject: [PATCH 17/21] More work and debugging. --- nimare/parcellate.py | 60 ++++++++++++++++++++++++++------------------ nimare/utils.py | 2 +- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index 8d12e17be..c62c8b1a9 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -6,13 +6,14 @@ from tempfile import mkstemp import numpy as np +from nilearn._utils import load_niimg from scipy.spatial import distance from sklearn.cluster import KMeans from .base import NiMAREBase from .extract.utils import _get_dataset_dir -from .meta.base import CBMAEstimator from .meta.cbma.ale import ALE +from .meta.cbma.base import CBMAEstimator from .results import MetaResult from .utils import add_metadata_to_dataframe, check_type, listify, use_memmap, vox2mm @@ -48,7 +49,7 @@ class CoordCBP(NiMAREBase): Default is :obj:`nimare.meta.cbma.ale.ALE`. target_image : :obj:`str`, optional Name of meta-analysis results image to use for clustering. - Default is "ale", which is specific to the ALE estimator. + Default is "stat". Notes ----- @@ -82,6 +83,7 @@ def __init__( elif not r and not n: raise ValueError("Either 'r' or 'n' must be provided.") + self.target_mask = load_niimg(target_mask) self.meta_estimator = meta_estimator self.target_image = target_image self.n_clusters = listify(n_clusters) @@ -90,6 +92,7 @@ def __init__( self.alpha = 0.05 self.r = listify(r) self.n = listify(n) + self.masker = None def _preprocess_input(self, dataset): """Mask required input images using either the dataset's mask or the estimator's. 
@@ -126,10 +129,11 @@ def _fit(self, dataset): self.masker = self.masker or dataset.masker # Loop through voxels in target_mask, selecting studies for each and running MACMs (no MCC) - target_ijk = np.vstack(np.where(self.target_mask.get_fdata())) + target_ijk = np.vstack(np.where(self.target_mask.get_fdata())).T + target_ijk = target_ijk[:20, :] # limit to first 20 for testing target_xyz = vox2mm(target_ijk, self.masker.mask_img.affine) - n_target_voxels = target_xyz.shape[1] - n_mask_voxels = self.masker.transform(self.masker.mask_img).shape[0] + n_target_voxels = target_xyz.shape[0] + n_mask_voxels = self.masker.transform(self.masker.mask_img).size n_filters = len(getattr(self, self.filter_type)) labels = np.zeros((n_filters, len(self.n_clusters), n_target_voxels), dtype=int) @@ -154,8 +158,8 @@ def _fit(self, dataset): for i_filter in range(n_filters): kwargs[self.filter_type] = getattr(self, self.filter_type)[i_filter] - for j_coord in n_target_voxels: - xyz = target_xyz[:, j_coord] + for j_coord in range(n_target_voxels): + xyz = target_xyz[j_coord, :] macm_ids = dataset.get_studies_by_coordinate(xyz, **kwargs) coord_dset = dataset.slice(macm_ids) @@ -184,10 +188,10 @@ def _fit(self, dataset): ).fit(data) labels[i_filter, j_cluster, :] = kmeans.labels_ - silhouettes[i_filter, j_cluster] = self._silhouette(data, kmeans.labels_) + silhouettes[i_filter, j_cluster] = self._average_silhouette(data, kmeans.labels_) # Necessary calculations for ratios metric - avg_intracenter_distance = np.mean( + avg_intercenter_distance = np.mean( distance.pdist( kmeans.cluster_centers_, metric="euclidean", @@ -199,24 +203,28 @@ def _fit(self, dataset): cluster_val_center = kmeans.cluster_centers_[k_cluster_val, :] cluster_val_features = data[cluster_val_voxels, :] cluster_val_distances = distance.cdist( - cluster_val_center, + cluster_val_center[None, :], cluster_val_features, metric="euclidean", ) total_voxel_center_distance += np.sum(cluster_val_distances) - avg_voxel_center_distance = total_voxel_center_distance / n_target_voxels - ratio = avg_voxel_center_distance / avg_intracenter_distance + avg_intracluster_distance = total_voxel_center_distance / n_target_voxels + ratio = avg_intercenter_distance / avg_intracluster_distance ratios[i_filter, j_cluster] = ratio + # return labels, silhouettes, ratios # Identify range of optimal filters - retained_filter_idx = self._filter_selection(labels) - labels = labels[retained_filter_idx, ...] - silhouettes = silhouettes[retained_filter_idx, ...] - ratios = ratios[retained_filter_idx, ...] + # retained_filter_idx = self._filter_selection(labels) + # labels = labels[retained_filter_idx, ...] + # silhouettes = silhouettes[retained_filter_idx, ...] + # ratios = ratios[retained_filter_idx, ...] # Calculate metrics - vm_results = self._voxel_misclassification(labels, alpha=self.alpha) # boolx2 + vm_decision_criteria, deviant_props = self._voxel_misclassification( + labels, alpha=self.alpha) # boolx2 vi_results = self._variation_of_information(labels, self.n_clusters) # floatx2 + + return labels, silhouettes, ratios, deviant_props, vi_results s_results = silhouettes # float nvp_results = self._nondominant_voxel_percentage() # ? 
cdr_results = self._cluster_distance_ratio(ratios) # float @@ -348,10 +356,12 @@ def _voxel_misclassification(self, selected_filter_labels, alpha=0.05): decision_results = np.all(decision_criteria, axis=1) good_cluster_idx = np.where(decision_results)[0] - if not good_cluster_idx: + if not good_cluster_idx.size: LGR.warning("No cluster solution found according to VM metric.") - return good_cluster_idx + deviant_props = deviant_counts / n_voxels + + return decision_criteria, deviant_props def _variation_of_information(self, labels, cluster_counts): """Calculate variation of information metric. @@ -453,7 +463,7 @@ def _cluster_distance_ratio(self, ratios): Parameters ---------- - ratios + ratios : (n_filters, n_cluster_solutions) Notes ----- @@ -464,8 +474,8 @@ def _cluster_distance_ratio(self, ratios): voxel to its own cluster center and the average distance between the cluster centers. """ # TODO: Calculate first derivative? - - return ratios + ratio_derivs = np.diff(ratios, n=1, axis=1) + return ratio_derivs def fit(self, dataset, drop_invalid=True): """Perform coordinate-based coactivation-based parcellation on dataset. @@ -478,14 +488,14 @@ def fit(self, dataset, drop_invalid=True): Whether to automatically ignore any studies without the required data or not. Default is True. """ - self._validate_input(dataset, drop_invalid=drop_invalid) - self._preprocess_input(dataset) + # self._validate_input(dataset, drop_invalid=drop_invalid) + # self._preprocess_input(dataset) maps = self._fit(dataset) if hasattr(self, "masker") and self.masker is not None: masker = self.masker else: masker = dataset.masker - + return maps self.results = MetaResult(self, masker, maps) return self.results diff --git a/nimare/utils.py b/nimare/utils.py index 7940c583c..fab94452d 100755 --- a/nimare/utils.py +++ b/nimare/utils.py @@ -232,7 +232,7 @@ def validate_images_df(image_df): # Get parent *directory* if shared path includes common prefix. if not shared_path.endswith(op.sep): shared_path = op.dirname(shared_path) + op.sep - LGR.info(f"Shared path detected: '{shared_path}'") + # LGR.info(f"Shared path detected: '{shared_path}'") image_df_out = image_df.copy() # To avoid SettingWithCopyWarning for abs_col in abs_cols: From 118e7189fb452a15afc2b36d51ee95c4d5ed863f Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 15 Jul 2021 12:18:21 -0400 Subject: [PATCH 18/21] Mention re-labeling (not implemented). --- nimare/parcellate.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nimare/parcellate.py b/nimare/parcellate.py index c62c8b1a9..b584ab1dc 100644 --- a/nimare/parcellate.py +++ b/nimare/parcellate.py @@ -212,6 +212,8 @@ def _fit(self, dataset): ratio = avg_intercenter_distance / avg_intracluster_distance ratios[i_filter, j_cluster] = ratio + # TODO: Implement re-labeling procedure across filters, for each cluster solution + # return labels, silhouettes, ratios # Identify range of optimal filters # retained_filter_idx = self._filter_selection(labels) @@ -221,7 +223,9 @@ def _fit(self, dataset): # Calculate metrics vm_decision_criteria, deviant_props = self._voxel_misclassification( - labels, alpha=self.alpha) # boolx2 + labels, + alpha=self.alpha, + ) # boolx2 vi_results = self._variation_of_information(labels, self.n_clusters) # floatx2 return labels, silhouettes, ratios, deviant_props, vi_results @@ -229,7 +233,7 @@ def _fit(self, dataset): nvp_results = self._nondominant_voxel_percentage() # ? 
cdr_results = self._cluster_distance_ratio(ratios) # float vote = np.nansum( - np.dstack((vm_results, vi_results, s_results, nvp_results, cdr_results)), + np.dstack((vm_decision_criteria, vi_results, s_results, nvp_results, cdr_results)), axis=2, ) assert vote.shape == (n_filters, len(self.n_clusters)) From 530a4de1cd4438a66116f81c0156c811626ea40a Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 21 Dec 2021 13:10:07 -0500 Subject: [PATCH 19/21] Remove added line. --- nimare/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nimare/utils.py b/nimare/utils.py index d6494d487..bdab5edc4 100755 --- a/nimare/utils.py +++ b/nimare/utils.py @@ -956,7 +956,6 @@ def _run_shell_command(command, env=None): shell=True, env=merged_env, ) - while True: line = process.stdout.readline() line = str(line, "utf-8")[:-1] From 1c352f798cff34fddd8851ab4ee6b0af90fd4670 Mon Sep 17 00:00:00 2001 From: JulioAPeraza Date: Tue, 4 Oct 2022 13:31:22 -0400 Subject: [PATCH 20/21] Update test_dataset.py --- nimare/tests/test_dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nimare/tests/test_dataset.py b/nimare/tests/test_dataset.py index 96604630d..31a2d79f1 100644 --- a/nimare/tests/test_dataset.py +++ b/nimare/tests/test_dataset.py @@ -32,7 +32,6 @@ def test_dataset_smoke(): assert isinstance(dset.get_metadata(field="sample_sizes"), list) assert isinstance(dset.get_studies_by_label("cogat_cognitive_control"), list) assert isinstance(dset.get_studies_by_coordinate([20, 20, 20], r=20), list) - assert isinstance(dset.get_studies_by_coordinate(np.array([[20, 20, 20]])), list) assert len(dset.get_studies_by_coordinate([20, 20, 20], n=2)) == 2 assert len(dset.get_studies_by_coordinate([20, 20, 20], n=3)) == 3 From 0c60dd5e1fdefae51e01bcd5e3bc257ec7b4d802 Mon Sep 17 00:00:00 2001 From: JulioAPeraza Date: Tue, 4 Oct 2022 13:44:34 -0400 Subject: [PATCH 21/21] Update utils.py --- nimare/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nimare/utils.py b/nimare/utils.py index 24e233c29..7c388ced1 100644 --- a/nimare/utils.py +++ b/nimare/utils.py @@ -1159,7 +1159,7 @@ def _get_cluster_coms(labeled_cluster_arr): return cluster_coms - + def variation_of_information(X, Y): """Calculate variation of information metric.