Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:
branches: ["main"]

jobs:
build:
test:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
Expand Down Expand Up @@ -43,3 +43,35 @@ jobs:
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}

test-cuda:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this include 3.13 @aymuos15 ?


steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "pip"

- name: Configure poetry
run: |
python -m pip install --upgrade pip
python -m pip install poetry
- name: Install dependencies with GPU extras
run: |
python -m poetry install --extras gpu
- name: Test CUDA functionality (CPU fallback)
run: |
python -m poetry run pytest unit_tests/test_cupy_connected_components.py -v
- name: Upload coverage results to Codecov (Only on merge to main)
# Only upload to Codecov after a merge to the main branch
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
73 changes: 73 additions & 0 deletions benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@
# scipy needs to be installed to run this benchmark, we use cc3d as it is quicker for 3D data
from scipy import ndimage

# Try to import cupy for GPU acceleration
try:
import cupy as cp
from cupyx.scipy import ndimage as cp_ndimage

CUPY_AVAILABLE = True
except ImportError:
CUPY_AVAILABLE = False
print("CuPy not available. GPU benchmarks will be skipped.")


def generate_random_binary_mask(size: Tuple[int, int, Union[int, None]]) -> np.ndarray:
"""
Expand Down Expand Up @@ -64,6 +74,64 @@ def label_cc3d():
return cc3d_time


def benchmark_cupy(mask: np.ndarray):
    """
    Benchmark the performance of cupy.ndimage.label for connected component labeling on GPU.

    Args:
        mask (np.ndarray): Binary mask to label.

    Returns:
        float: Time taken to label the mask in seconds, or None if CuPy is not
            available or the mask does not fit in GPU memory.
    """
    if not CUPY_AVAILABLE:
        return None

    # Transfer data to GPU. Large volumes can exceed available device memory,
    # so treat an allocation failure as "GPU benchmark unavailable" instead of
    # letting it abort the whole benchmark sweep.
    try:
        mask_gpu = cp.asarray(mask)
    except cp.cuda.memory.OutOfMemoryError:
        print(
            "CuPy OutOfMemoryError: unable to allocate GPU memory for mask. "
            "Skipping GPU benchmark."
        )
        return None

    # Warmup phase: the first launches include kernel compilation / caching
    # overhead that would otherwise skew the timed runs below.
    for _ in range(3):
        cp_ndimage.label(mask_gpu)
    cp.cuda.Stream.null.synchronize()

    def label_cupy():
        cp_ndimage.label(mask_gpu)
        cp.cuda.Stream.null.synchronize()  # Ensure GPU computation is complete

    cupy_time = timeit.timeit(label_cupy, number=10)

    # Clean up GPU memory so the next (possibly larger) volume starts from a
    # freshly drained pool. This runs after timing, so it does not affect the
    # measurement itself.
    del mask_gpu
    cp.get_default_memory_pool().free_all_blocks()

    return cupy_time


def benchmark_panoptica_cupy(mask: np.ndarray):
    """
    Benchmark the performance of panoptica's CuPy backend for connected component labeling.

    Args:
        mask (np.ndarray): Binary mask to label.

    Returns:
        float: Time taken to label the mask in seconds, or None if CuPy is not available.
    """
    if not CUPY_AVAILABLE:
        return None

    # Imported lazily so the module stays importable without panoptica's
    # optional GPU dependencies installed.
    from panoptica._functionals import _connected_components
    from panoptica.utils.constants import CCABackend

    # Time 10 labeling runs routed through panoptica's backend dispatch.
    return timeit.timeit(
        lambda: _connected_components(mask, CCABackend.cupy),
        number=10,
    )


def run_benchmarks(volume_sizes: Tuple[Tuple[int, int, Union[int, None]]]) -> None:
"""
Run benchmark tests for connected component labeling with different volume sizes.
Expand All @@ -80,10 +148,15 @@ def run_benchmarks(volume_sizes: Tuple[Tuple[int, int, Union[int, None]]]) -> No

scipy_time = benchmark_scipy(mask)
cc3d_time = benchmark_cc3d(mask)
cupy_time = benchmark_cupy(mask)

print(f"Volume Size: {size}")
print(f"Scipy Time: {scipy_time:.4f} seconds")
print(f"CC3D Time: {cc3d_time:.4f} seconds")
if cupy_time is not None:
print(f"CuPy Time: {cupy_time:.4f} seconds")
else:
print("CuPy Time: Not available")
print()


Expand Down
13 changes: 13 additions & 0 deletions panoptica/_functionals.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,19 @@ def _connected_components(
from scipy.ndimage import label

cc_arr, n_instances = label(array)
elif cca_backend == CCABackend.cupy:
try:
import cupy as cp
from cupyx.scipy.ndimage import label as cp_label
Comment on lines +68 to +69
Copy link

Copilot AI Aug 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These imports should be moved outside the try block to avoid repeated import overhead. Consider moving these imports to the module level with a try-except block to check CuPy availability once.

Copilot uses AI. Check for mistakes.


array_gpu = cp.asarray(array)
cc_arr, n_instances = cp_label(array_gpu)
cc_arr = cp.asnumpy(cc_arr)
except ImportError:
raise ImportError(
"CuPy is not installed. Please install CuPy to use the GPU backend. "
"You can install it using: pip install cupy-cuda11x or cupy-cuda12x depending on your CUDA version."
)
else:
raise NotImplementedError(cca_backend)

Expand Down
3 changes: 3 additions & 0 deletions panoptica/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,13 @@ class CCABackend(_Enum_Compare):
[CC3D Website](https://github.com/seung-lab/connected-components-3d)
- scipy: Represents the SciPy backend for CCA.
[SciPy Website](https://www.scipy.org/)
- cupy: Represents the CuPy backend for GPU-accelerated CCA.
[CuPy Website](https://cupy.dev/)
"""

cc3d = auto()
scipy = auto()
cupy = auto()


if __name__ == "__main__":
Expand Down
12 changes: 10 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ homepage = "https://github.com/BrainLesion/panoptica"
documentation = "https://panoptica.readthedocs.io/"
readme = "README.md"


# Add the exclude field directly under [tool.poetry]
exclude = ["examples", "benchmark"]

Expand All @@ -34,6 +33,15 @@ plotly = "^5.16.1"
pandas = "^2.1.0"
typer = ">=0.15.0, <1.0.0"

# Optional GPU dependencies - use precompiled wheels
cupy-cuda11x = {version = "^13.0.0", optional = true}
cupy-cuda12x = {version = "^13.0.0", optional = true}

[tool.poetry.extras]
gpu-cuda11 = ["cupy-cuda11x"]
gpu-cuda12 = ["cupy-cuda12x"]
gpu = ["cupy-cuda11x"] # Default to CUDA 11.x

[tool.poetry.group.dev.dependencies]
pytest = ">=8.1.1"
coverage = ">=7.0.1"
Expand All @@ -58,4 +66,4 @@ furo = ">=2024.8.6"
myst-parser = ">=2.0.0"

[tool.poetry.scripts]
panopticacli = "panoptica.cli:app"
panopticacli = "panoptica.cli:app"
2 changes: 1 addition & 1 deletion unit_tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def test_SegmentationClassGroups_config_by_name(self):
self.assertEqual(len(t[k].value_labels), len(v.value_labels))

def test_InstanceApproximator_config(self):
for backend in [None, CCABackend.cc3d, CCABackend.scipy]:
for backend in [None, CCABackend.cc3d, CCABackend.scipy, CCABackend.cupy]:
t = ConnectedComponentsInstanceApproximator(cca_backend=backend)
print(t)
print()
Expand Down
Loading
Loading