From ab0a172a732da320df18579cc39562c880aa5234 Mon Sep 17 00:00:00 2001
From: Mark Keller <7525285+keller-mark@users.noreply.github.com>
Date: Sun, 3 Sep 2023 16:30:08 -0400
Subject: [PATCH 1/5] Refactor with more extras_require

---
 .coveragerc_omit                              |   9 +-
 docs/api_data.rst                             |  10 +-
 pyproject.toml                                |  34 ++-
 tests/test_anndata_utils.py                   |   4 +-
 tests/test_config_converter.py                |   2 +-
 tests/test_entities.py                        | 111 -------
 tests/test_ome_utils.py                       |   2 +-
 vitessce/__init__.py                          |   5 -
 vitessce/data_utils/__init__.py               |  18 --
 vitessce/data_utils/anndata/__init__.py       |  10 +
 vitessce/data_utils/{ => anndata}/anndata.py  |   0
 vitessce/data_utils/entities.py               | 287 ------------------
 vitessce/data_utils/multivec/__init__.py      |   3 +
 vitessce/data_utils/multivec/entities.py      | 124 ++++++++
 .../data_utils/{ => multivec}/multivec.py     |   2 +-
 vitessce/data_utils/ome_tiff/__init__.py      |   4 +
 vitessce/data_utils/ome_tiff/ome_tiff.py      |  46 +++
 vitessce/data_utils/ome_zarr/__init__.py      |   4 +
 .../{ome.py => ome_zarr/ome_zarr.py}          |  49 +--
 .../data_utils/ucsc_cellbrowser/__init__.py   |   4 +
 .../ucsc_cellbrowser/ucsc_cellbrowser.py}     |   2 +-
 21 files changed, 252 insertions(+), 478 deletions(-)
 delete mode 100644 tests/test_entities.py
 create mode 100644 vitessce/data_utils/anndata/__init__.py
 rename vitessce/data_utils/{ => anndata}/anndata.py (100%)
 delete mode 100644 vitessce/data_utils/entities.py
 create mode 100644 vitessce/data_utils/multivec/__init__.py
 create mode 100644 vitessce/data_utils/multivec/entities.py
 rename vitessce/data_utils/{ => multivec}/multivec.py (99%)
 create mode 100644 vitessce/data_utils/ome_tiff/__init__.py
 create mode 100644 vitessce/data_utils/ome_tiff/ome_tiff.py
 create mode 100644 vitessce/data_utils/ome_zarr/__init__.py
 rename vitessce/data_utils/{ome.py => ome_zarr/ome_zarr.py} (70%)
 create mode 100644 vitessce/data_utils/ucsc_cellbrowser/__init__.py
 rename vitessce/{config_converter.py => data_utils/ucsc_cellbrowser/ucsc_cellbrowser.py} (99%)

diff --git a/.coveragerc_omit b/.coveragerc_omit
index 316cb477..2a1a0f58 100644
--- a/.coveragerc_omit
+++ b/.coveragerc_omit
@@ -6,7 +6,8 @@ omit =
     vitessce/widget.py
     vitessce/wrappers.py
     vitessce/repr.py
-    vitessce/data_utils/anndata.py
-    vitessce/data_utils/ome.py
-    vitessce/data_utils/entities.py
-    vitessce/data_utils/multivec.py
\ No newline at end of file
+    vitessce/data_utils/anndata/anndata.py
+    vitessce/data_utils/multivec/multivec.py
+    vitessce/data_utils/ome_tiff/ome_tiff.py
+    vitessce/data_utils/ome_zarr/ome_zarr.py
+    vitessce/data_utils/ucsc_cellbrowser/ucsc_cellbrowser.py
diff --git a/docs/api_data.rst b/docs/api_data.rst
index 34fffd81..5f246ff1 100644
--- a/docs/api_data.rst
+++ b/docs/api_data.rst
@@ -27,7 +27,13 @@ vitessce.export
 vitessce.data_utils
 *****************
 
-.. automodule:: vitessce.data_utils.ome
+.. automodule:: vitessce.data_utils.anndata.anndata
  :members:
-.. automodule:: vitessce.data_utils.anndata
+.. automodule:: vitessce.data_utils.multivec.multivec
+ :members:
+.. automodule:: vitessce.data_utils.ome_tiff.ome_tiff
+ :members:
+.. automodule:: vitessce.data_utils.ome_zarr.ome_zarr
+ :members:
+.. automodule:: vitessce.data_utils.ucsc_cellbrowser.ucsc_cellbrowser
  :members:
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index cd264769..9b930107 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -79,8 +79,40 @@ testing = []
 linting = []
 notebook = []
 
+# data_utils extras_require
+# These dependencies are required
+# to use the corresponding data_utils sub-packages.
+anndata = [
+  'zarr>=2.5.0',
+  'numcodecs>=0.5.7',
+  'anndata>=0.7.8,<0.9',
+  'scanpy>=1.9.3'
+]
+ome_zarr = [
+  'zarr>=2.5.0',
+  'numcodecs>=0.5.7',
+  'ome-zarr==0.2.1'
+]
+ome_tiff = [
+  'generate-tiff-offsets>=0.1.7',
+  'tifffile>=2020.10.1'
+]
+multivec = [
+  'zarr>=2.5.0',
+  'numcodecs>=0.5.7',
+  'negspy>=0.2.24'
+]
+
 [project.urls]
 repository = "https://github.com/vitessce/vitessce-python"
 
 [tool.setuptools]
-packages = ["vitessce", "vitessce.data_utils"]
+packages = [
+  "vitessce",
+  "vitessce.data_utils",
+  "vitessce.data_utils.anndata",
+  "vitessce.data_utils.multivec",
+  "vitessce.data_utils.ome_tiff",
+  "vitessce.data_utils.ome_zarr",
+  "vitessce.data_utils.ucsc_cellbrowser"
+]
diff --git a/tests/test_anndata_utils.py b/tests/test_anndata_utils.py
index b835a00d..c400dfc1 100644
--- a/tests/test_anndata_utils.py
+++ b/tests/test_anndata_utils.py
@@ -7,11 +7,13 @@
 from scipy.io import mmread
 import zarr
 
-from vitessce.data_utils import (
+from vitessce.data_utils.anndata import (
     optimize_arr,
     optimize_adata,
     sort_var_axis,
     to_uint8,
+)
+from vitessce.data_utils.multivec import (
     adata_to_multivec_zarr,
 )
 
diff --git a/tests/test_config_converter.py b/tests/test_config_converter.py
index e4926239..fd339780 100644
--- a/tests/test_config_converter.py
+++ b/tests/test_config_converter.py
@@ -2,7 +2,7 @@
 from unittest.mock import patch, Mock
 from copy import deepcopy
 
-from vitessce import (
+from vitessce.data_utils.ucsc_cellbrowser import (
     CellBrowserToAnndataZarrConverter,
     convert_cell_browser_project_to_anndata,
 )
diff --git a/tests/test_entities.py b/tests/test_entities.py
deleted file mode 100644
index cdcda782..00000000
--- a/tests/test_entities.py
+++ /dev/null
@@ -1,111 +0,0 @@
-import pytest
-
-from vitessce.data_utils.entities import (
-    CellSets,
-    Cells,
-)
-
-
-def test_cells():
-
-    cell_ids = ['cell_1', 'cell_2', 'cell_3']
-
-    cells = Cells(cell_ids=cell_ids)
-    assert list(cells.json.keys()) == cell_ids
-
-    cells.add_mapping('umap', [[1, 1], [2, 2], [3, 3]])
-
-    cells.add_mapping('pca', [[1, 1], [2, 2], [3, 3]])
-
-    cells.add_centroids([[1, 1], [2, 2], [3, 3]])
-
-    cells.add_polygon_outline(
-        [[[1, 1], [1, 1], [1, 1]], [[2, 2], [2, 2], [2, 2]], [[3, 3], [3, 3], [3, 3]]])
-
-    assert cells.json == {
-        'cell_1': {
-            'mappings': {'umap': [1, 1], 'pca': [1, 1]},
-            'xy': [1, 1],
-            'poly': [[1, 1], [1, 1], [1, 1]]
-        },
-        'cell_2': {
-            'mappings': {'umap': [2, 2], 'pca': [2, 2]},
-            'xy': [2, 2],
-            'poly': [[2, 2], [2, 2], [2, 2]]
-        },
-        'cell_3': {
-            'mappings': {'umap': [3, 3], 'pca': [3, 3]},
-            'xy': [3, 3],
-            'poly': [[3, 3], [3, 3], [3, 3]]
-        }
-    }
-
-
-def test_cells_bad_polygon_outline_type():
-
-    cell_ids = ['cell_1', 'cell_2', 'cell_3']
-    cells = Cells(cell_ids=cell_ids)
-    with pytest.raises(Exception) as context:
-        # The extra 3 should be problematic since polygons are two dimensional.
-        cells.add_polygon_outline([
-            [[1, 1, 3], [1, 1], [1, 1]],
-            [[2, 2], [2, 2], [2, 2]],
-            [[3, 3], [3, 3], [3, 3]]
-        ])
-    assert 'Polygon outline for cell_1 should be a list of two element lists i.e xy coordinates' in str(context)
-
-
-def test_cells_bad_mappings_length():
-
-    cell_ids = ['cell_1', 'cell_2', 'cell_3']
-    cells = Cells(cell_ids=cell_ids)
-    with pytest.raises(Exception) as context:
-        # There are 3 cells in this object so only passing in two scatterplot cooridnates is problematic.
-        cells.add_mapping('umap', [[1, 1], [2, 2]])
-    assert 'Coordinates length does not match Cell IDs Length' in str(context)
-
-
-def test_cell_sets():
-
-    cell_sets = CellSets()
-    cell_sets.add_level_zero_node('Clusters')
-
-    cell_sets.add_node('Cluster 1', ['Clusters'])
-    cell_sets.add_node('Cluster 2', ['Clusters'])
-    cell_sets.add_node('Subcluster 1', ['Clusters', 'Cluster 1'], ['cell_1', 'cell_2'])
-    cell_sets.add_node('Subcluster 2', ['Clusters', 'Cluster 1'], ['cell_3', 'cell_4'])
-
-    assert cell_sets.json == {
-        "datatype": "cell",
-        "version": "0.1.2",
-        "tree": [{
-            "name": 'Clusters',
-            "children": [
-                {
-                    "name": 'Cluster 1',
-                    "children": [
-                        {
-                            "name": 'Subcluster 1',
-                            "set": ['cell_1', 'cell_2']
-                        },
-                        {
-                            "name": 'Subcluster 2',
-                            "set": ['cell_3', 'cell_4']
-                        }
-                    ]
-                },
-                {
-                    "name": 'Cluster 2',
-                }
-            ]
-        }]
-    }
-
-
-def test_cell_sets_node_not_found():
-
-    cell_sets = CellSets()
-    with pytest.raises(Exception) as context:
-        cell_sets.add_node('Cluster 1', ['Clusters Not Found'])
-
-    assert "No node with path ['Clusters Not Found'] found to add Cluster 1 to" in str(context)
diff --git a/tests/test_ome_utils.py b/tests/test_ome_utils.py
index a9d183ef..a77758ed 100644
--- a/tests/test_ome_utils.py
+++ b/tests/test_ome_utils.py
@@ -3,7 +3,7 @@
 import zarr
 import numpy as np
 
-from vitessce.data_utils import (
+from vitessce.data_utils.ome_zarr import (
     rgb_img_to_ome_zarr,
 )
 
diff --git a/vitessce/__init__.py b/vitessce/__init__.py
index 3e948518..ae8f91b3 100644
--- a/vitessce/__init__.py
+++ b/vitessce/__init__.py
@@ -21,11 +21,6 @@
     BASE_URL_PLACEHOLDER,
 )
 
-from .config_converter import (
-    CellBrowserToAnndataZarrConverter,  # only exported for testing.
-    convert_cell_browser_project_to_anndata,
-)
-
 from .wrappers import AbstractWrapper
 
 # We allow installation without all of the dependencies that the widget requires.
diff --git a/vitessce/data_utils/__init__.py b/vitessce/data_utils/__init__.py
index 88077ca1..e69de29b 100644
--- a/vitessce/data_utils/__init__.py
+++ b/vitessce/data_utils/__init__.py
@@ -1,18 +0,0 @@
-from .anndata import (
-    optimize_adata,
-    optimize_arr,
-    to_dense,
-    to_uint8,
-    sort_var_axis,
-    to_diamond,
-    VAR_CHUNK_SIZE,
-)
-from .ome import (
-    rgb_img_to_ome_zarr,
-    multiplex_img_to_ome_zarr,
-    rgb_img_to_ome_tiff,
-    multiplex_img_to_ome_tiff,
-)
-from .multivec import (
-    adata_to_multivec_zarr,
-)
diff --git a/vitessce/data_utils/anndata/__init__.py b/vitessce/data_utils/anndata/__init__.py
new file mode 100644
index 00000000..2b596475
--- /dev/null
+++ b/vitessce/data_utils/anndata/__init__.py
@@ -0,0 +1,10 @@
+from .anndata import (
+    optimize_adata,
+    optimize_arr,
+    to_dense,
+    to_uint8,
+    sort_var_axis,
+    to_diamond,
+    VAR_CHUNK_SIZE,
+    cast_arr,
+)
diff --git a/vitessce/data_utils/anndata.py b/vitessce/data_utils/anndata/anndata.py
similarity index 100%
rename from vitessce/data_utils/anndata.py
rename to vitessce/data_utils/anndata/anndata.py
diff --git a/vitessce/data_utils/entities.py b/vitessce/data_utils/entities.py
deleted file mode 100644
index aa32ddfb..00000000
--- a/vitessce/data_utils/entities.py
+++ /dev/null
@@ -1,287 +0,0 @@
-import negspy.coordinates as nc
-import numpy as np
-import math
-
-
-class ArgumentLengthDoesNotMatchCellIdsException(Exception):
-    pass
-
-
-class NodeNotFoundException(Exception):
-    pass
-
-
-class Cells:
-
-    """
-    Generic Cells class for constructing the json needed for client side rendering of cell segmentations/scatterplots (UMAP, PCA etc.).
-
-    :param json The json resulting from various calls to add_mapping, add_polygon_outline etc. that can be served to the client.
-    """
-
-    def __init__(self, cell_ids=[]):
-        """
-        Constructor method
-
-        :param list cell_ids: A list of cell ids to be shown in Vitessce.  The order of these will be used to determine the order of future additions to this class, like segmentations and scatterplot coordinates.
-        """
-        self._cell_ids = cell_ids
-        self.json = dict(zip(cell_ids, [{} for _ in cell_ids]))
-
-    def add_mapping(self, name, coords):
-        """
-        Add a (dimensionality reduction) scatterplot mapping to each cell.
-
-        :param str name: The unique identifier for the mapping, like UMAP, tSNE or PCA.
-        :param list coords: A list of lists like [[1, 2], [3, 4], ...] in the order of cell_ids for each cell to be mapped to a scatterplot coorindate.
-        """
-        if len(coords) != len(self._cell_ids):
-            raise ArgumentLengthDoesNotMatchCellIdsException(
-                'Coordinates length does not match Cell IDs Length')
-        if not isinstance(name, str):
-            raise TypeError(
-                'name argument needs to be a string for adding a scatterplot mapping')
-        for idx, id in enumerate(self._cell_ids):
-            if 'mappings' not in self.json[id]:
-                self.json[id]['mappings'] = {name: coords[idx]}
-            else:
-                self.json[id]['mappings'][name] = coords[idx]
-
-    def add_centroids(self, centroids):
-        """
-        Add a centroid for a spatial segmentation outline to each cell.
-
-        :param list centroids: A list of lists like [[1, 2], [3, 4], ...] in the order of cell_ids for each cell to be mapped to a centroid coorindate.
-        """
-        if len(centroids) != len(self._cell_ids):
-            raise ArgumentLengthDoesNotMatchCellIdsException(
-                'Centroid length does not match Cell IDs Length')
-        if not isinstance(centroids, list) or any([len(centroid) != 2 or not isinstance(centroid, list) for centroid in centroids]):
-            raise TypeError('Centroids should be a list of two element lists')
-        for idx, id in enumerate(self._cell_ids):
-            self.json[id]['xy'] = centroids[idx]
-
-    def add_polygon_outline(self, polygon_outline):
-        """
-        Add a polygon for a spatial segmentation outline to each cell.
-
-        :param list polygon_outline: A list of lists of lists like [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]...] in the order of cell_ids for each cell to be mapped to its segmentation.
-        """
-        if len(polygon_outline) != len(self._cell_ids):
-            raise ArgumentLengthDoesNotMatchCellIdsException(
-                'Segmentations length does not match Cell IDs Length')
-        for idx, id in enumerate(self._cell_ids):
-            if not isinstance(polygon_outline[idx], list) or any([len(coord) != 2 or not isinstance(coord, list) for coord in polygon_outline[idx]]):
-                raise TypeError(
-                    f'Polygon outline for {id} should be a list of two element lists i.e xy coordinates')
-            self.json[id]['poly'] = polygon_outline[idx]
-
-
-class CellSets:
-
-    """
-    Generic CellSets class for constructing the json needed for client side rendering of the cell sets.
-
-    :param json The json resulting from various calls to add_node that can be served to the client.
-    """
-
-    def __init__(self):
-        """
-        Constructor method
-        """
-
-        self.json = {
-            "datatype": "cell",
-            "version": "0.1.2",
-            "tree": []
-        }
-
-    def add_level_zero_node(self, name):
-        """
-        Add a new level zero node to the root of the tree.
-
-        :param str name: Name for the new node
-        """
-        self.json['tree'].append({
-            "name": name,
-            "children": []
-        })
-
-    def add_node(self, name, parent_path, cell_set=None):
-        """
-        Add a node to a parent node.
-
-        :param str name: Name for the new node
-        :param list parent_path: List of strings representing the internal nodes to traverse to reach the desired parent node to which we will add the new node, like ['epithelial', 'meso-epithelial']
-        :param list cell_set: List of cell ids which will be added to the new node as part of the set.
-        """
-        parent_node = self._tree_find_node_by_path(parent_path)
-        if parent_node is None:
-            raise NodeNotFoundException(
-                f'No node with path {parent_path} found to add {name} to')
-        new_node = {"name": name}
-        if cell_set:
-            new_node['set'] = cell_set
-        if 'children' not in parent_node:
-            parent_node['children'] = [new_node]
-        else:
-            parent_node['children'].append(new_node)
-
-    def _find_node_by_path(self, node, path, curr_index):
-        curr_node_name = path[curr_index]
-        if node['name'] == curr_node_name:
-            if curr_index == len(path) - 1:
-                return node
-            if 'children' in node:
-                found_nodes = [
-                    self._find_node_by_path(child, path, curr_index + 1) for child in node['children']
-                ]
-                found_nodes_not_none = [n for n in found_nodes if n]
-                if len(found_nodes_not_none) == 1:
-                    return found_nodes[0]
-        return None
-
-    def _tree_find_node_by_path(self, path):
-        found_nodes = [self._find_node_by_path(
-            node, path, 0) for node in self.json['tree']]
-        found_nodes_not_none = [n for n in found_nodes if n]
-        if len(found_nodes_not_none) == 1:
-            return found_nodes_not_none[0]
-        return None
-
-
-class Molecules():
-
-    """
-    Generic Molecules class for constructing the json needed for client side rendering of spot data.
-
-    :param json The json resulting from various calls to add_molecule.
-    """
-
-    def __init__(self):
-        """
-        Constructor method
-        """
-        self.json = {}
-
-    def add_molecule(self, name, coords):
-        """
-        Add a molecules to a parent node.
-
-        :param str name: Name for the new molecules
-        :param list coords: A list of lists like [[1, 2], [3, 4], ...] or [[1, 2, 3], [3, 4, 5], ...] which denote where in xy space the spot data should be placed for the desired name.
-        """
-        self.json[name] = coords
-
-
-class GenomicProfiles():
-
-    """
-    Generic class for representing genomic profiles.
-    """
-
-    def __init__(self, f, profile_paths, assembly='hg38', starting_resolution=5000, name="Genomic Profiles"):
-        """
-        Constructor method
-
-        :param f: The opened Zarr store object.
-        :type f: zarr.Group
-        :param list[list[str]] profile_paths: A list of cell set paths, one path for each profile.
-        :param str assembly: The genome assembly to use for chromosome lengths, passed to negspy. By default, 'hg38'.
-        :param int starting_resolution: The starting resolution. By default, 5000.
-        :param str name: The name for this set of profiles. By default, 'Genomic Profiles'.
-        """
-
-        self.f = f
-
-        num_profiles = len(profile_paths)
-
-        compressor = 'default'
-
-        chromosomes = [str(chr_name) for chr_name in nc.get_chromorder(
-            assembly)[:25]]  # TODO: should more than chr1-chrM be used?
-        chroms_length_arr = np.array(
-            [nc.get_chrominfo(assembly).chrom_lengths[x] for x in chromosomes], dtype="i8")
-        chroms_cumsum_arr = np.concatenate(
-            (np.array([0]), np.cumsum(chroms_length_arr)))
-
-        chrom_name_to_length = dict(zip(chromosomes, chroms_length_arr))
-        chrom_name_to_cumsum = dict(zip(chromosomes, chroms_cumsum_arr))
-
-        # Prepare to fill in resolutions datasets.
-        resolutions = [starting_resolution * (2 ** x) for x in range(16)]
-
-        chromosomes_group = f.create_group("chromosomes")
-        for chr_name, chr_len in chrom_name_to_length.items():
-            chr_group = chromosomes_group.create_group(chr_name)
-            # Create each resolution group.
-            for resolution in resolutions:
-                chr_shape = (num_profiles, math.ceil(chr_len / resolution))
-                chr_group.create_dataset(str(
-                    resolution), shape=chr_shape, dtype="f4", fill_value=np.nan, compressor=compressor)
-
-        # f.attrs should contain the properties required for HiGlass's "tileset_info" requests.
-        f.attrs['row_infos'] = [
-            {"path": profile_path}
-            for profile_path in profile_paths
-        ]
-        f.attrs['resolutions'] = sorted(resolutions, reverse=True)
-        f.attrs['shape'] = [num_profiles, 256]
-        f.attrs['name'] = name
-        f.attrs['coordSystem'] = assembly
-
-        self.resolutions = resolutions
-        self.chromosomes = chromosomes
-        self.chromosomes_group = chromosomes_group
-        self.chrom_name_to_length = chrom_name_to_length
-        self.num_profiles = num_profiles
-
-        # https://github.com/zarr-developers/zarr-specs/issues/50
-        f.attrs['multiscales'] = [
-            {
-                "version": "0.1",
-                "name": chr_name,
-                "datasets": [
-                    {"path": f"chromosomes/{chr_name}/{resolution}"}
-                    for resolution in sorted(resolutions, reverse=True)
-                ],
-                "type": "zarr-multivec",
-                "metadata": {
-                    "chromoffset": int(chrom_name_to_cumsum[chr_name]),
-                    "chromsize": int(chr_len),
-                }
-            }
-            for (chr_name, chr_len) in list(zip(chromosomes, chroms_length_arr))
-        ]
-
-    def add_profile(self, values, chr_name, profile_index):
-        """
-        Add a single genomic profile to the output store. This function will aggregate for each resolution.
-
-        :param values: A profile array for one chromosome.
-        :type values: np.array
-        :param str chr_name: The name the chromosome corresponding to this array.
-        :param int profile_index: The index of this profile among the list of profiles.
-        """
-        chromosomes_group = self.chromosomes_group
-        resolutions = self.resolutions
-        resolution_exps = [(2**x) for x in range(len(resolutions))]
-
-        chr_len = self.chrom_name_to_length[chr_name]
-        # Fill in the data for this cluster and chromosome at each resolution.
-        for resolution, resolution_exp in zip(resolutions, resolution_exps):
-            arr_len = math.ceil(chr_len / resolution)
-
-            # Pad the array of values with zeros if necessary before reshaping.
-            padding_len = resolution_exp - (values.shape[0] % resolution_exp)
-            if values.shape[0] % resolution_exp > 0:
-                values = np.concatenate((values, np.zeros((padding_len,))))
-            # Reshape to be able to sum every `resolution_exp` number of values.
-            arr = np.reshape(values, (-1, resolution_exp)).sum(axis=-1)
-
-            padding_len = arr_len - arr.shape[0]
-            if padding_len > 0:
-                arr = np.concatenate((arr, np.zeros((padding_len,))))
-            # Set the array in the Zarr store.
-            chromosomes_group[chr_name][str(
-                resolution)][profile_index, :] = arr
diff --git a/vitessce/data_utils/multivec/__init__.py b/vitessce/data_utils/multivec/__init__.py
new file mode 100644
index 00000000..3dad028e
--- /dev/null
+++ b/vitessce/data_utils/multivec/__init__.py
@@ -0,0 +1,3 @@
+from .multivec import (
+    adata_to_multivec_zarr,
+)
diff --git a/vitessce/data_utils/multivec/entities.py b/vitessce/data_utils/multivec/entities.py
new file mode 100644
index 00000000..d8c6cbdd
--- /dev/null
+++ b/vitessce/data_utils/multivec/entities.py
@@ -0,0 +1,124 @@
+import negspy.coordinates as nc
+import numpy as np
+import math
+
+
+class ArgumentLengthDoesNotMatchCellIdsException(Exception):
+    pass
+
+
+class NodeNotFoundException(Exception):
+    pass
+
+
+class GenomicProfiles():
+
+    """
+    Generic class for representing genomic profiles.
+    """
+
+    def __init__(self, f, profile_paths, assembly='hg38', starting_resolution=5000, name="Genomic Profiles"):
+        """
+        Constructor method
+
+        :param f: The opened Zarr store object.
+        :type f: zarr.Group
+        :param list[list[str]] profile_paths: A list of cell set paths, one path for each profile.
+        :param str assembly: The genome assembly to use for chromosome lengths, passed to negspy. By default, 'hg38'.
+        :param int starting_resolution: The starting resolution. By default, 5000.
+        :param str name: The name for this set of profiles. By default, 'Genomic Profiles'.
+        """
+
+        self.f = f
+
+        num_profiles = len(profile_paths)
+
+        compressor = 'default'
+
+        chromosomes = [str(chr_name) for chr_name in nc.get_chromorder(
+            assembly)[:25]]  # TODO: should more than chr1-chrM be used?
+        chroms_length_arr = np.array(
+            [nc.get_chrominfo(assembly).chrom_lengths[x] for x in chromosomes], dtype="i8")
+        chroms_cumsum_arr = np.concatenate(
+            (np.array([0]), np.cumsum(chroms_length_arr)))
+
+        chrom_name_to_length = dict(zip(chromosomes, chroms_length_arr))
+        chrom_name_to_cumsum = dict(zip(chromosomes, chroms_cumsum_arr))
+
+        # Prepare to fill in resolutions datasets.
+        resolutions = [starting_resolution * (2 ** x) for x in range(16)]
+
+        chromosomes_group = f.create_group("chromosomes")
+        for chr_name, chr_len in chrom_name_to_length.items():
+            chr_group = chromosomes_group.create_group(chr_name)
+            # Create each resolution group.
+            for resolution in resolutions:
+                chr_shape = (num_profiles, math.ceil(chr_len / resolution))
+                chr_group.create_dataset(str(
+                    resolution), shape=chr_shape, dtype="f4", fill_value=np.nan, compressor=compressor)
+
+        # f.attrs should contain the properties required for HiGlass's "tileset_info" requests.
+        f.attrs['row_infos'] = [
+            {"path": profile_path}
+            for profile_path in profile_paths
+        ]
+        f.attrs['resolutions'] = sorted(resolutions, reverse=True)
+        f.attrs['shape'] = [num_profiles, 256]
+        f.attrs['name'] = name
+        f.attrs['coordSystem'] = assembly
+
+        self.resolutions = resolutions
+        self.chromosomes = chromosomes
+        self.chromosomes_group = chromosomes_group
+        self.chrom_name_to_length = chrom_name_to_length
+        self.num_profiles = num_profiles
+
+        # https://github.com/zarr-developers/zarr-specs/issues/50
+        f.attrs['multiscales'] = [
+            {
+                "version": "0.1",
+                "name": chr_name,
+                "datasets": [
+                    {"path": f"chromosomes/{chr_name}/{resolution}"}
+                    for resolution in sorted(resolutions, reverse=True)
+                ],
+                "type": "zarr-multivec",
+                "metadata": {
+                    "chromoffset": int(chrom_name_to_cumsum[chr_name]),
+                    "chromsize": int(chr_len),
+                }
+            }
+            for (chr_name, chr_len) in list(zip(chromosomes, chroms_length_arr))
+        ]
+
+    def add_profile(self, values, chr_name, profile_index):
+        """
+        Add a single genomic profile to the output store. This function will aggregate for each resolution.
+
+        :param values: A profile array for one chromosome.
+        :type values: np.array
+        :param str chr_name: The name of the chromosome corresponding to this array.
+        :param int profile_index: The index of this profile among the list of profiles.
+        """
+        chromosomes_group = self.chromosomes_group
+        resolutions = self.resolutions
+        resolution_exps = [(2**x) for x in range(len(resolutions))]
+
+        chr_len = self.chrom_name_to_length[chr_name]
+        # Fill in the data for this cluster and chromosome at each resolution.
+        for resolution, resolution_exp in zip(resolutions, resolution_exps):
+            arr_len = math.ceil(chr_len / resolution)
+
+            # Pad the array of values with zeros if necessary before reshaping.
+            padding_len = resolution_exp - (values.shape[0] % resolution_exp)
+            if values.shape[0] % resolution_exp > 0:
+                values = np.concatenate((values, np.zeros((padding_len,))))
+            # Reshape to be able to sum every `resolution_exp` number of values.
+            arr = np.reshape(values, (-1, resolution_exp)).sum(axis=-1)
+
+            padding_len = arr_len - arr.shape[0]
+            if padding_len > 0:
+                arr = np.concatenate((arr, np.zeros((padding_len,))))
+            # Set the array in the Zarr store.
+            chromosomes_group[chr_name][str(
+                resolution)][profile_index, :] = arr
diff --git a/vitessce/data_utils/multivec.py b/vitessce/data_utils/multivec/multivec.py
similarity index 99%
rename from vitessce/data_utils/multivec.py
rename to vitessce/data_utils/multivec/multivec.py
index c1460f6b..ad041900 100644
--- a/vitessce/data_utils/multivec.py
+++ b/vitessce/data_utils/multivec/multivec.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd
 
-from .anndata import to_dense
+from ..anndata import to_dense
 from .entities import GenomicProfiles
 
 
diff --git a/vitessce/data_utils/ome_tiff/__init__.py b/vitessce/data_utils/ome_tiff/__init__.py
new file mode 100644
index 00000000..cbe49d79
--- /dev/null
+++ b/vitessce/data_utils/ome_tiff/__init__.py
@@ -0,0 +1,4 @@
+from .ome_tiff import (
+    rgb_img_to_ome_tiff,
+    multiplex_img_to_ome_tiff,
+)
diff --git a/vitessce/data_utils/ome_tiff/ome_tiff.py b/vitessce/data_utils/ome_tiff/ome_tiff.py
new file mode 100644
index 00000000..bfd1ec28
--- /dev/null
+++ b/vitessce/data_utils/ome_tiff/ome_tiff.py
@@ -0,0 +1,46 @@
+import numpy as np
+from tifffile import TiffWriter
+
+
+def rgb_img_to_ome_tiff(img_arr, output_path, img_name="Image", axes="CYX"):
+    """
+    Convert an RGB image to OME-TIFF.
+
+    :param img_arr: The image as a 3D array.
+    :type img_arr: np.array
+    :param str output_path: The path to save the OME-TIFF file.
+    :param str img_name: The name of the image. Currently not used by this function.
+    :param str axes: The array axis ordering. By default, "CYX"
+    """
+    img_arr = img_arr.astype(np.dtype('uint8'))
+
+    tiff_writer = TiffWriter(output_path, ome=True)
+    tiff_writer.write(
+        img_arr,
+        metadata={
+            'axes': axes,
+            'Channel': {'Name': ['R', 'G', 'B']},
+        }
+    )
+    tiff_writer.close()
+
+
+def multiplex_img_to_ome_tiff(img_arr, channel_names, output_path, axes="CYX"):
+    """
+    Convert a multiplexed image to OME-TIFF.
+
+    :param img_arr: The image as a 3D, 4D, or 5D array.
+    :type img_arr: np.array
+    :param list[str] channel_names: A list of channel names, passed to the TIFF writer as the Channel Name metadata.
+    :param str output_path: The path to save the OME-TIFF file.
+    :param str axes: The array axis ordering. By default, "CYX"
+    """
+    tiff_writer = TiffWriter(output_path, ome=True)
+    tiff_writer.write(
+        img_arr,
+        metadata={
+            'axes': axes,
+            'Channel': {'Name': channel_names},
+        }
+    )
+    tiff_writer.close()
diff --git a/vitessce/data_utils/ome_zarr/__init__.py b/vitessce/data_utils/ome_zarr/__init__.py
new file mode 100644
index 00000000..92201844
--- /dev/null
+++ b/vitessce/data_utils/ome_zarr/__init__.py
@@ -0,0 +1,4 @@
+from .ome_zarr import (
+    rgb_img_to_ome_zarr,
+    multiplex_img_to_ome_zarr,
+)
diff --git a/vitessce/data_utils/ome.py b/vitessce/data_utils/ome_zarr/ome_zarr.py
similarity index 70%
rename from vitessce/data_utils/ome.py
rename to vitessce/data_utils/ome_zarr/ome_zarr.py
index 8f87c33c..af747e33 100644
--- a/vitessce/data_utils/ome.py
+++ b/vitessce/data_utils/ome_zarr/ome_zarr.py
@@ -1,52 +1,10 @@
 import numpy as np
 import zarr
 from ome_zarr.writer import write_image
-from tifffile import TiffWriter
-from .anndata import cast_arr
 
-
-def rgb_img_to_ome_tiff(img_arr, output_path, img_name="Image", axes="CYX"):
-    """
-    Convert an RGB image to OME-TIFF.
-
-    :param img_arr: The image as a 3D array.
-    :type img_arr: np.array
-    :param str output_path: The path to save the Zarr store.
-    :param str img_name: The name of the image to include in the omero.name NGFF metadata field.
-    :param str axes: The array axis ordering. By default, "CYX"
-    """
-    img_arr = img_arr.astype(np.dtype('uint8'))
-
-    tiff_writer = TiffWriter(output_path, ome=True)
-    tiff_writer.write(
-        img_arr,
-        metadata={
-            'axes': axes,
-            'Channel': {'Name': ['R', 'G', 'B']},
-        }
-    )
-    tiff_writer.close()
-
-
-def multiplex_img_to_ome_tiff(img_arr, channel_names, output_path, axes="CYX"):
-    """
-    Convert a multiplexed image to OME-TIFF.
-
-    :param img_arr: The image as a 3D, 4D, or 5D array.
-    :type img_arr: np.array
-    :param list[str] channel_names: A list of channel names to include in the omero.channels[].label NGFF metadata field.
-    :param str output_path: The path to save the Zarr store.
-    :param str axes: The array axis ordering. By default, "CYX"
-    """
-    tiff_writer = TiffWriter(output_path, ome=True)
-    tiff_writer.write(
-        img_arr,
-        metadata={
-            'axes': axes,
-            'Channel': {'Name': channel_names},
-        }
-    )
-    tiff_writer.close()
+from ..anndata import (
+    cast_arr,
+)
 
 
 def rgb_img_to_ome_zarr(img_arr, output_path, img_name="Image", chunks=(1, 256, 256), axes="cyx"):
@@ -115,6 +73,7 @@ def multiplex_img_to_ome_zarr(img_arr, channel_names, output_path, img_name="Ima
     :param channel_colors: Dict mapping channel names to color strings to use for the omero.channels[].color NGFF metadata field. If provided, keys should match channel_names. By default, None to use "FFFFFF" for all channels.
     :type channel_colors: dict or None
     """
+
     img_arr = cast_arr(img_arr)
 
     dtype_info = np.iinfo(img_arr.dtype) if img_arr.dtype.kind == 'u' or img_arr.dtype.kind == 'i' else np.finfo(img_arr.dtype)
diff --git a/vitessce/data_utils/ucsc_cellbrowser/__init__.py b/vitessce/data_utils/ucsc_cellbrowser/__init__.py
new file mode 100644
index 00000000..dfc0ebbf
--- /dev/null
+++ b/vitessce/data_utils/ucsc_cellbrowser/__init__.py
@@ -0,0 +1,4 @@
+from .ucsc_cellbrowser import (
+    CellBrowserToAnndataZarrConverter,  # only exported for testing.
+    convert_cell_browser_project_to_anndata,
+)
diff --git a/vitessce/config_converter.py b/vitessce/data_utils/ucsc_cellbrowser/ucsc_cellbrowser.py
similarity index 99%
rename from vitessce/config_converter.py
rename to vitessce/data_utils/ucsc_cellbrowser/ucsc_cellbrowser.py
index b3b57bf5..133d04af 100644
--- a/vitessce/config_converter.py
+++ b/vitessce/data_utils/ucsc_cellbrowser/ucsc_cellbrowser.py
@@ -6,7 +6,7 @@
 import gzip
 import io
 
-from vitessce.data_utils import (
+from ..anndata import (
     optimize_adata,
 )
 

From a103b030248bbc3d8013b22389c6b889aea536cc Mon Sep 17 00:00:00 2001
From: Mark Keller <7525285+keller-mark@users.noreply.github.com>
Date: Sun, 3 Sep 2023 17:53:12 -0400
Subject: [PATCH 2/5] Lint, update docs

---
 docs/api_data.rst       | 16 +---------------
 docs/api_data_utils.rst | 20 ++++++++++++++++++++
 docs/index.rst          | 22 ++++++++++++++++++++++
 setup.cfg               |  5 +++++
 4 files changed, 48 insertions(+), 15 deletions(-)
 create mode 100644 docs/api_data_utils.rst

diff --git a/docs/api_data.rst b/docs/api_data.rst
index 5f246ff1..8bbce224 100644
--- a/docs/api_data.rst
+++ b/docs/api_data.rst
@@ -1,4 +1,4 @@
-Data preparation APIs
+Data config APIs
 #####################
 
 Dataset wrapper classes
@@ -23,17 +23,3 @@ vitessce.export
 
 .. automodule:: vitessce.export
  :members:
-
-vitessce.data_utils
-*****************
-
-.. automodule:: vitessce.data_utils.anndata.anndata
- :members:
-.. automodule:: vitessce.data_utils.multivec.multivec
- :members:
-.. automodule:: vitessce.data_utils.ome_tiff.ome_tiff
- :members:
-.. automodule:: vitessce.data_utils.ome_zarr.ome_zarr
- :members:
-.. automodule:: vitessce.data_utils.ucsc_cellbrowser.ucsc_cellbrowser
- :members:
\ No newline at end of file
diff --git a/docs/api_data_utils.rst b/docs/api_data_utils.rst
new file mode 100644
index 00000000..a2af5556
--- /dev/null
+++ b/docs/api_data_utils.rst
@@ -0,0 +1,20 @@
+Data conversion APIs
+#####################
+
+Data conversion functions
+provide functionality for converting data
+to Vitessce-compatible formats.
+
+vitessce.data_utils
+*******************
+
+.. automodule:: vitessce.data_utils.anndata.anndata
+ :members:
+.. automodule:: vitessce.data_utils.multivec.multivec
+ :members:
+.. automodule:: vitessce.data_utils.ome_tiff.ome_tiff
+ :members:
+.. automodule:: vitessce.data_utils.ome_zarr.ome_zarr
+ :members:
+.. automodule:: vitessce.data_utils.ucsc_cellbrowser.ucsc_cellbrowser
+ :members:
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index c47bf0b3..f7958bcd 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -29,6 +29,27 @@ Installation requires Python 3.9 or greater. Install from `PyPI <https://pypi.or
     pip install vitessce[all]
 
 
+Notes:
+
+* The most minimal ``pip install vitessce`` installs only those dependencies required to use the configuration and wrapper classes, such as ``VitessceConfig`` and ``AnnDataWrapper``.
+* The second-most minimal ``pip install vitessce[all]`` additionally installs the dependencies required to use the Jupyter widget, enabling ``VitessceConfig.widget()``.
+* Data conversion dependencies for usage of ``vitessce.data_utils`` submodules must be installed explicitly as described below.
+
+To use functions from ``vitessce.data_utils.{submodule}``, list each submodule you intend to use as one of the `extras <https://peps.python.org/pep-0508/#extras>`_ when installing:
+
+.. code-block:: bash
+
+    pip install vitessce[anndata,multivec,ome_tiff,ome_zarr,ucsc_cellbrowser]
+  
+For example, to install the Jupyter widget and the data conversion dependencies for ``vitessce.data_utils.anndata``:
+
+.. code-block:: bash
+
+    pip install vitessce[all,anndata]
+
+
+
+
 Widget Compatibility
 --------------------
 
@@ -47,6 +68,7 @@ The Vitessce widget is compatible with the following interactive Python platform
    data_examples
    api_config
    api_data
+   api_data_utils
    data_options
    screenshots
 
diff --git a/setup.cfg b/setup.cfg
index b20c7454..f71446ce 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -6,6 +6,11 @@ per-file-ignores =
   # Special case: names are reimported from __init__.py, so unused imports are expected.
   vitessce/__init__.py: F401
   vitessce/data_utils/__init__.py: F401
+  vitessce/data_utils/anndata/__init__.py: F401
+  vitessce/data_utils/multivec/__init__.py: F401
+  vitessce/data_utils/ome_tiff/__init__.py: F401
+  vitessce/data_utils/ome_zarr/__init__.py: F401
+  vitessce/data_utils/ucsc_cellbrowser/__init__.py: F401
 ignore =
   E501, # Ignore line too long
   W605, # Ignore invalid escape sequence '\*'

From 110b749063d4d68eab2f770c75ffaabf1f38a87b Mon Sep 17 00:00:00 2001
From: Mark Keller <7525285+keller-mark@users.noreply.github.com>
Date: Sun, 3 Sep 2023 17:56:55 -0400
Subject: [PATCH 3/5] Update notebook imports

---
 .../notebooks/cellbrowser_to_vitessce_config_conversion.ipynb | 4 ++--
 docs/notebooks/data_export_files.ipynb                        | 4 ++--
 docs/notebooks/data_export_s3.ipynb                           | 4 ++--
 docs/notebooks/widget_brain.ipynb                             | 4 ++--
 docs/notebooks/widget_brain_with_base_dir.ipynb               | 2 +-
 docs/notebooks/widget_genomic_profiles.ipynb                  | 4 ++--
 docs/notebooks/widget_loom.ipynb                              | 4 ++--
 docs/notebooks/widget_pbmc.ipynb                              | 4 ++--
 8 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb b/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb
index 3ca1e0cf..8c68cb94 100644
--- a/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb
+++ b/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb
@@ -35,11 +35,11 @@
     "import json\n",
     "from os.path import join\n",
     "from vitessce import (\n",
-    "    convert_cell_browser_project_to_anndata,\n",
     "    AnnDataWrapper,\n",
     "    VitessceConfig,\n",
     ")\n",
-    "from vitessce.data_utils import VAR_CHUNK_SIZE"
+    "from vitessce.data_utils.anndata import VAR_CHUNK_SIZE\n",
+    "from vitessce.data_utils.ucsc_cellbrowser import convert_cell_browser_project_to_anndata"
    ]
   },
   {
diff --git a/docs/notebooks/data_export_files.ipynb b/docs/notebooks/data_export_files.ipynb
index d2b17c6e..3493a6be 100644
--- a/docs/notebooks/data_export_files.ipynb
+++ b/docs/notebooks/data_export_files.ipynb
@@ -46,7 +46,7 @@
     "    CoordinationType as ct,\n",
     "    AnnDataWrapper,\n",
     ")\n",
-    "from vitessce.data_utils import (\n",
+    "from vitessce.data_utils.anndata import (\n",
     "    optimize_adata,\n",
     "    VAR_CHUNK_SIZE,\n",
     ")"
@@ -228,7 +228,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.0"
+   "version": "3.9.0"
   }
  },
  "nbformat": 4,
diff --git a/docs/notebooks/data_export_s3.ipynb b/docs/notebooks/data_export_s3.ipynb
index c84a0e43..48fe44d1 100644
--- a/docs/notebooks/data_export_s3.ipynb
+++ b/docs/notebooks/data_export_s3.ipynb
@@ -47,7 +47,7 @@
     "    CoordinationType as ct,\n",
     "    AnnDataWrapper,\n",
     ")\n",
-    "from vitessce.data_utils import (\n",
+    "from vitessce.data_utils.anndata import (\n",
     "    optimize_adata,\n",
     "    VAR_CHUNK_SIZE,\n",
     ")"
@@ -222,7 +222,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.0"
+   "version": "3.9.0"
   }
  },
  "nbformat": 4,
diff --git a/docs/notebooks/widget_brain.ipynb b/docs/notebooks/widget_brain.ipynb
index 759bd928..91f989cb 100644
--- a/docs/notebooks/widget_brain.ipynb
+++ b/docs/notebooks/widget_brain.ipynb
@@ -43,7 +43,7 @@
     "    CoordinationType as ct,\n",
     "    AnnDataWrapper,\n",
     ")\n",
-    "from vitessce.data_utils import (\n",
+    "from vitessce.data_utils.anndata import (\n",
     "    optimize_adata,\n",
     "    VAR_CHUNK_SIZE,\n",
     ")"
@@ -287,7 +287,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.0"
+   "version": "3.9.0"
   }
  },
  "nbformat": 4,
diff --git a/docs/notebooks/widget_brain_with_base_dir.ipynb b/docs/notebooks/widget_brain_with_base_dir.ipynb
index 015a3e67..7f4a0e21 100644
--- a/docs/notebooks/widget_brain_with_base_dir.ipynb
+++ b/docs/notebooks/widget_brain_with_base_dir.ipynb
@@ -43,7 +43,7 @@
     "    AnnDataWrapper,\n",
     "    BASE_URL_PLACEHOLDER,\n",
     ")\n",
-    "from vitessce.data_utils import (\n",
+    "from vitessce.data_utils.anndata import (\n",
     "    optimize_adata,\n",
     "    VAR_CHUNK_SIZE,\n",
     ")"
diff --git a/docs/notebooks/widget_genomic_profiles.ipynb b/docs/notebooks/widget_genomic_profiles.ipynb
index 1794598c..e7059308 100644
--- a/docs/notebooks/widget_genomic_profiles.ipynb
+++ b/docs/notebooks/widget_genomic_profiles.ipynb
@@ -38,7 +38,7 @@
     "    AnnDataWrapper,\n",
     "    MultivecZarrWrapper,\n",
     ")\n",
-    "from vitessce.data_utils import (\n",
+    "from vitessce.data_utils.multivec import (\n",
     "    adata_to_multivec_zarr,\n",
     ")\n",
     "from os.path import join\n",
@@ -212,7 +212,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.4"
+   "version": "3.9.0"
   }
  },
  "nbformat": 4,
diff --git a/docs/notebooks/widget_loom.ipynb b/docs/notebooks/widget_loom.ipynb
index 0c4f958b..9e1b2f6e 100644
--- a/docs/notebooks/widget_loom.ipynb
+++ b/docs/notebooks/widget_loom.ipynb
@@ -41,7 +41,7 @@
     "    CoordinationType as ct,\n",
     "    AnnDataWrapper,\n",
     ")\n",
-    "from vitessce.data_utils import (\n",
+    "from vitessce.data_utils.anndata import (\n",
     "    optimize_adata,\n",
     "    to_diamond,\n",
     "    VAR_CHUNK_SIZE,\n",
@@ -211,7 +211,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.0"
+   "version": "3.9.0"
   }
  },
  "nbformat": 4,
diff --git a/docs/notebooks/widget_pbmc.ipynb b/docs/notebooks/widget_pbmc.ipynb
index f972949c..5b519bbf 100644
--- a/docs/notebooks/widget_pbmc.ipynb
+++ b/docs/notebooks/widget_pbmc.ipynb
@@ -43,7 +43,7 @@
     "    CoordinationType as ct,\n",
     "    AnnDataWrapper,\n",
     ")\n",
-    "from vitessce.data_utils import (\n",
+    "from vitessce.data_utils.anndata import (\n",
     "    optimize_adata,\n",
     "    VAR_CHUNK_SIZE,\n",
     ")"
@@ -201,7 +201,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.0"
+   "version": "3.9.0"
   }
  },
  "nbformat": 4,

From 5a8953d382950b1b697f7bc7ffd13b99b5aeb335 Mon Sep 17 00:00:00 2001
From: Mark Keller <7525285+keller-mark@users.noreply.github.com>
Date: Sun, 3 Sep 2023 17:57:51 -0400
Subject: [PATCH 4/5] Coverage omit

---
 .coveragerc_omit | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.coveragerc_omit b/.coveragerc_omit
index 2a1a0f58..2e4f93e4 100644
--- a/.coveragerc_omit
+++ b/.coveragerc_omit
@@ -8,6 +8,7 @@ omit =
     vitessce/repr.py
     vitessce/data_utils/anndata/anndata.py
     vitessce/data_utils/multivec/multivec.py
+    vitessce/data_utils/multivec/entities.py
     vitessce/data_utils/ome_tiff/ome_tiff.py
     vitessce/data_utils/ome_zarr/ome_zarr.py
     vitessce/data_utils/ucsc_cellbrowser/ucsc_cellbrowser.py

From d6e5309496eb2c08714e114ccddebff5090b342a Mon Sep 17 00:00:00 2001
From: Mark Keller <7525285+keller-mark@users.noreply.github.com>
Date: Sun, 3 Sep 2023 18:00:14 -0400
Subject: [PATCH 5/5] Update imports in demos

---
 demos/codeluppi-2018/src/convert_to_cells_h5ad_zarr.py | 2 +-
 demos/codeluppi-2018/src/convert_to_ome_zarr.py        | 2 +-
 demos/combat-2022/src/convert_to_zarr.py               | 2 +-
 demos/eng-2019/src/convert_to_csv.py                   | 2 +-
 demos/habib-2017/src/convert_to_zarr.py                | 2 +-
 demos/human-lymph-node-10x-visium/src/create_zarr.py   | 6 ++++--
 demos/kuppe-2022/src/convert_to_zarr.py                | 6 ++++--
 demos/marshall-2022/src/convert_to_zarr.py             | 2 +-
 demos/meta-2022-azimuth/src/convert_to_zarr.py         | 2 +-
 demos/satija-2020/src/convert_to_zarr.py               | 2 +-
 10 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/demos/codeluppi-2018/src/convert_to_cells_h5ad_zarr.py b/demos/codeluppi-2018/src/convert_to_cells_h5ad_zarr.py
index 09bff9d5..4afeb02e 100644
--- a/demos/codeluppi-2018/src/convert_to_cells_h5ad_zarr.py
+++ b/demos/codeluppi-2018/src/convert_to_cells_h5ad_zarr.py
@@ -5,7 +5,7 @@
 import scanpy as sc
 import geopandas as gpd
 from shapely.geometry import Polygon
-from vitessce.data_utils import (
+from vitessce.data_utils.anndata import (
     to_uint8,
     sort_var_axis,
     optimize_adata,
diff --git a/demos/codeluppi-2018/src/convert_to_ome_zarr.py b/demos/codeluppi-2018/src/convert_to_ome_zarr.py
index 4ef9fa0d..a581074a 100644
--- a/demos/codeluppi-2018/src/convert_to_ome_zarr.py
+++ b/demos/codeluppi-2018/src/convert_to_ome_zarr.py
@@ -1,7 +1,7 @@
 import argparse
 import h5py
 import numpy as np
-from vitessce.data_utils import (
+from vitessce.data_utils.ome_zarr import (
     multiplex_img_to_ome_zarr,
 )
 
diff --git a/demos/combat-2022/src/convert_to_zarr.py b/demos/combat-2022/src/convert_to_zarr.py
index e90f097b..e5582788 100644
--- a/demos/combat-2022/src/convert_to_zarr.py
+++ b/demos/combat-2022/src/convert_to_zarr.py
@@ -2,7 +2,7 @@
 from anndata import read_h5ad
 import numpy as np
 from scipy import sparse
-from vitessce.data_utils import to_uint8
+from vitessce.data_utils.anndata import to_uint8
 
 
 def convert_h5ad_to_zarr(input_path, output_path):
diff --git a/demos/eng-2019/src/convert_to_csv.py b/demos/eng-2019/src/convert_to_csv.py
index e1214333..87564a73 100644
--- a/demos/eng-2019/src/convert_to_csv.py
+++ b/demos/eng-2019/src/convert_to_csv.py
@@ -1,7 +1,7 @@
 import argparse
 import json
 import pandas as pd
-from vitessce.data_utils import to_diamond
+from vitessce.data_utils.anndata import to_diamond
 
 
 def convert_to_csv(args):
diff --git a/demos/habib-2017/src/convert_to_zarr.py b/demos/habib-2017/src/convert_to_zarr.py
index e08bda31..ae1c3910 100644
--- a/demos/habib-2017/src/convert_to_zarr.py
+++ b/demos/habib-2017/src/convert_to_zarr.py
@@ -1,6 +1,6 @@
 import argparse
 from anndata import read_h5ad
-from vitessce.data_utils import (
+from vitessce.data_utils.anndata import (
     to_uint8,
     sort_var_axis,
     optimize_adata,
diff --git a/demos/human-lymph-node-10x-visium/src/create_zarr.py b/demos/human-lymph-node-10x-visium/src/create_zarr.py
index 5c7b44da..028dabf6 100644
--- a/demos/human-lymph-node-10x-visium/src/create_zarr.py
+++ b/demos/human-lymph-node-10x-visium/src/create_zarr.py
@@ -2,11 +2,13 @@
 import scanpy as sc
 import numpy as np
 import scipy.cluster
-from vitessce.data_utils import (
+from vitessce.data_utils.anndata import (
     to_diamond,
-    rgb_img_to_ome_zarr,
     optimize_adata,
 )
+from vitessce.data_utils.ome_zarr import (
+    rgb_img_to_ome_zarr,
+)
 
 
 def create_zarr(output_adata, output_img):
diff --git a/demos/kuppe-2022/src/convert_to_zarr.py b/demos/kuppe-2022/src/convert_to_zarr.py
index 21e5968c..ebbf4208 100644
--- a/demos/kuppe-2022/src/convert_to_zarr.py
+++ b/demos/kuppe-2022/src/convert_to_zarr.py
@@ -4,12 +4,14 @@
 import json
 from anndata import read_h5ad, AnnData
 import imageio.v2 as imageio
-from vitessce.data_utils import (
+from vitessce.data_utils.anndata import (
     to_diamond,
     to_uint8,
-    rgb_img_to_ome_zarr,
     optimize_adata,
 )
+from vitessce.data_utils.ome_zarr import (
+    rgb_img_to_ome_zarr,
+)
 
 
 def process_h5ad_files(args):
diff --git a/demos/marshall-2022/src/convert_to_zarr.py b/demos/marshall-2022/src/convert_to_zarr.py
index b99aa324..7729d7be 100644
--- a/demos/marshall-2022/src/convert_to_zarr.py
+++ b/demos/marshall-2022/src/convert_to_zarr.py
@@ -2,7 +2,7 @@
 from anndata import read_h5ad
 import numpy as np
 import scanpy as sc
-from vitessce.data_utils import (
+from vitessce.data_utils.anndata import (
     to_diamond,
     to_uint8,
     optimize_adata,
diff --git a/demos/meta-2022-azimuth/src/convert_to_zarr.py b/demos/meta-2022-azimuth/src/convert_to_zarr.py
index 10c82425..82b76605 100644
--- a/demos/meta-2022-azimuth/src/convert_to_zarr.py
+++ b/demos/meta-2022-azimuth/src/convert_to_zarr.py
@@ -1,7 +1,7 @@
 import argparse
 from anndata import read_h5ad
 from scipy import sparse
-from vitessce.data_utils import (
+from vitessce.data_utils.anndata import (
     to_uint8,
 )
 
diff --git a/demos/satija-2020/src/convert_to_zarr.py b/demos/satija-2020/src/convert_to_zarr.py
index cef83842..7cbe8774 100644
--- a/demos/satija-2020/src/convert_to_zarr.py
+++ b/demos/satija-2020/src/convert_to_zarr.py
@@ -2,7 +2,7 @@
 import json
 from anndata import read_h5ad
 import pandas as pd
-from vitessce.data_utils import (
+from vitessce.data_utils.anndata import (
     to_uint8,
     sort_var_axis,
     optimize_adata,