vitessce · keller-mark · Nov 20, 2023
diff --git a/docs/notebooks/widget_spaceranger.ipynb b/docs/notebooks/widget_spaceranger.ipynb
@@ -0,0 +1,334 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "nbsphinx": "hidden"
+   },
+   "source": [
+    "# Vitessce Widget Tutorial"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Visualization of Space Ranger outputs for 10x Visium\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from vitessce import (\n",
+    "    VitessceConfig,\n",
+    "    Component as cm,\n",
+    "    CoordinationType as ct,\n",
+    "    OmeTiffWrapper,\n",
+    "    MultiImageWrapper,\n",
+    "    AnnDataWrapper,\n",
+    "    CoordinationLevel as CL,\n",
+    ")\n",
+    "from vitessce.data_utils import (\n",
+    "   rgb_img_to_ome_tiff,\n",
+    "    VAR_CHUNK_SIZE\n",
+    ")\n",
+    "import os\n",
+    "from os.path import join\n",
+    "import requests\n",
+    "from anndata import AnnData, read_hdf\n",
+    "import tifffile\n",
+    "import tarfile\n",
+    "import scanpy as sc\n",
+    "import pandas as pd\n",
+    "import imageio.v3 as iio\n",
+    "import json"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 0. Download data\n",
+    "\n",
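+    "This tutorial uses the human tonsil (CytAssist FFPE) gene and protein expression dataset from 10x Genomics: <https://www.10xgenomics.com/resources/datasets/gene-protein-expression-library-of-human-tonsil-cytassist-ffpe-2-standard>"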
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DATA_DIR = join(\"data\", \"human_tonsil\")\n",
+    "PROCESSED_DIR = join(DATA_DIR, \"processed\")\n",
+    "os.makedirs(PROCESSED_DIR, exist_ok=True)\n",
+    "spatial_filepath = join(DATA_DIR, \"spatial.tar.gz\")\n",
+    "matrix_filepath = join(DATA_DIR, \"matrix.h5\")\n",
+    "analysis_filepath = join(DATA_DIR, \"analysis.tar.gz\")\n",
+    "\n",
+    "base_url = 'https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_FFPE_Protein_Expression_Human_Tonsil/CytAssist_FFPE_Protein_Expression_Human_Tonsil'\n",
+    "\n",
+    "# Download\n",
+    "r_spatial = requests.get(f'{base_url}_spatial.tar.gz')\n",
+    "with open(spatial_filepath, 'wb') as f:\n",
+    "    f.write(r_spatial.content)\n",
+    "\n",
+    "r_matrix = requests.get(f'{base_url}_filtered_feature_bc_matrix.h5')\n",
+    "with open(matrix_filepath, 'wb') as f:\n",
+    "    f.write(r_matrix.content)\n",
+    "\n",
+    "r_analysis = requests.get(f'{base_url}_analysis.tar.gz')\n",
+    "with open(analysis_filepath, 'wb') as f:\n",
+    "    f.write(r_analysis.content)\n",
+    "    \n",
+    "# Un-tar\n",
+    "with tarfile.open(spatial_filepath, \"r:gz\") as tar:\n",
+    "    tar.extractall(path = DATA_DIR)\n",
+    "with tarfile.open(analysis_filepath, \"r:gz\") as tar:\n",
+    "    tar.extractall(path = DATA_DIR)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Convert data to Vitessce-compatible formats\n",
+    "\n",
+    "We need to convert the proprietary 10x Genomics formats to open formats that are compatible with Vitessce, such as OME-TIFF and AnnData."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/mkeller/software/miniconda3/envs/vitessce-python-notebooks/lib/python3.9/site-packages/anndata/_core/anndata.py:1900: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n",
+      "  utils.warn_names_duplicates(\"var\")\n",
+      "/Users/mkeller/software/miniconda3/envs/vitessce-python-notebooks/lib/python3.9/site-packages/anndata/_core/anndata.py:1900: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n",
+      "  utils.warn_names_duplicates(\"var\")\n"
+     ]
+    }
+   ],
+   "source": [
+    "adata = sc.read_10x_h5(matrix_filepath)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tsne_df = pd.read_csv(join(DATA_DIR, \"analysis\", \"tsne\", \"gene_expression_2_components\", \"projection.csv\"), index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata.obsm['X_tsne'] = tsne_df.loc[adata.obs.index.tolist()].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(join(DATA_DIR, \"spatial\", \"scalefactors_json.json\"), \"r\") as f:\n",
+    "    scale_factors = json.load(f)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'regist_target_img_scalef': 0.1533272,\n",
+       " 'tissue_hires_scalef': 0.051109068,\n",
+       " 'tissue_lowres_scalef': 0.01533272,\n",
+       " 'fiducial_diameter_fullres': 383.3196076845379,\n",
+       " 'spot_diameter_fullres': 255.54640512302527}"
+      ]
+     },
+     "execution_count": 74,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "scale_factors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pos_df = pd.read_csv(join(DATA_DIR, \"spatial\", \"tissue_positions.csv\"), index_col=0)[['pxl_row_in_fullres', 'pxl_col_in_fullres']].loc[adata.obs.index.tolist()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata.obsm['X_spatial'] = pos_df.values * scale_factors['tissue_hires_scalef']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata.write_zarr(join(PROCESSED_DIR, \"human_tonsil.h5ad.zarr\"), chunks=(adata.shape[0], VAR_CHUNK_SIZE))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(3, 1634, 2000)"
+      ]
+     },
+     "execution_count": 81,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "img_arr = iio.imread(join(DATA_DIR, \"spatial\", \"tissue_hires_image.png\"))\n",
+    "img_arr = img_arr.transpose((2, 0, 1))\n",
+    "img_arr.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rgb_img_to_ome_tiff(img_arr, join(PROCESSED_DIR, \"human_tonsil.ome.tiff\"), img_name=\"Image\", axes=\"CYX\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## 2. Configure Vitessce\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 92,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vc = VitessceConfig(schema_version=\"1.0.16\", name='Tonsil')\n",
+    "dataset = vc.add_dataset(name='Space Ranger outputs').add_object(AnnDataWrapper(\n",
+    "    adata_path=join(PROCESSED_DIR, \"human_tonsil.h5ad.zarr\"),\n",
+    "    obs_feature_matrix_path=\"X\",\n",
+    "    obs_embedding_paths=[\"obsm/X_tsne\"],\n",
+    "    obs_embedding_names=[\"t-SNE\"],\n",
+    "    obs_locations_path=\"obsm/X_spatial\",\n",
+    "    coordination_values={\n",
+    "        \"obsType\": \"spot\"\n",
+    "    }\n",
+    ")).add_object(OmeTiffWrapper(\n",
+    "    img_path=join(PROCESSED_DIR, \"human_tonsil.ome.tiff\")\n",
+    "))\n",
+    "\n",
+    "spatial = vc.add_view(\"spatial\", dataset=dataset)\n",
+    "lc = vc.add_view(\"layerController\", dataset=dataset)\n",
+    "heatmap = vc.add_view(\"heatmap\", dataset=dataset)\n",
+    "scatterplot = vc.add_view(\"scatterplot\", dataset=dataset, mapping=\"t-SNE\")\n",
+    "\n",
+    "vc.link_views([spatial, lc, heatmap, scatterplot], [\"obsType\"], [\"spot\"])\n",
+    "\n",
+    "vc.layout((spatial | lc) / (heatmap | scatterplot));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Create the Vitessce widget"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4b44074642ab42fa96867c6787c71173",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VitessceWidget(config={'version': '1.0.16', 'name': 'Tonsil', 'description': '', 'datasets': [{'uid': 'A', 'na…"
+      ]
+     },
+     "execution_count": 93,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "vw = vc.widget()\n",
+    "vw"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}