|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "metadata": { |
| 6 | + "nbsphinx": "hidden" |
| 7 | + }, |
| 8 | + "source": [ |
| 9 | + "# Vitessce Widget Tutorial" |
| 10 | + ] |
| 11 | + }, |
| 12 | + { |
| 13 | + "cell_type": "markdown", |
| 14 | + "metadata": {}, |
| 15 | + "source": [ |
| 16 | + "# Visualization of single-cell RNA-seq data" |
| 17 | + ] |
| 18 | + }, |
| 19 | + { |
| 20 | + "cell_type": "markdown", |
| 21 | + "metadata": {}, |
| 22 | + "source": [ |
| 23 | + "## 1. Import dependencies\n", |
| 24 | + "\n", |
| 25 | + "We need to import the classes and functions that we will be using from the corresponding packages." |
| 26 | + ] |
| 27 | + }, |
| 28 | + { |
| 29 | + "cell_type": "code", |
| 30 | + "execution_count": null, |
| 31 | + "metadata": {}, |
| 32 | + "outputs": [], |
| 33 | + "source": [ |
| 34 | + "import os\n", |
| 35 | + "from os.path import join, isfile, isdir\n", |
| 36 | + "from urllib.request import urlretrieve\n", |
| 37 | + "from anndata import read_h5ad\n", |
| 38 | + "import scanpy as sc\n", |
| 39 | + "\n", |
| 40 | + "from vitessce import (\n", |
| 41 | + " VitessceConfig,\n", |
| 42 | + " Component as cm,\n", |
| 43 | + " CoordinationType as ct,\n", |
| 44 | + " AnnDataWrapper,\n", |
| 45 | + ")\n", |
| 46 | + "from vitessce.data_utils import (\n", |
| 47 | + " optimize_adata,\n", |
| 48 | + " VAR_CHUNK_SIZE,\n", |
| 49 | + ")" |
| 50 | + ] |
| 51 | + }, |
| 52 | + { |
| 53 | + "cell_type": "markdown", |
| 54 | + "metadata": {}, |
| 55 | + "source": [ |
| 56 | + "## 2. Download the data\n", |
| 57 | + "\n", |
| 58 | + "For this example, we need to download a dataset from the [COVID-19 Cell Atlas](https://www.covid19cellatlas.org/index.healthy.html#habib17)." |
| 59 | + ] |
| 60 | + }, |
| 61 | + { |
| 62 | + "cell_type": "code", |
| 63 | + "execution_count": null, |
| 64 | + "metadata": {}, |
| 65 | + "outputs": [], |
| 66 | + "source": [ |
| 67 | + "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", |
| 68 | + "if not isfile(adata_filepath):\n", |
| 69 | + " os.makedirs(\"data\", exist_ok=True)\n", |
| 70 | + " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" |
| 71 | + ] |
| 72 | + }, |
| 73 | + { |
| 74 | + "cell_type": "markdown", |
| 75 | + "metadata": {}, |
| 76 | + "source": [ |
| 77 | + "## 3. Load the data\n", |
| 78 | + "\n", |
| 79 | + "Note: `read_h5ad` may print a `FutureWarning`." |
| 80 | + ] |
| 81 | + }, |
| 82 | + { |
| 83 | + "cell_type": "code", |
| 84 | + "execution_count": null, |
| 85 | + "metadata": {}, |
| 86 | + "outputs": [], |
| 87 | + "source": [ |
| 88 | + "adata = read_h5ad(adata_filepath)" |
| 89 | + ] |
| 90 | + }, |
| 91 | + { |
| 92 | + "cell_type": "markdown", |
| 93 | + "metadata": { |
| 94 | + "tags": [] |
| 95 | + }, |
| 96 | + "source": [ |
| 97 | + "## 3.1. Preprocess the Data For Visualization\n", |
| 98 | + "\n", |
| 99 | + "This dataset contains 25,587 genes. We flag the top 50 highly variable genes, ranked by normalized dispersion (`dispersions_norm`), in a boolean column that is later passed to Vitessce as the initial feature filter (used, for example, by heatmap views); any boolean array can be used as the filter." |
| 100 | + ] |
| 101 | + }, |
| 102 | + { |
| 103 | + "cell_type": "code", |
| 104 | + "execution_count": null, |
| 105 | + "metadata": {}, |
| 106 | + "outputs": [], |
| 107 | + "source": [ |
| 108 | + "top_dispersion = adata.var[\"dispersions_norm\"][\n", |
| 109 | + " sorted(\n", |
| 110 | + " range(len(adata.var[\"dispersions_norm\"])),\n", |
| 111 | + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", |
| 112 | + " )[-51:][0]\n", |
| 113 | + "]\n", |
| 114 | + "adata.var[\"top_highly_variable\"] = (\n", |
| 115 | + " adata.var[\"dispersions_norm\"] > top_dispersion\n", |
| 116 | + ")" |
| 117 | + ] |
| 118 | + }, |
| 119 | + { |
| 120 | + "cell_type": "markdown", |
| 121 | + "metadata": {}, |
| 122 | + "source": [ |
| 123 | + "## 3.2. Save the Data to a Zarr Store\n", |
| 124 | + "\n", |
| 125 | + "We want to convert the original `h5ad` file to a [Zarr](https://zarr.readthedocs.io/en/stable/) store, which Vitessce is able to load. We can use the `optimize_adata` function to ensure that all arrays and dataframe columns that we intend to use in our visualization are in the optimal format to be loaded by Vitessce. This function will cast arrays to numerical data types that take up less space (as long as the values allow). Note: unused arrays and columns (i.e., not specified in any of the parameters to `optimize_adata`) will not be copied into the new AnnData object." |
| 126 | + ] |
| 127 | + }, |
| 128 | + { |
| 129 | + "cell_type": "code", |
| 130 | + "execution_count": null, |
| 131 | + "metadata": {}, |
| 132 | + "outputs": [], |
| 133 | + "source": [ |
| 134 | + "zarr_filepath = join(\"data\", \"habib17.h5ad.zarr\")\n", |
| 135 | + "if not isdir(zarr_filepath):\n", |
| 136 | + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" |
| 137 | + ] |
| 138 | + }, |
| 139 | + { |
| 140 | + "cell_type": "code", |
| 141 | + "execution_count": null, |
| 142 | + "metadata": {}, |
| 143 | + "outputs": [], |
| 144 | + "source": [ |
| 145 | + "vc = VitessceConfig(\n", |
| 146 | + " schema_version=\"1.0.17\",\n", |
| 147 | + " name='Habib et al',\n", |
| 148 | + " description='COVID-19 Healthy Donor Brain'\n", |
| 149 | + ")\n", |
| 150 | + "\n", |
| 151 | + "# Add data.\n", |
| 152 | + "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", |
| 153 | + " adata_path=zarr_filepath,\n", |
| 154 | + " obs_embedding_paths=[\"obsm/X_umap\"],\n", |
| 155 | + " obs_embedding_names=[\"UMAP\"],\n", |
| 156 | + " obs_set_paths=[\"obs/CellType\"],\n", |
| 157 | + " obs_set_names=[\"Cell Type\"],\n", |
| 158 | + " obs_feature_matrix_path=\"X\",\n", |
| 159 | + " initial_feature_filter_path=\"var/top_highly_variable\",\n", |
| 160 | + " coordination_values={\n", |
| 161 | + " \"obsType\": 'cell',\n", |
| 162 | + " \"featureType\": 'gene',\n", |
| 163 | + " \"featureValueType\": 'expression',\n", |
| 164 | + " },\n", |
| 165 | + ")).add_object(AnnDataWrapper(\n", |
| 166 | + " adata_path=zarr_filepath,\n", |
| 167 | + " obs_feature_column_paths=[\"obs/percent_mito\"],\n", |
| 168 | + " coordination_values={\n", |
| 169 | + " \"obsType\": 'cell',\n", |
| 170 | + " \"featureType\": 'qualityMetric',\n", |
| 171 | + " \"featureValueType\": 'value',\n", |
| 172 | + " }\n", |
| 173 | + "))\n", |
| 174 | + "\n", |
| 175 | + "# Add views.\n", |
| 176 | + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", |
| 177 | + "scatterplot_2 = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", |
| 178 | + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", |
| 179 | + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", |
| 180 | + "histogram = vc.add_view(cm.FEATURE_VALUE_HISTOGRAM, dataset=dataset)\n", |
| 181 | + "\n", |
| 182 | + "# Link views.\n", |
| 183 | + "\n", |
| 184 | + "# Color one of the two scatterplots by the percent_mito quality metric.\n", |
| 185 | + "# Also use this quality metric for the histogram values.\n", |
| 186 | + "vc.link_views_by_dict([histogram, scatterplot_2], {\n", |
| 187 | + " \"obsType\": 'cell',\n", |
| 188 | + " \"featureType\": 'qualityMetric',\n", |
| 189 | + " \"featureValueType\": 'value',\n", |
| 190 | + " \"featureSelection\": [\"percent_mito\"],\n", |
| 191 | + " \"obsColorEncoding\": \"geneSelection\",\n", |
| 192 | + "}, meta=False)\n", |
| 193 | + "\n", |
| 194 | + "# Synchronize the zooming and panning of the two scatterplots\n", |
| 195 | + "vc.link_views_by_dict([scatterplot, scatterplot_2], {\n", |
| 196 | + " \"embeddingZoom\": None,\n", |
| 197 | + " \"embeddingTargetX\": None,\n", |
| 198 | + " \"embeddingTargetY\": None,\n", |
| 199 | + "}, meta=False)\n", |
| 200 | + "\n", |
| 201 | + "# Define the layout.\n", |
| 202 | + "vc.layout((scatterplot | (cell_sets / genes)) / (scatterplot_2 | histogram));" |
| 203 | + ] |
| 204 | + }, |
| 205 | + { |
| 206 | + "cell_type": "markdown", |
| 207 | + "metadata": {}, |
| 208 | + "source": [ |
| 209 | + "## 5. Create the widget\n" |
| 210 | + ] |
| 211 | + }, |
| 212 | + { |
| 213 | + "cell_type": "code", |
| 214 | + "execution_count": null, |
| 215 | + "metadata": {}, |
| 216 | + "outputs": [], |
| 217 | + "source": [ |
| 218 | + "vw = vc.widget()\n", |
| 219 | + "vw" |
| 220 | + ] |
| 221 | + }, |
| 222 | + { |
| 223 | + "cell_type": "code", |
| 224 | + "execution_count": null, |
| 225 | + "metadata": {}, |
| 226 | + "outputs": [], |
| 227 | + "source": [] |
| 228 | + } |
| 229 | + ], |
| 230 | + "metadata": { |
| 231 | + "kernelspec": { |
| 232 | + "display_name": "Python 3 (ipykernel)", |
| 233 | + "language": "python", |
| 234 | + "name": "python3" |
| 235 | + }, |
| 236 | + "language_info": { |
| 237 | + "codemirror_mode": { |
| 238 | + "name": "ipython", |
| 239 | + "version": 3 |
| 240 | + }, |
| 241 | + "file_extension": ".py", |
| 242 | + "mimetype": "text/x-python", |
| 243 | + "name": "python", |
| 244 | + "nbconvert_exporter": "python", |
| 245 | + "pygments_lexer": "ipython3", |
| 246 | + "version": "3.10.14" |
| 247 | + } |
| 248 | + }, |
| 249 | + "nbformat": 4, |
| 250 | + "nbformat_minor": 4 |
| 251 | +} |
0 commit comments