
Commit 324a5b0

Utilities for sorting spatialdata points along z-order curve (#476)
* Add zorder functions to data_utils
* Add comments, updated notebook
* Comments
* Comments
* WIP: unit tests
* Update z-order query tests
* Use fixtures
* Update tests
* WIP: update nb
* Add helper functions
* Update nb
* Lint
* Use small dataset for tests
* Lint
* Add spatialdata dep for tests
* Update
* Update
* Update
* Update
1 parent 569117a commit 324a5b0
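
As context for the diff below: a z-order (Morton) sort maps 2D point coordinates to a 1D key by quantizing each coordinate to an integer grid and interleaving the bits of the two integers, so that points that are near each other in space tend to land in nearby rows of the sorted Parquet file. The following is a minimal, illustrative sketch of that idea in plain numpy/pandas; the helper names (interleave_bits_2d, morton_sort) are hypothetical and are not the functions added in this commit.

# Illustrative sketch only; assumes numpy/pandas and hypothetical helper names,
# not the actual utilities introduced in this commit.
import numpy as np
import pandas as pd


def interleave_bits_2d(ix, iy, bits=16):
    # Interleave the low `bits` bits of two uint64 arrays: x bits go to even
    # positions, y bits to odd positions, yielding a 1D Morton (z-order) code.
    code = np.zeros(ix.shape, dtype=np.uint64)
    for b in range(bits):
        code |= ((ix >> np.uint64(b)) & np.uint64(1)) << np.uint64(2 * b)
        code |= ((iy >> np.uint64(b)) & np.uint64(1)) << np.uint64(2 * b + 1)
    return code


def morton_sort(df, x="x", y="y", bits=16):
    # Quantize coordinates onto a (2**bits x 2**bits) grid over the bounding
    # box, compute Morton codes, and return the rows sorted by that code.
    scale = (2 ** bits) - 1
    ix = np.floor((df[x] - df[x].min()) / (df[x].max() - df[x].min()) * scale).astype(np.uint64).to_numpy()
    iy = np.floor((df[y] - df[y].min()) / (df[y].max() - df[y].min()) * scale).astype(np.uint64).to_numpy()
    return df.assign(morton_code=interleave_bits_2d(ix, iy, bits)).sort_values("morton_code")


rng = np.random.default_rng(0)
points = pd.DataFrame({"x": rng.uniform(0, 100, 1000), "y": rng.uniform(0, 100, 1000)})
print(morton_sort(points).head())

Sorting by such a key is what later allows a rectangular spatial query to touch only a small number of contiguous Parquet row groups.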

21 files changed: +1310 -0 lines changed


.coveragerc_omit

Lines changed: 1 addition & 0 deletions
@@ -12,5 +12,6 @@ omit =
     src/vitessce/data_utils/ome.py
     src/vitessce/data_utils/entities.py
     src/vitessce/data_utils/multivec.py
+    src/vitessce/data_utils/spatialdata_points_zorder.py
     src/vitessce/widget_plugins/demo_plugin.py
     src/vitessce/widget_plugins/spatial_query.py
Lines changed: 387 additions & 0 deletions
@@ -0,0 +1,387 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "nbsphinx": "hidden"
   },
   "source": [
    "# Vitessce Widget Tutorial"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Visualization of a SpatialData object"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import dependencies\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from os.path import join, isfile, isdir\n",
    "from urllib.request import urlretrieve\n",
    "import zipfile\n",
    "import shutil\n",
    "\n",
    "from vitessce import (\n",
    "    VitessceConfig,\n",
    "    ViewType as vt,\n",
    "    CoordinationType as ct,\n",
    "    CoordinationLevel as CL,\n",
    "    SpatialDataWrapper,\n",
    "    get_initial_coordination_scope_prefix\n",
    ")\n",
    "\n",
    "from vitessce.data_utils import (\n",
    "    sdata_morton_sort_points,\n",
    "    sdata_points_process_columns,\n",
    "    sdata_points_write_bounding_box_attrs,\n",
    "    sdata_points_modify_row_group_size,\n",
    "    sdata_morton_query_rect,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from spatialdata import read_zarr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_dir = \"data\"\n",
    "zip_filepath = join(data_dir, \"xenium_rep1_io.spatialdata.zarr.zip\")\n",
    "spatialdata_filepath = join(data_dir, \"xenium_rep1_io.spatialdata.zarr\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if not isdir(spatialdata_filepath):\n",
    "    if not isfile(zip_filepath):\n",
    "        os.makedirs(data_dir, exist_ok=True)\n",
    "        urlretrieve('https://s3.embl.de/spatialdata/spatialdata-sandbox/xenium_rep1_io.zip', zip_filepath)\n",
    "    with zipfile.ZipFile(zip_filepath,\"r\") as zip_ref:\n",
    "        zip_ref.extractall(data_dir)\n",
    "    os.rename(join(data_dir, \"data.zarr\"), spatialdata_filepath)\n",
    "\n",
    "    # This Xenium dataset has an AnnData \"raw\" element.\n",
    "    # Reference: https://github.com/giovp/spatialdata-sandbox/issues/55\n",
    "    raw_dir = join(spatialdata_filepath, \"tables\", \"table\", \"raw\")\n",
    "    if isdir(raw_dir):\n",
    "        shutil.rmtree(raw_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata = read_zarr(spatialdata_filepath)\n",
    "sdata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata[\"transcripts\"].shape[0].compute()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata.tables[\"table\"].X = sdata.tables[\"table\"].X.toarray()\n",
    "sdata.tables[\"dense_table\"] = sdata.tables[\"table\"]\n",
    "sdata.write_element(\"dense_table\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: store the two separate images as a single image with two channels.\n",
    "# Similar to https://github.com/EricMoerthVis/tissue-map-tools/pull/12"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata.tables['table'].obs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata.points['transcripts'].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sorting Points and creating a new Points element in the SpatialData object"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 1. Sort rows with `sdata_morton_sort_points`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata = sdata_morton_sort_points(sdata, \"transcripts\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Clean up columns with `sdata_points_process_columns`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add feature_index column to dataframe, and reorder columns so that feature_name (dict column) is the rightmost column.\n",
    "ddf = sdata_points_process_columns(sdata, \"transcripts\", var_name_col=\"feature_name\", table_name=\"table\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ddf.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Save sorted dataframe to new Points element"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata[\"transcripts_with_morton_codes\"] = ddf\n",
    "sdata.write_element(\"transcripts_with_morton_codes\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. Write bounding box metadata with `sdata_points_write_bounding_box_attrs`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata_points_write_bounding_box_attrs(sdata, \"transcripts_with_morton_codes\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. Modify the row group sizes of the Parquet files with `sdata_points_modify_row_group_size`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata_points_modify_row_group_size(sdata, \"transcripts_with_morton_codes\", row_group_size=25_000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optionally, check the number of row groups in one of the parquet file parts.\n",
    "import pyarrow.parquet as pq\n",
    "from os.path import join\n",
    "\n",
    "parquet_file = pq.ParquetFile(join(sdata.path, \"points\", \"transcripts_with_morton_codes\", \"points.parquet\", \"part.0.parquet\"))\n",
    "\n",
    "# Get the number of row groups in this part-0 file.\n",
    "num_groups = parquet_file.num_row_groups\n",
    "num_groups"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configure Vitessce\n",
    "\n",
    "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "vc = VitessceConfig(\n",
    "    schema_version=\"1.0.18\",\n",
    "    name='Xenium SpatialData Demo',\n",
    ")\n",
    "# Add data to the configuration:\n",
    "wrapper = SpatialDataWrapper(\n",
    "    sdata_path=spatialdata_filepath,\n",
    "    # The following paths are relative to the root of the SpatialData zarr store on-disk.\n",
    "    image_path=\"images/rasterized\",\n",
    "    table_path=\"tables/table\",\n",
    "    obs_feature_matrix_path=\"tables/table/X\",\n",
    "    obs_spots_path=\"shapes/cells\",\n",
    "    coordinate_system=\"global\",\n",
    "    coordination_values={\n",
    "        # The following tells Vitessce to consider each observation as a \"cell\"\n",
    "        \"obsType\": \"cell\",\n",
    "    }\n",
    ")\n",
    "dataset = vc.add_dataset(name='Xenium').add_object(wrapper)\n",
    "\n",
    "# Add views (visualizations) to the configuration:\n",
    "spatial = vc.add_view(\"spatialBeta\", dataset=dataset)\n",
    "feature_list = vc.add_view(\"featureList\", dataset=dataset)\n",
    "layer_controller = vc.add_view(\"layerControllerBeta\", dataset=dataset)\n",
    "obs_sets = vc.add_view(\"obsSets\", dataset=dataset)\n",
    "\n",
    "vc.link_views_by_dict([spatial, layer_controller], {\n",
    "    'spotLayer': CL([{\n",
    "        'obsType': 'cell',\n",
    "    }]),\n",
    "}, scope_prefix=get_initial_coordination_scope_prefix(\"A\", \"obsSpots\"))\n",
    "\n",
    "vc.link_views([spatial, layer_controller, feature_list, obs_sets], ['obsType'], [wrapper.obs_type_label])\n",
    "\n",
    "# Layout the views\n",
    "vc.layout(spatial | (feature_list / layer_controller / obs_sets));"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Render the widget"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "vw = vc.widget()\n",
    "vw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
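
The notebook imports sdata_morton_query_rect but does not call it above. To illustrate why Step 4 (bounding-box attributes) and Step 5 (smaller row groups) matter for such queries, here is a generic, hedged sketch of statistics-based row-group pruning using plain pyarrow. It is not the sdata_morton_query_rect implementation added in this commit, and the column names "x"/"y" and the part-0 file path are assumptions carried over from the notebook.

# Generic sketch, not the sdata_morton_query_rect implementation from this
# commit. Assumes the sorted Points parquet has "x" and "y" columns and that
# pyarrow wrote per-row-group min/max statistics (its default behavior).
from os.path import join
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq


def query_rect_row_groups(parquet_path, x_min, x_max, y_min, y_max):
    pf = pq.ParquetFile(parquet_path)
    keep = []
    for i in range(pf.metadata.num_row_groups):
        rg = pf.metadata.row_group(i)
        # Collect min/max statistics for the x and y column chunks of this row group.
        stats = {}
        for j in range(rg.num_columns):
            col = rg.column(j)
            if col.path_in_schema in ("x", "y") and col.statistics is not None and col.statistics.has_min_max:
                stats[col.path_in_schema] = (col.statistics.min, col.statistics.max)
        # Skip the row group only if its x or y range is disjoint from the query rectangle.
        if "x" in stats and "y" in stats:
            (gx_min, gx_max), (gy_min, gy_max) = stats["x"], stats["y"]
            if gx_max < x_min or gx_min > x_max or gy_max < y_min or gy_min > y_max:
                continue
        keep.append(i)
    if not keep:
        return pd.DataFrame(columns=["x", "y"])
    # Read only the surviving row groups, then apply the exact rectangle filter.
    table = pa.concat_tables([pf.read_row_group(i, columns=["x", "y"]) for i in keep])
    df = table.to_pandas()
    return df[df["x"].between(x_min, x_max) & df["y"].between(y_min, y_max)]


# Hypothetical usage on the part-0 file written by the notebook above, with an
# arbitrary query rectangle in the dataset's coordinate units:
part0 = join(sdata.path, "points", "transcripts_with_morton_codes", "points.parquet", "part.0.parquet")
query_rect_row_groups(part0, 1000, 1500, 2000, 2500)

Because the rows are Morton-sorted and grouped into ~25,000-row chunks, each row group's x/y min/max covers a compact spatial region, so most row groups can be skipped for a small query rectangle.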
