Skip to content

Commit 351f1dc

Browse files
committed
Use small dataset for tests
1 parent f35998a commit 351f1dc

File tree

16 files changed

+225
-18
lines changed

16 files changed

+225
-18
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import os
2+
from os.path import join, isfile, isdir
3+
from urllib.request import urlretrieve
4+
import zipfile
5+
import shutil
6+
7+
# Used spatialdata==0.4.0 on October 30, 2025
8+
from spatialdata import read_zarr, SpatialData
9+
10+
11+
data_dir = "data"
12+
zip_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr.zip")
13+
spatialdata_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr")
14+
15+
16+
# Fetch and unpack the Xenium SpatialData store unless it is already on disk.
if not isdir(spatialdata_filepath):
    if not isfile(zip_filepath):
        os.makedirs(data_dir, exist_ok=True)
        # Download under a temporary name and move it into place afterwards.
        # Writing straight to zip_filepath would leave a truncated archive
        # behind if the transfer is interrupted; that file would pass the
        # isfile() check on the next run and then fail to open as a zip.
        partial_zip_filepath = zip_filepath + ".part"
        urlretrieve('https://s3.embl.de/spatialdata/spatialdata-sandbox/xenium_rep1_io.zip', partial_zip_filepath)
        os.replace(partial_zip_filepath, zip_filepath)
    with zipfile.ZipFile(zip_filepath, "r") as zip_ref:
        zip_ref.extractall(data_dir)
    # The archive is expected to unpack to "data.zarr"; rename it to the
    # dataset-specific directory name that the rest of the script reads.
    os.rename(join(data_dir, "data.zarr"), spatialdata_filepath)
23+
24+
# This Xenium dataset has an AnnData "raw" element.
25+
# Reference: https://github.com/giovp/spatialdata-sandbox/issues/55
26+
raw_dir = join(spatialdata_filepath, "tables", "table", "raw")
27+
if isdir(raw_dir):
28+
shutil.rmtree(raw_dir)
29+
30+
sdata = read_zarr(spatialdata_filepath)
31+
32+
ddf = sdata.points["transcripts"]
33+
34+
# 2. Define a function to take every 200th row from a partition
35+
def select_every_200th(partition, step=200):
    """Downsample one partition by keeping every ``step``-th row.

    Written to be passed to ``map_partitions``, which calls it once for each
    partition of the Dask DataFrame.

    Args:
        partition: A pandas DataFrame holding one partition.
        step: Stride between the rows that are kept. Defaults to 200, so the
            rows at positions 0, 200, 400, ... of the partition are returned.

    Returns:
        The rows of ``partition`` at positions 0, ``step``, 2 * ``step``, ...
        An empty partition yields an empty result.
    """
    # Positional slicing with a stride is the efficient pandas way to thin rows.
    return partition.iloc[::step]
39+
40+
# 3. Apply this function to every partition in the Dask DataFrame
41+
result = ddf.map_partitions(select_every_200th)
42+
43+
# 4. Keep only the columns needed for the points-only dataset
44+
filtered_ddf = result[["x", "y", "z", "feature_name", "cell_id"]]
45+
46+
small_sdata = SpatialData(points={ "transcripts": filtered_ddf })
47+
48+
small_sdata.write("xenium_rep1_io.points_only.spatialdata.zarr", overwrite=True)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"spatialdata_attrs": {
3+
"spatialdata_software_version": "0.4.0",
4+
"version": "0.1"
5+
}
6+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"zarr_format": 2
3+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"zarr_format": 2
3+
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
{
2+
"axes": [
3+
"x",
4+
"y",
5+
"z"
6+
],
7+
"coordinateTransformations": [
8+
{
9+
"input": {
10+
"axes": [
11+
{
12+
"name": "x",
13+
"type": "space",
14+
"unit": "unit"
15+
},
16+
{
17+
"name": "y",
18+
"type": "space",
19+
"unit": "unit"
20+
},
21+
{
22+
"name": "z",
23+
"type": "space",
24+
"unit": "unit"
25+
}
26+
],
27+
"name": "xyz"
28+
},
29+
"output": {
30+
"axes": [
31+
{
32+
"name": "x",
33+
"type": "space",
34+
"unit": "unit"
35+
},
36+
{
37+
"name": "y",
38+
"type": "space",
39+
"unit": "unit"
40+
},
41+
{
42+
"name": "z",
43+
"type": "space",
44+
"unit": "unit"
45+
}
46+
],
47+
"name": "global"
48+
},
49+
"scale": [
50+
4.705882352941177,
51+
4.705882352941177,
52+
1.0
53+
],
54+
"type": "scale"
55+
}
56+
],
57+
"encoding-type": "ngff:points",
58+
"spatialdata_attrs": {
59+
"feature_key": "feature_name",
60+
"instance_key": "cell_id",
61+
"version": "0.1"
62+
}
63+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"zarr_format": 2
3+
}

0 commit comments

Comments
 (0)