Skip to content

Commit 9f6ede9

Browse files
authored
Fix data subsetting (#104)
* Add rechunking after cropping data
* Add comment
1 parent b0f6ea7 commit 9f6ede9

File tree

1 file changed

+29
-0
lines changed

1 file changed

+29
-0
lines changed

src/data_processors/process_dataset/script.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,34 @@ def get_crop_coords(sdata, max_n_pixels=50000*50000):
5656

5757
return crop
5858

59+
def rechunk_sdata(sdata, CHUNK_SIZE=1024):
    """Rechunk all image and label elements of ``sdata`` in place.

    Every entry of ``sdata.images`` and ``sdata.labels`` is replaced by the
    result of calling its ``.chunk(...)`` method with a mapping that assigns
    ``CHUNK_SIZE`` to each coordinate name of the element.

    Arguments
    ---------
    sdata: spatialdata.SpatialData
        The spatial data whose images and labels are rechunked. The object is
        modified in place.
    CHUNK_SIZE: int
        The chunk size applied to every coordinate (except ``"c"`` of images,
        see below).

    Returns
    -------
    None

    """
    # Snapshot the keys first: entries are reassigned while looping.
    for name in list(sdata.images.keys()):
        multiscale = sdata.images[name]
        # Images are accessed through their "scale0" level.
        # NOTE(review): assumes every image is multiscale — confirm with caller.
        finest = multiscale["scale0"]
        chunk_sizes = dict.fromkeys(finest.coords.keys(), CHUNK_SIZE)
        if "c" in chunk_sizes:
            # Keep the existing size of the first chunk along the first axis
            # (presumably the channel axis) instead of forcing CHUNK_SIZE.
            chunk_sizes["c"] = finest["image"].chunks[0][0]
        sdata.images[name] = multiscale.chunk(chunk_sizes)

    for name in list(sdata.labels.keys()):
        labels = sdata.labels[name]
        # Labels expose their coordinates directly (no "scale0" lookup here).
        chunk_sizes = dict.fromkeys(labels.coords.keys(), CHUNK_SIZE)
        sdata.labels[name] = labels.chunk(chunk_sizes)
86+
5987

6088
# Load the single-cell data
6189
adata = ad.read_h5ad(par["input_sc"])
@@ -102,6 +130,7 @@ def get_crop_coords(sdata, max_n_pixels=50000*50000):
102130
target_coordinate_system="global",
103131
filter_table=True,
104132
)
133+
rechunk_sdata(sdata_output) #NOTE: rechunking currently needed (https://github.com/scverse/spatialdata/issues/929)
105134
else:
106135
sdata_output = sdata
107136

0 commit comments

Comments
 (0)