Skip to content

Commit b0f6ea7

Browse files
authored
Feature key renaming and set a maximum size for datasets (#103)
* Add feature key renaming * Set a maximum size for datasets
1 parent 66813a5 commit b0f6ea7

File tree

1 file changed

+65
-1
lines changed

1 file changed

+65
-1
lines changed

src/data_processors/process_dataset/script.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,50 @@
1313
}
1414
### VIASH END
1515

16+
17+
def get_crop_coords(sdata, max_n_pixels=50000*50000):
18+
"""Get the crop coordinates to subset the sdata to max_n_pixels
19+
20+
Arguments
21+
---------
22+
sdata: spatialdata.SpatialData
23+
The spatial data to crop
24+
max_n_pixels: int
25+
The maximum number of pixels to keep
26+
27+
Returns
28+
-------
29+
crop_coords: dict
30+
The crop coordinates
31+
"""
32+
33+
_, h, w = sdata['morphology_mip']["scale0"].image.shape
34+
#h, w = sdata
35+
36+
# Check if the image is already below the maximum number of pixels
37+
if h * w <= max_n_pixels:
38+
return None
39+
40+
# Initialize with square crop
41+
h_crop = w_crop = int(np.sqrt(max_n_pixels))
42+
43+
# Adjust crop if necessary to fit the image
44+
if h_crop > h:
45+
h_crop = h
46+
w_crop = int(max_n_pixels / h_crop)
47+
elif w_crop > w:
48+
w_crop = w
49+
h_crop = int(max_n_pixels / w_crop)
50+
51+
# Center the crop
52+
h_offset = (h - h_crop) // 2
53+
w_offset = (w - w_crop) // 2
54+
55+
crop = [[h_offset, h_offset + h_crop], [w_offset, w_offset + w_crop]]
56+
57+
return crop
58+
59+
1660
# Load the single-cell data
1761
adata = ad.read_h5ad(par["input_sc"])
1862

@@ -41,6 +85,26 @@
4185
sdata.table.uns[orig_col] = sdata.table.uns[col]
4286
sdata.table.uns[col] = par[col]
4387

88+
# Correct the feature_key attribute in sdata if needed
89+
# NOTE: it would have been better to do this in the loader scripts, but this way the datasets don't need to be re-downloaded
90+
if "feature_key" in sdata['transcripts'].attrs["spatialdata_attrs"]:
91+
feature_key = sdata['transcripts'].attrs["spatialdata_attrs"]["feature_key"]
92+
if feature_key != "feature_name":
93+
sdata['transcripts'].attrs["spatialdata_attrs"]["feature_key"] = "feature_name"
94+
95+
# Crop datasets that are too large
96+
crop_coords = get_crop_coords(sdata)
97+
if crop_coords is not None:
98+
sdata_output = sdata.query.bounding_box(
99+
axes=["y", "x"],
100+
min_coordinate=[crop_coords[0][0], crop_coords[1][0]],
101+
max_coordinate=[crop_coords[0][1], crop_coords[1][1]],
102+
target_coordinate_system="global",
103+
filter_table=True,
104+
)
105+
else:
106+
sdata_output = sdata
107+
44108
# Save the single-cell data
45109
adata.write_h5ad(par["output_sc"], compression="gzip")
46110

@@ -49,4 +113,4 @@
49113
shutil.rmtree(par["output_sp"])
50114

51115
# Save the spatial data
52-
sdata.write(par["output_sp"], overwrite=True)
116+
sdata_output.write(par["output_sp"], overwrite=True)

0 commit comments

Comments
 (0)