|
13 | 13 | } |
14 | 14 | ### VIASH END |
15 | 15 |
|
| 16 | + |
| 17 | +def get_crop_coords(sdata, max_n_pixels=50000*50000): |
| 18 | + """Get the crop coordinates to subset the sdata to max_n_pixels |
| 19 | + |
| 20 | + Arguments |
| 21 | + --------- |
| 22 | + sdata: spatialdata.SpatialData |
| 23 | + The spatial data to crop |
| 24 | + max_n_pixels: int |
| 25 | + The maximum number of pixels to keep |
| 26 | + |
| 27 | + Returns |
| 28 | + ------- |
| 29 | + crop_coords: dict |
| 30 | + The crop coordinates |
| 31 | + """ |
| 32 | + |
| 33 | + _, h, w = sdata['morphology_mip']["scale0"].image.shape |
| 34 | + #h, w = sdata |
| 35 | + |
| 36 | + # Check if the image is already below the maximum number of pixels |
| 37 | + if h * w <= max_n_pixels: |
| 38 | + return None |
| 39 | + |
| 40 | + # Initialize with square crop |
| 41 | + h_crop = w_crop = int(np.sqrt(max_n_pixels)) |
| 42 | + |
| 43 | + # Adjust crop if necessary to fit the image |
| 44 | + if h_crop > h: |
| 45 | + h_crop = h |
| 46 | + w_crop = int(max_n_pixels / h_crop) |
| 47 | + elif w_crop > w: |
| 48 | + w_crop = w |
| 49 | + h_crop = int(max_n_pixels / w_crop) |
| 50 | + |
| 51 | + # Center the crop |
| 52 | + h_offset = (h - h_crop) // 2 |
| 53 | + w_offset = (w - w_crop) // 2 |
| 54 | + |
| 55 | + crop = [[h_offset, h_offset + h_crop], [w_offset, w_offset + w_crop]] |
| 56 | + |
| 57 | + return crop |
| 58 | + |
| 59 | + |
16 | 60 | # Load the single-cell data |
17 | 61 | adata = ad.read_h5ad(par["input_sc"]) |
18 | 62 |
|
|
41 | 85 | sdata.table.uns[orig_col] = sdata.table.uns[col] |
42 | 86 | sdata.table.uns[col] = par[col] |
43 | 87 |
|
| 88 | +# Correct the feature_key attribute in sdata if needed |
| 89 | +# NOTE: it would have been better to do this in the loader scripts, but this way the datasets don't need to be re-downloaded |
| 90 | +if "feature_key" in sdata['transcripts'].attrs["spatialdata_attrs"]: |
| 91 | + feature_key = sdata['transcripts'].attrs["spatialdata_attrs"]["feature_key"] |
| 92 | + if feature_key != "feature_name": |
| 93 | + sdata['transcripts'].attrs["spatialdata_attrs"]["feature_key"] = "feature_name" |
| 94 | + |
| 95 | +# Crop datasets that are too large |
| 96 | +crop_coords = get_crop_coords(sdata) |
| 97 | +if crop_coords is not None: |
| 98 | + sdata_output = sdata.query.bounding_box( |
| 99 | + axes=["y", "x"], |
| 100 | + min_coordinate=[crop_coords[0][0], crop_coords[1][0]], |
| 101 | + max_coordinate=[crop_coords[0][1], crop_coords[1][1]], |
| 102 | + target_coordinate_system="global", |
| 103 | + filter_table=True, |
| 104 | + ) |
| 105 | +else: |
| 106 | + sdata_output = sdata |
| 107 | + |
44 | 108 | # Save the single-cell data |
45 | 109 | adata.write_h5ad(par["output_sc"], compression="gzip") |
46 | 110 |
|
|
49 | 113 | shutil.rmtree(par["output_sp"]) |
50 | 114 |
|
51 | 115 | # Save the spatial data |
52 | | -sdata.write(par["output_sp"], overwrite=True) |
| 116 | +sdata_output.write(par["output_sp"], overwrite=True) |
0 commit comments