Skip to content

Commit a70d580

Browse files
Update scripts for processing portal segmentations
1 parent caba84f commit a70d580

File tree

3 files changed

+76
-4
lines changed

3 files changed

+76
-4
lines changed

scripts/cryo/cryo-et-portal/download_tomogram_lists.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,13 @@ def download_tomograms_for_da():
2727
def download_tomograms_for_eval():
2828
with open("./list_for_eval.json") as f:
2929
run_ids = json.load(f)
30-
download_tomogram_list(run_ids)
30+
output_root = "/scratch-grete/projects/nim00007/cryo-et/from_portal/for_eval"
31+
download_tomogram_list(run_ids, output_root)
3132

3233

3334
def main():
34-
# download_tomograms_for_eval()
35-
download_tomograms_for_da()
35+
download_tomograms_for_eval()
36+
# download_tomograms_for_da()
3637

3738

3839
if __name__ == "__main__":
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import os
2+
from glob import glob
3+
from pathlib import Path
4+
5+
import h5py
6+
import numpy as np
7+
import zarr
8+
9+
from synapse_net.file_utils import read_mrc
10+
from tqdm import tqdm
11+
12+
from ome_zarr.writer import write_image
13+
from ome_zarr.io import parse_url
14+
15+
16+
# IN_ROOT holds the tomograms (.mrc); OUT_ROOT holds the segmentation results (.h5).
#
# Alternative configuration (inactive), kept for switching the data set by hand:
# IN_ROOT = "/scratch-grete/projects/nim00007/cryo-et/from_portal/for_eval"
# OUT_ROOT = "/scratch-grete/projects/nim00007/cryo-et/from_portal/segmentations/DA_with_new_portalData_origDim"  # noqa

# Active configuration.
IN_ROOT = "/scratch-grete/projects/nim00007/cryo-et/from_portal/for_domain_adaptation"
OUT_ROOT = "/scratch-grete/projects/nim00007/cryo-et/from_portal/segmentations/DA_with_new_portalData_forDAdata"  # noqa
21+
22+
23+
def export_to_ome_zarr(export_file, seg, voxel_size):
    """Write a segmentation volume to an OME-Zarr container.

    Args:
        export_file: Path of the OME-Zarr container that is opened for writing.
        seg: The segmentation volume. It is written with the axes "zyx".
        voxel_size: Mapping from axis name ("x", "y", "z") to the voxel size
            along that axis.

    Raises:
        KeyError: If voxel_size lacks an entry for one of the axes.
    """
    axes = "zyx"
    store = parse_url(export_file, mode="w").store
    root = zarr.group(store=store)

    # Look the scale up by axis name, so that its order follows the axis order
    # of the data instead of the key order of the voxel_size mapping.
    scale = [voxel_size[axis] for axis in axes]
    trafo = [
        [{"scale": scale, "type": "scale"}]
    ]
    write_image(seg, root, axes=axes, coordinate_transformations=trafo, scaler=None)
32+
33+
34+
def export_segmentation(export_folder, segmentation_file):
    """Export the vesicle segmentation stored in an HDF5 file to OME-Zarr.

    The result is written to "<export_folder>/<name>.ome.zarr", where <name> is
    the stem of the segmentation file. Nothing is done if that path exists
    already. A segmentation file that cannot be opened is reported and skipped.

    Args:
        export_folder: Folder the OME-Zarr container is written to.
        segmentation_file: Path to the HDF5 file that contains the segmentation.

    Raises:
        ValueError: If the segmentation shape differs from the tomogram shape,
            or if the largest segmentation id does not fit into int8.
    """
    fname = Path(segmentation_file).stem
    key = "/vesicles/segment_from_vesicle_DA_portal_v3"
    export_file = os.path.join(export_folder, f"{fname}.ome.zarr")

    if os.path.exists(export_file):
        return

    # Load the segmentation first, so that an unreadable file is skipped
    # before the tomogram is read.
    try:
        with h5py.File(segmentation_file, "r") as f:
            seg = f[key][:]
    except OSError as e:
        print(e)
        return

    # The tomogram with the same file stem provides the reference shape and voxel size.
    input_file = os.path.join(IN_ROOT, f"{fname}.mrc")
    raw, voxel_size = read_mrc(input_file)
    # NOTE(review): presumably converts nanometer to Angstrom - confirm the unit returned by read_mrc.
    voxel_size = {k: v * 10 for k, v in voxel_size.items()}

    # NOTE(review): presumably matches the axis orientation of the data on the portal - confirm.
    seg = np.flip(seg, axis=1)
    # Explicit checks instead of assert statements, which are removed when running with -O.
    if seg.shape != raw.shape:
        raise ValueError(
            f"Shape mismatch for {fname}: segmentation {seg.shape} vs. tomogram {raw.shape}"
        )

    # Without this check the cast to int8 below could silently wrap large ids.
    max_id = seg.max()
    if max_id >= 128:
        raise ValueError(f"{max_id}")
    seg = seg.astype("int8")
    export_to_ome_zarr(export_file, seg, voxel_size)
59+
60+
61+
def main():
    """Export all segmentation files (*.h5) found in OUT_ROOT to ./for_portal2."""
    target_dir = "./for_portal2"
    os.makedirs(target_dir, exist_ok=True)

    pattern = os.path.join(OUT_ROOT, "*.h5")
    segmentation_paths = glob(pattern)
    for segmentation_path in tqdm(segmentation_paths):
        export_segmentation(target_dir, segmentation_path)
67+
68+
69+
# Run the export only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

synapse_net/file_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,13 +170,15 @@ def read_data_from_cryo_et_portal_run(
170170
output_path: The path for saving the data. The data will be streamed if the path is not given.
171171
use_zarr_format: Whether to use the data in zarr format instead of mrc.
172172
processing_type: The processing type of the tomogram to download.
173-
id_field: The name of the id field.
173+
id_field: The name of the id field. One of 'id' or 'run_id'.
174+
The 'id' references specific tomograms, whereas 'run_id' references a collection of experimental data.
174175
scale_level: The scale level to read from the data. Only valid for zarr data.
175176
176177
Returns:
177178
The data read from the run.
178179
The voxel size read from the run.
179180
"""
181+
assert id_field in ("id", "run_id")
180182
if output_path is not None and os.path.exists(output_path):
181183
return read_ome_zarr(output_path) if use_zarr_format else read_mrc(output_path)
182184

0 commit comments

Comments
 (0)