Skip to content

Commit 9e4588a

Browse files
committed
Support of S3 bucket for block extraction
1 parent d26f2c6 commit 9e4588a

File tree

1 file changed

+58
-13
lines changed

1 file changed

+58
-13
lines changed

scripts/extract_block.py

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,46 +2,90 @@
22
import argparse
33
import numpy as np
44
import z5py
5+
import zarr
6+
7+
import s3fs
58

69
"""
710
This script extracts data around an input center coordinate in a given ROI halo.
11+
12+
Support for using an S3 bucket is currently limited to the cochlea-lightsheet bucket with the endpoint URL https://s3.fs.gwdg.de.
13+
If more use cases appear, the script will be generalized.
14+
Using S3 requires exporting the access key and the secret access key into the environment before executing the script.
15+
Run the following commands in the shell of your choice, or add them to your ~/.bashrc:
16+
export AWS_ACCESS_KEY_ID=<access key>
17+
export AWS_SECRET_ACCESS_KEY=<secret access key>
818
"""
919

1020

11-
def main(input_file, output_dir, input_key, resolution, coords, roi_halo):
21+
def main(input_file, output_dir, input_key, resolution, coords, roi_halo, s3):
1222
"""
1323
1424
:param str input_file: File path to input folder in n5 format
1525
:param str output_dir: output directory for saving cropped n5 file as <basename>_crop.n5
1626
:param str input_key: Key for accessing volume in n5 format, e.g. 'setup0/s0'
1727
:param float resolution: Resolution of input data in micrometer
18-
:param str coords: Center coordinates of extracted 3D volume in format 'z,y,x'
19-
:param str roi_halo: ROI halo of extracted 3D volume in format 'z,y,x'
28+
:param str coords: Center coordinates of extracted 3D volume in format 'x,y,z'
29+
:param str roi_halo: ROI halo of extracted 3D volume in format 'x,y,z'
30+
:param bool s3: Flag for using an S3 bucket
2031
"""
2132

2233
coords = [int(r) for r in coords.split(",")]
2334
roi_halo = [int(r) for r in roi_halo.split(",")]
2435

36+
coord_string = "-".join([str(c) for c in coords])
37+
38+
# Input is given in MoBIE order (x, y, z); reverse it to array order (z, y, x)
39+
coords.reverse()
40+
roi_halo.reverse()
41+
2542
input_content = list(filter(None, input_file.split("/")))
26-
basename = "".join(input_content[-1].split(".")[:-1])
43+
44+
if s3:
45+
basename = input_content[0] + "_" + input_content[-1].split(".")[0]
46+
else:
47+
basename = "".join(input_content[-1].split(".")[:-1])
48+
2749
input_dir = input_file.split(basename)[0]
2850
input_dir = os.path.abspath(input_dir)
2951

30-
if "" == output_dir:
52+
if output_dir == "":
3153
output_dir = input_dir
3254

33-
output_file = os.path.join(output_dir, basename + "_crop" + ".n5")
34-
35-
#M_LR_000167_R, coords = '806,1042,1334', coords = (z, y, x) compared to MoBIE view
55+
output_file = os.path.join(output_dir, basename + "_crop_" + coord_string + ".n5")
3656

3757
coords = np.array(coords)
3858
coords = coords / resolution
3959
coords = np.round(coords).astype(np.int32)
4060

4161
roi = tuple(slice(co - rh, co + rh) for co, rh in zip(coords, roi_halo))
4262

43-
with z5py.File(input_file, "r") as f:
44-
raw = f[input_key][roi]
63+
if s3:
64+
65+
# Define S3 bucket and OME-Zarr dataset path
66+
67+
bucket_name = "cochlea-lightsheet"
68+
zarr_path = f"{bucket_name}/{input_file}"
69+
70+
# Create an S3 filesystem
71+
fs = s3fs.S3FileSystem(
72+
client_kwargs={"endpoint_url": "https://s3.fs.gwdg.de"},
73+
anon=False
74+
)
75+
76+
if not fs.exists(zarr_path):
77+
print("Error: Path does not exist!")
78+
79+
# Open the OME-Zarr dataset
80+
store = zarr.storage.FSStore(zarr_path, fs=fs)
81+
print(f"Opening file {zarr_path} from the S3 bucket.")
82+
83+
with zarr.open(store, mode="r") as f:
84+
raw = f[input_key][roi]
85+
86+
else:
87+
with z5py.File(input_file, "r") as f:
88+
raw = f[input_key][roi]
4589

4690
with z5py.File(output_file, "w") as f_out:
4791
f_out.create_dataset("raw", data=raw, compression="gzip")
@@ -53,13 +97,14 @@ def main(input_file, output_dir, input_key, resolution, coords, roi_halo):
5397

5498
parser.add_argument('input', type=str, help="Input file in n5 format.")
5599
parser.add_argument('-o', "--output", type=str, default="", help="Output directory")
56-
parser.add_argument('-c', "--coord", type=str, required=True, help="3D coordinate in format 'z,y,x' as center of extracted block. Dimensions are inversed to view in MoBIE (x y z) -> (z y x)")
100+
parser.add_argument('-c', "--coord", type=str, required=True, help="3D coordinate in format 'x,y,z' as center of extracted block.")
57101

58102
parser.add_argument('-k', "--input_key", type=str, default="setup0/timepoint0/s0", help="Input key for data in input file")
59103
parser.add_argument('-r', "--resolution", type=float, default=0.38, help="Resolution of input in micrometer")
60104

61-
parser.add_argument("--roi_halo", type=str, default="128,128,64", help="ROI halo around center coordinate")
105+
parser.add_argument("--roi_halo", type=str, default="128,128,64", help="ROI halo around center coordinate in format 'x,y,z'")
106+
parser.add_argument("--s3", action="store_true", help="Use S3 bucket")
62107

63108
args = parser.parse_args()
64109

65-
main(args.input, args.output, args.input_key, args.resolution, args.coord, args.roi_halo)
110+
main(args.input, args.output, args.input_key, args.resolution, args.coord, args.roi_halo, args.s3)

0 commit comments

Comments
 (0)