diff --git a/flamingo_tools/data_conversion.py b/flamingo_tools/data_conversion.py index 26b92e7..798fa5d 100644 --- a/flamingo_tools/data_conversion.py +++ b/flamingo_tools/data_conversion.py @@ -207,9 +207,42 @@ def _write_missing_views(out_path): tree.write(xml_path) +def _parse_shape(metadata_file): + depth, height, width = None, None, None + + with open(metadata_file, "r") as f: + for line in f.readlines(): + line = line.strip().rstrip("\n") + if line.startswith("AOI width"): + width = int(line.split(" ")[-1]) + if line.startswith("AOI height"): + height = int(line.split(" ")[-1]) + if line.startswith("Number of planes saved"): + depth = int(line.split(" ")[-1]) + + assert depth is not None + assert height is not None + assert width is not None + return (depth, height, width) + + +def _load_data(file_path, metadata_file): + if Path(file_path).suffix == ".raw": + shape = _parse_shape(metadata_file) + data = np.memmap(file_path, mode="r", dtype="uint16", shape=shape) + else: + try: + data = tifffile.memmap(file_path, mode="r") + except ValueError: + print(f"Could not memmap the data from {file_path}. Fall back to load it into memory.") + data = tifffile.imread(file_path) + return data + + def convert_lightsheet_to_bdv( root: str, out_path: str, + file_ext: str = ".tif", attribute_parser: callable = flamingo_filename_parser, attribute_names: Optional[Dict[str, Dict[int, str]]] = None, metadata_file_name_pattern: Optional[str] = None, @@ -233,6 +266,8 @@ def convert_lightsheet_to_bdv( root: Folder that contains the image data stored as tifs. This function will take into account all tif files in folders beneath this root directory. out_path: Output path where the converted data is saved. + file_ext: The name of the file extension. By default assumes tif files (.tif). + Change to '.raw' to read files stored in raw format instead. attribute_parser: TODO metadata_file_name_pattern: The pattern for the names of files that contain the metadata. For flamingo metadata the following pattern should work: '*_Settings.txt'. @@ -264,7 +299,11 @@ def convert_lightsheet_to_bdv( elif ext == ".zarr": convert_to_ome_zarr = True - files = sorted(glob(os.path.join(root, "**/*.tif"), recursive=True)) + files = sorted(glob(os.path.join(root, f"**/*{file_ext}"), recursive=True)) + # Raise an error if we could not find any files. + if len(files) == 0: + raise ValueError(f"Could not find any files in {root} with extension {file_ext}.") + if metadata_file_name_pattern is None: metadata_files = [None] * len(files) offset = None @@ -275,7 +314,7 @@ def convert_lightsheet_to_bdv( recursive=True ) ) - assert len(metadata_files) == len(files) + assert len(metadata_files) == len(files), f"{len(metadata_files)}, {len(files)}" if center_tiles: start_positions = [] @@ -316,12 +355,7 @@ def convert_lightsheet_to_bdv( ) print(f"Converting tp={timepoint}, channel={attributes['channel']}, tile={attributes['tile']}") - try: - data = tifffile.memmap(file_path, mode="r") - except ValueError: - print(f"Could not memmap the data from {file_path}. Fall back to load it into memory.") - data = tifffile.imread(file_path) - + data = _load_data(file_path, metadata_file) if scale_factors is None: scale_factors = derive_scale_factors(data.shape) @@ -337,6 +371,7 @@ def convert_lightsheet_to_bdv( affine=tile_transformation, timepoint=timepoint, setup_id=setup_id, + chunks=(128, 128, 128), ) # We don't need to add additional xml metadata if we convert to ome-zarr. diff --git a/scripts/convert_raw_cochlea_sample.py b/scripts/convert_raw_cochlea_sample.py new file mode 100644 index 0000000..bb12816 --- /dev/null +++ b/scripts/convert_raw_cochlea_sample.py @@ -0,0 +1,13 @@ +from flamingo_tools.data_conversion import convert_lightsheet_to_bdv + + +def main(): + root = "/mnt/lustre-grete/usr/u12086/moser/lightsheet/M_LR_000172_R" + output = "/mnt/lustre-grete/usr/u12086/moser/lightsheet/M_LR_000172_R/converted.n5" + convert_lightsheet_to_bdv( + root, out_path=output, file_ext=".raw", + metadata_file_name_pattern="*_Settings.txt" + ) + + +main() diff --git a/scripts/data_transfer/README.md b/scripts/data_transfer/README.md index 5c7b7e2..edc4a2c 100644 --- a/scripts/data_transfer/README.md +++ b/scripts/data_transfer/README.md @@ -15,8 +15,7 @@ Current approach to the data transfer: - mget * - Copy this to HLRN by logging into it and running $ rsync pape41:/scratch1/projects/cca/data/moser/ - $ rsync -e "ssh -i ~/.ssh/id_rsa_hlrn" -avz pape41@login-mdc.hpc.gwdg.de:/scratch1/projects/cca/data/mose -r/ /mnt/lustre-grete/usr/u12086/moser/lightsheet/ + $ rsync -e "ssh -i ~/.ssh/id_rsa_hlrn" -avz pape41@login-mdc.hpc.gwdg.de:/scratch1/projects/cca/data/moser/ /mnt/lustre-grete/usr/u12086/moser/lightsheet/ - Remove on SCC ## Next files