Commit 3588f61

Merge improved parsing and implement conversion code
2 parents 7869ee9 + 0c08cbf commit 3588f61

File tree

10 files changed: +304 -118 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
 synthetic_data/
 __pycache__/
 converted/
+*.egg-info/

flamingo_tools/data_conversion.py

Lines changed: 126 additions & 92 deletions
@@ -1,5 +1,6 @@
 import multiprocessing as mp
 import os
+import re
 
 from glob import glob
 from pathlib import Path
@@ -58,19 +59,15 @@ def _read_start_position_flamingo(path):
     return start_position
 
 
-def read_metadata_flamingo(metadata_paths, center_tiles):
-    start_positions = []
+def read_metadata_flamingo(metadata_path, offset=None):
     resolution, unit = None, None
-    for path in metadata_paths:
-        resolution, unit = _read_resolution_and_unit_flamingo(path)
-        start_position = _read_start_position_flamingo(path)
-        start_positions.append(start_position)
 
-    start_positions = np.array(start_positions)
-    offset = np.min(start_positions, axis=0) if center_tiles else np.array([0.0, 0.0, 0.0])
+    resolution, unit = _read_resolution_and_unit_flamingo(metadata_path)
+    start_position = _read_start_position_flamingo(metadata_path)
 
     def _pos_to_trafo(pos):
-        pos -= offset
+        if offset is not None:
+            pos -= offset
 
         # FIXME: dirty hack
         # scale = 4
@@ -97,11 +94,9 @@ def _pos_to_trafo(pos):
         }
         return trafo
 
-    transformations = [
-        _pos_to_trafo(pos) for pos in start_positions
-    ]
+    transformation = _pos_to_trafo(start_position)
     # We have to reverse the resolution because pybdv expects ZYX.
-    return resolution[::-1], unit, transformations
+    return resolution[::-1], unit, transformation
 
 
 # TODO derive the scale factors from the shape rather than hard-coding it to 5 levels
@@ -110,30 +105,15 @@ def derive_scale_factors(shape):
     return scale_factors
 
 
-def _to_bdv(
-    data, out_path, scale_factors, n_threads, resolution, unit, channel_id, channel_name, tile_id, tile_transformation
-):
-    pybdv.make_bdv(
-        data, out_path,
-        downscale_factors=scale_factors, downscale_mode="mean",
-        n_threads=n_threads,
-        resolution=resolution, unit=unit,
-        attributes={
-            "channel": {"id": channel_id, "name": channel_name}, "tile": {"id": tile_id, "name": str(tile_id)},
-            "angle": {"id": 0, "name": "0"}, "illumination": {"id": 0, "name": "0"}
-        },
-        affine=tile_transformation,
-    )
-
+def _to_ome_zarr(data, out_path, scale_factors, timepoint, setup_id, attributes, unit, resolution):
+    n_threads = mp.cpu_count()
+    chunks = (128, 128, 128)
 
-def _to_ome_zarr(
-    data, out_path, scale_factors, n_threads, resolution, unit, channel_id, channel_name, tile_id, tile_transformation
-):
     # Write the base dataset.
-    base_key = f"c{channel_id}-t{tile_id}"
-    chunks = (128, 128, 128)
+    base_key = f"setup{setup_id}/timepoint{timepoint}"
+
     with open_file(out_path, "a") as f:
-        ds = f.create_dataset(f"{base_key}/s0", shape=data.shape, compression='gzip',
+        ds = f.create_dataset(f"{base_key}/s0", shape=data.shape, compression="gzip",
                               chunks=chunks, dtype=data.dtype)
         ds.n_threads = n_threads
         ds[:] = data
@@ -143,27 +123,70 @@ def _to_ome_zarr(
         for level, scale_factor in enumerate(scale_factors, 1):
             inv_scale = [1.0 / sc for sc in scale_factor]
             data = rescale(data, inv_scale, preserve_range=True).astype(data.dtype)
-            ds = f.create_dataset(f"{base_key}/s{level}", shape=data.shape, compression='gzip',
+            ds = f.create_dataset(f"{base_key}/s{level}", shape=data.shape, compression="gzip",
                                   chunks=chunks, dtype=data.dtype)
             ds.n_threads = n_threads
             ds[:] = data
 
+        g = f[f"setup{setup_id}"]
+        g.attrs.update(attributes)
+
     # Write the ome zarr metadata.
     metadata_dict = {"unit": unit, "resolution": resolution}
     write_format_metadata(
         "ome.zarr", out_path, metadata_dict, scale_factors=scale_factors, prefix=base_key
     )
 
 
+def flamingo_filename_parser(file_path, name_mapping):
+    filename = os.path.basename(file_path)
+
+    # Extract the timepoint.
+    match = re.search(r'_t(\d+)_', filename)
+    if match:
+        timepoint = int(match.group(1))
+    else:
+        timepoint = 0
+
+    # Extract the additional attributes.
+    attributes = {}
+    if name_mapping is None:
+        name_mapping = {}
+
+    # Extract the channel.
+    match = re.search(r'_C(\d+)_', filename)
+    channel = int(match.group(1)) if match else 0
+    channel_mapping = name_mapping.get("channel", {})
+    attributes["channel"] = {"id": channel, "name": channel_mapping.get(channel, str(channel))}
+
+    # Extract the tile.
+    match = re.search(r'_R(\d+)_', filename)
+    tile = int(match.group(1)) if match else 0
+    tile_mapping = name_mapping.get("tile", {})
+    attributes["tile"] = {"id": tile, "name": tile_mapping.get(tile, str(tile))}
+
+    # Extract the illumination.
+    match = re.search(r'_I(\d+)_', filename)
+    illumination = int(match.group(1)) if match else 0
+    illumination_mapping = name_mapping.get("illumination", {})
+    attributes["illumination"] = {"id": illumination, "name": illumination_mapping.get(illumination, str(illumination))}
+
+    # BDV also supports an angle attribute, but it does not seem to be stored in the filename.
+    # "angle": {"id": 0, "name": "0"}
+
+    attribute_id = f"c{channel}-t{tile}-i{illumination}"
+    return timepoint, attributes, attribute_id
+
+
 def convert_lightsheet_to_bdv(
     root: str,
-    channel_folders: Dict[str, str],
-    image_file_name_pattern: str,
     out_path: str,
+    attribute_parser: callable = flamingo_filename_parser,
+    attribute_names: Optional[Dict[str, Dict[int, str]]] = None,
     metadata_file_name_pattern: Optional[str] = None,
     metadata_root: Optional[str] = None,
     metadata_type: str = "flamingo",
-    center_tiles: bool = True,
+    center_tiles: bool = False,
     resolution: Optional[List[float]] = None,
     unit: Optional[str] = None,
     scale_factors: Optional[List[List[int]]] = None,
@@ -174,24 +197,14 @@ def convert_lightsheet_to_bdv(
     The data is converted to the bdv-n5 file format and can be opened with BigDataViewer
     or BigStitcher. This function is written with data layout and metadata of flamingo
     microscopes in mind, but could potentially be adapted to other data formats.
-    We currently don't support multiple timepoints, but support can be added if needed.
 
-    This function assumes the following input data format:
-    <ROOT>/<CHANNEL1>/<TILE1>.tif
-          /<TILE2>.tif
-          /...
-          /<CHANNEL2>/<TILE1>.tif
-          /<TILE2>.tif
-          /...
+    The timepoint and attributes (channel, tile, illumination) of each tif are parsed from its filename via the attribute_parser.
 
     Args:
-        root: Folder that contains the folders with tifs for each channel.
-        channel_folders: Dictionary that maps the name of each channel to the corresponding folder name
-            underneath the root folder.
-        image_file_name_pattern: The pattern for the names of the tifs that contain the data.
-            This expects a glob pattern (name with '*') to select the corresponding tif files.
-            The simplest pattern that should work in most cases is '*.tif'.
+        root: Folder that contains the image data stored as tifs.
+            This function will take into account all tif files in folders beneath this root directory.
         out_path: Output path where the converted data is saved.
+        attribute_parser: Function that parses the timepoint, the attributes and a unique attribute id from the filename.
         metadata_file_name_pattern: The pattern for the names of files that contain the metadata.
             For flamingo metadata the following pattern should work: '*_Settings.txt'.
         metadata_root: Different root folder for the metadata. By default 'root' is used here as well.
@@ -216,60 +229,81 @@ def convert_lightsheet_to_bdv(
 
     # Make sure we convert to n5, in case no extension is passed.
    ext = os.path.splitext(out_path)[1]
+    convert_to_ome_zarr = False
     if ext == "":
         out_path = str(Path(out_path).with_suffix(".n5"))
-        conversion_function = _to_bdv
     elif ext == ".zarr":
-        conversion_function = _to_ome_zarr
-    else:
-        conversion_function = _to_bdv
+        convert_to_ome_zarr = True
 
-    # Iterate over the channels
-    for channel_id, (channel_name, channel_folder) in enumerate(channel_folders.items()):
-
-        # Get all the image file paths for this channel.
-        tile_pattern = os.path.join(root, channel_folder, image_file_name_pattern)
-        file_paths = sorted(glob(tile_pattern))
-        assert len(file_paths) > 0, tile_pattern
+    files = sorted(glob(os.path.join(root, "**/*.tif"), recursive=True))
+    if metadata_file_name_pattern is None:
+        metadata_files = [None] * len(files)
+        offset = None
+    else:
+        metadata_files = sorted(
+            glob(
+                os.path.join(root if metadata_root is None else metadata_root, f"**/{metadata_file_name_pattern}"),
+                recursive=True
+            )
+        )
+        assert len(metadata_files) == len(files)
+
+        if center_tiles:
+            start_positions = []
+            for mpath in metadata_files:
+                start_positions.append(_read_start_position_flamingo(mpath))
+            offset = np.min(start_positions, axis=0)
+        else:
+            offset = None
+
+    next_setup_id = 0
+    attrs_to_setups = {}
+
+    for file_path, metadata_file in zip(files, metadata_files):
+        timepoint, attributes, aid = attribute_parser(file_path, attribute_names)
+
+        if aid in attrs_to_setups:
+            setup_id = attrs_to_setups[aid]
+        else:
+            attrs_to_setups[aid] = next_setup_id
+            setup_id = next_setup_id
+            next_setup_id += 1
 
         # Read the metadata if it was given.
-        if metadata_file_name_pattern is None:  # No metadata given.
+        if metadata_file is None:  # No metadata given.
            # We don't use any tile transformation.
-            tile_transformations = [None] * len(file_paths)
+            tile_transformation = None
            # Set resolution and unit to their default values if they were not passed.
             if resolution is None:
                 resolution = [1.0, 1.0, 1.0]
             if unit is None:
                 unit = "pixel"
 
         else:  # We have metadata and read it.
-            metadata_pattern = os.path.join(
-                root if metadata_root is None else metadata_root,
-                channel_folder, metadata_file_name_pattern
-            )
-            metadata_paths = sorted(glob(metadata_pattern))
-            assert len(metadata_paths) == len(file_paths)
-            resolution, unit, tile_transformations = read_metadata_flamingo(metadata_paths, center_tiles)
-
-        if channel_name is None or channel_name.strip() == "":  # channel name is empty, assign channel id as name
-            channel_name = str(channel_id)
-
-        for tile_id, (file_path, tile_transformation) in enumerate(zip(file_paths, tile_transformations)):
-
-            # Try to memmap the data. If that doesn't work fall back to loading it into memory.
-            try:
-                data = tifffile.memmap(file_path, mode="r")
-            except ValueError:
-                print(f"Could not memmap the data from {file_path}. Fall back to load it into memory.")
-                data = tifffile.imread(file_path)
-
-            print("Converting channel", channel_id, "tile", tile_id, "from", file_path, "with shape", data.shape)
-            if scale_factors is None:
-                scale_factors = derive_scale_factors(data.shape)
-
-            conversion_function(
-                data, out_path, scale_factors, n_threads, resolution, unit,
-                channel_id, channel_name, tile_id, tile_transformation
+            resolution, unit, tile_transformation = read_metadata_flamingo(metadata_file, offset)
+
+        try:
+            data = tifffile.memmap(file_path, mode="r")
+        except ValueError:
+            print(f"Could not memmap the data from {file_path}. Falling back to loading it into memory.")
+            data = tifffile.imread(file_path)
+
+        print(f"Converting tp={timepoint}, channel={attributes['channel']}, tile={attributes['tile']}")
+        if scale_factors is None:
+            scale_factors = derive_scale_factors(data.shape)
+
+        if convert_to_ome_zarr:
+            _to_ome_zarr(data, out_path, scale_factors, timepoint, setup_id, attributes, unit, resolution)
+        else:
+            pybdv.make_bdv(
+                data, out_path,
+                downscale_factors=scale_factors, downscale_mode="mean",
+                n_threads=n_threads,
+                resolution=resolution, unit=unit,
+                attributes=attributes,
+                affine=tile_transformation,
+                timepoint=timepoint,
+                setup_id=setup_id,
             )
 

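To make the reworked entry point concrete, here is a minimal usage sketch of the new API (the paths and the channel name mapping are hypothetical; `attribute_names` follows the `Optional[Dict[str, Dict[int, str]]]` signature introduced in this diff):

```python
# Hypothetical usage sketch of the new conversion API; paths and names are made up.
from flamingo_tools.data_conversion import convert_lightsheet_to_bdv

# Optional mapping from parsed attribute ids to display names.
# The keys match the attributes parsed by flamingo_filename_parser:
# "channel", "tile" and "illumination".
attribute_names = {"channel": {0: "GFP", 1: "RFP"}}

convert_lightsheet_to_bdv(
    root="/data/flamingo/run1",                   # all tifs below root are discovered recursively
    out_path="/data/converted/run1.ome.zarr",     # a ".zarr" extension selects the OME-Zarr branch
    attribute_names=attribute_names,
    metadata_file_name_pattern="*_Settings.txt",  # flamingo metadata, parsed for resolution/unit
)
```

With the default BDV output (no extension, converted to `.n5`), the same call writes one setup per unique channel/tile/illumination combination, as grouped by the `attrs_to_setups` bookkeeping above.
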
flamingo_tools/test_data.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 # TODO add metadata
 def create_test_data(root, size=256, n_channels=2, n_tiles=4):
     channel_folders = [f"channel{chan_id}" for chan_id in range(n_channels)]
-    file_name_pattern = "volume_R%i_C%i.tif"
+    file_name_pattern = "volume_R%i_C%i_I0.tif"
     for chan_id, channel_folder in enumerate(channel_folders):
         out_folder = os.path.join(root, channel_folder)
         os.makedirs(out_folder, exist_ok=True)

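For reference, this is what the new test filename yields when run through `flamingo_filename_parser` (a quick sketch; note that the trailing `_I0` sits directly before the file extension, so the `_I(\d+)_` regex does not match it and the illumination falls back to its default of 0):

```python
from flamingo_tools.data_conversion import flamingo_filename_parser

timepoint, attributes, aid = flamingo_filename_parser("volume_R2_C1_I0.tif", None)
print(timepoint)   # 0 (no _t<digits>_ in the name)
print(attributes)  # {'channel': {'id': 1, 'name': '1'}, 'tile': {'id': 2, 'name': '2'},
                   #  'illumination': {'id': 0, 'name': '0'}}
print(aid)         # "c1-t2-i0"
```
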
flamingo_tools/version.py

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+__version__ = "0.0.1"

scripts/data_transfer/.gitignore

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+credentials.json

scripts/data_transfer/README.md

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+# Data Transfer Moser
+
+## Transfer via smbclient
+
+Current approach to the data transfer:
+- Log in to the SCC login node:
+  $
+- Go to `/scratch1/projects/cca/data/moser`
+- Create a subfolder <NAME> for the cochlea to be copied
+- Log in via
+```
+$ smbclient \\\\wfs-medizin.top.gwdg.de\\ukon-all\$\\ukon100 -U GWDG\\pape41
+```
+- Go to the folder with the cochlea to copy (cd works)
+- Copy the folder via:
+  - recurse ON
+  - prompt OFF
+  - mget *
+- Copy this to HLRN by logging in to it and running
+```
+$ rsync -e "ssh -i ~/.ssh/id_rsa_hlrn" -avz [email protected]:/scratch1/projects/cca/data/moser/<NAME> /mnt/lustre-emmy-hdd/projects/nim00007/data/moser/lightsheet/volumes/<NAME>
+```
+- Remove the data on the SCC
+
+## Next files
+
+- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\M171_2R_converted_n5
+  - unclear what the converted data is
+- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\155_1L_converted_n5\BDVexport.n5
+  - Copied to the SCC, still needs to be rsynced.
+- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\MLR151_2R_converted_n5
+- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\G11_1L_converted_n5
+
+## Improvements
+
+Try to automate the transfer via https://github.com/jborean93/smbprotocol; see `sync_smb.py` for ChatGPT's initial version.
+Connecting from HLRN is not possible.
+
+## Transfer Back
+
+For transferring MoBIE results back.
+...

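As a rough starting point for the automation idea under "Improvements", here is a minimal sketch using smbprotocol's high-level `smbclient` module. The server, share path, and credential handling are assumptions (the actual `sync_smb.py` may look quite different); in practice the password would be read from `credentials.json`:

```python
import os
import shutil

import smbclient  # pip install smbprotocol

# Assumed connection details; in practice read these from credentials.json.
smbclient.register_session("wfs-medizin.top.gwdg.de", username="GWDG\\pape41", password="<password>")


def copy_smb_tree(remote_dir, local_dir):
    """Recursively copy a directory from the SMB share to a local folder."""
    os.makedirs(local_dir, exist_ok=True)
    for entry in smbclient.scandir(remote_dir):
        remote_path = remote_dir + "\\" + entry.name
        local_path = os.path.join(local_dir, entry.name)
        if entry.is_dir():
            copy_smb_tree(remote_path, local_path)
        else:
            with smbclient.open_file(remote_path, mode="rb") as src, open(local_path, "wb") as dst:
                shutil.copyfileobj(src, dst)


# Hypothetical source folder; <NAME> is the cochlea to copy.
copy_smb_tree(
    r"\\wfs-medizin.top.gwdg.de\ukon-all$\ukon100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\<NAME>",
    "/scratch1/projects/cca/data/moser/<NAME>",
)
```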