Skip to content

Commit 98c6db4

Browse files
Further updates to processing scripts; add function to sync mobie dataset
1 parent 7921317 commit 98c6db4

File tree

9 files changed

+118
-19
lines changed

9 files changed

+118
-19
lines changed

flamingo_tools/s3_utils.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
"""This file contains utility functions for processing data located on an S3 storage.
22
The upload of data to the storage system should be performed with 'rclone'.
33
"""
4+
import json
45
import os
6+
import warnings
7+
from shutil import which
8+
from subprocess import run
59
from typing import Optional, Tuple
610

711
import s3fs
@@ -186,3 +190,96 @@ def create_s3_target(
186190
else:
187191
s3_filesystem = s3fs.S3FileSystem(anon=anon, client_kwargs=client_kwargs, asynchronous=asynchronous)
188192
return s3_filesystem
193+
194+
195+
def _sync_rclone(local_dir, target):
    """Copy a local file or directory to the S3 target via rclone.

    Args:
        local_dir: The local path (file or directory) to copy from.
        target: The remote path, without the rclone alias prefix, to copy to.
    """
    # The rclone alias could also be exposed as a parameter.
    rclone_alias = "cochlea-lightsheet"
    print("Sync", local_dir, "to", target)
    # check=True so that a failed transfer raises a CalledProcessError
    # instead of silently leaving the remote in a partially synced state.
    run(["rclone", "--progress", "copyto", local_dir, f"{rclone_alias}:{target}"], check=True)
200+
201+
202+
def sync_dataset(
    mobie_root: str,
    dataset_name: str,
    bucket_name: Optional[str] = None,
    url: Optional[str] = None,
    anon: bool = False,
    credential_file: Optional[str] = None,
    force_segmentation_update: bool = False,
) -> None:
    """Sync a MoBIE dataset on the s3 bucket using rclone.

    If the dataset is not yet part of the remote project, the project metadata
    and the full dataset are copied over. Otherwise, only new sources (plus,
    optionally, updated segmentations) are copied; tables of segmentation and
    spot sources are always re-synced, since they may have been extended locally.

    Args:
        mobie_root: The directory with the local mobie project.
        dataset_name: The mobie dataset to sync.
        bucket_name: The name of the dataset's bucket on s3. Defaults to BUCKET_NAME.
        url: Service endpoint for the S3 bucket. Defaults to SERVICE_ENDPOINT.
        anon: Whether to use anonymous access for the S3 filesystem.
        credential_file: File path to the S3 credentials.
        force_segmentation_update: Whether to force re-upload of segmentation image data.

    Raises:
        RuntimeError: If rclone is not available.
        ValueError: If the dataset is not part of the local mobie project.
    """
    from mobie.metadata import add_remote_project_metadata

    # Make sure that rclone is loaded.
    if which("rclone") is None:
        raise RuntimeError("rclone is required for synchronization. Try loading it via 'module load rclone'.")

    # Make sure the dataset is in the local version of the project.
    # Use an explicit error rather than assert, which is stripped under 'python -O'.
    with open(os.path.join(mobie_root, "project.json")) as f:
        project_metadata = json.load(f)
    datasets = project_metadata["datasets"]
    if dataset_name not in datasets:
        raise ValueError(f"The dataset '{dataset_name}' is not part of the local project at '{mobie_root}'.")

    # Get the s3 filesystem and bucket name.
    s3 = create_s3_target(url, anon, credential_file)
    if bucket_name is None:
        bucket_name = BUCKET_NAME
    if url is None:
        url = SERVICE_ENDPOINT

    # Add the required remote metadata to the project. Suppress warnings about missing local data.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        add_remote_project_metadata(mobie_root, bucket_name, url)

    # Get the metadata from the S3 bucket.
    project_metadata_path = os.path.join(bucket_name, "project.json")
    with s3.open(project_metadata_path, "r") as f:
        project_metadata = json.load(f)

    # Check if the dataset is part of the remote project already.
    local_ds_root = os.path.join(mobie_root, dataset_name)
    remote_ds_root = os.path.join(bucket_name, dataset_name)
    if dataset_name not in project_metadata["datasets"]:
        # The dataset is completely new: copy over the project metadata and the full dataset.
        print("The dataset is not yet synced. Will copy it over.")
        _sync_rclone(os.path.join(mobie_root, "project.json"), project_metadata_path)
        _sync_rclone(local_ds_root, remote_ds_root)
        return

    # Otherwise, check which sources are new and add them.
    with open(os.path.join(local_ds_root, "dataset.json")) as f:
        local_dataset_metadata = json.load(f)

    dataset_metadata_path = os.path.join(remote_ds_root, "dataset.json")
    with s3.open(dataset_metadata_path, "r") as f:
        remote_dataset_metadata = json.load(f)

    for source_name, source_data in local_dataset_metadata["sources"].items():
        # Each source entry maps a single source type to its data.
        source_type, source_data = next(iter(source_data.items()))
        is_segmentation = source_type == "segmentation"
        is_spots = source_type == "spots"
        data_path = source_data["imageData"]["ome.zarr"]["relativePath"]
        source_not_on_remote = source_name not in remote_dataset_metadata["sources"]
        # Only update the image data if the source is new or if we force updates for segmentations.
        if source_not_on_remote or (is_segmentation and force_segmentation_update):
            _sync_rclone(os.path.join(local_ds_root, data_path), os.path.join(remote_ds_root, data_path))
        # We always sync the tables, since they may have been extended locally.
        if is_segmentation or is_spots:
            table_path = source_data["tableData"]["tsv"]["relativePath"]
            _sync_rclone(os.path.join(local_ds_root, table_path), os.path.join(remote_ds_root, table_path))

    # Sync the dataset metadata.
    _sync_rclone(
        os.path.join(local_ds_root, "dataset.json"), os.path.join(remote_ds_root, "dataset.json")
    )

reproducibility/label_components/repro_label_components.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def repro_label_components(
4444
# The table name sometimes has to be over-written.
4545
# table_name = "PV_SGN_V2_DA"
4646
# table_name = "CR_SGN_v2"
47+
# table_name = "Ntng1_SGN_v2"
4748

4849
table_name = f"{cell_type.upper()}_{unet_version}"
4950

reproducibility/templates_processing/REAMDE.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ For IHC segmentation run:
1414

1515
After this, run the following to add segmentation to MoBIE, create component labelings and upload to S3:
1616
- templates_transfer/mobie_segmentation_template.sbatch
17-
- templates_transfer/s3_seg_template.sh
17+
- templates_transfer/sync_mobie.py
1818
- label_components/repro_label_components.py
19-
- templates_transfer/s3_seg_template.sh
19+
- templates_transfer/sync_mobie.py
2020

2121
For ribbon synapse detection without associated IHC segmentation run
2222
- detect_synapse_template.sbatch
@@ -25,4 +25,4 @@ For ribbon synapse detection with associated IHC segmentation run
2525

2626
After this, run the following to add detections to MoBIE and upload to S3:
2727
- templates_transfer/mobie_spots_template.sbatch
28-
- s3_synapse_template.sh
28+
- templates_transfer/sync_mobie.py

reproducibility/templates_processing/apply_unet_SGN_template.sbatch

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ export INPUT=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/"$COC
4040
export OUTPUT_FOLDER=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/predictions/"$COCHLEA"/"$SEG_NAME"
4141

4242
# The default v2 model
43-
# export MODEL=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/trained_models/SGN/v2_cochlea_distance_unet_SGN_supervised_2025-05-27
43+
export MODEL=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/trained_models/SGN/v2_cochlea_distance_unet_SGN_supervised_2025-05-27
4444

4545
# Domain adapted model for MLR99L
46-
export MODEL=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/trained_models/SGN/v2_domain_adaptation_mlr99l/best.pt
46+
# export MODEL=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/trained_models/SGN/v2_domain_adaptation_mlr99l/best.pt
4747

4848
export PREDICTION_INSTANCES=10
4949

reproducibility/templates_processing/detect_synapse_marker_template.sbatch

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,3 @@ python $SCRIPT \
5454
--model $MODEL \
5555
--max_distance $MAX_DISTANCE \
5656
--s3
57-
Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
#!/bin/bash
22
#SBATCH --job-name=synapse-detect
3-
#SBATCH -t 42:00:00 # estimated time, adapt to your needs
3+
#SBATCH -t 12:00:00 # estimated time, adapt to your needs
44
#SBATCH [email protected] # change this to your mailaddress
55
#SBATCH --mail-type=FAIL # send mail when the job fails
66

77
#SBATCH -p grete:shared # the partition
88
#SBATCH -G A100:1 # For requesting 1 A100 GPU.
99
#SBATCH -A nim00007
10-
#SBATCH -c 2
11-
#SBATCH --mem 500G
10+
#SBATCH -c 8
11+
#SBATCH --mem 128G
1212

1313
source ~/.bashrc
14-
micromamba activate micro-sam_gpu
14+
# micromamba activate micro-sam_gpu
15+
micromamba activate sam
1516

1617
# Print out some info.
1718
echo "Submitting job with sbatch from directory: ${SLURM_SUBMIT_DIR}"
@@ -22,7 +23,8 @@ echo "Current node: ${SLURM_NODELIST}"
2223
# Run the script
2324
#python myprogram.py $SLURM_ARRAY_TASK_ID
2425

25-
SCRIPT_REPO=/user/schilling40/u15000/flamingo-tools
26+
# SCRIPT_REPO=/user/schilling40/u15000/flamingo-tools
27+
SCRIPT_REPO=/user/pape41/u12086/Work/my_projects/flamingo-tools
2628
cd "$SCRIPT_REPO"/flamingo_tools/segmentation/ || exit
2729

2830
export SCRIPT_DIR=$SCRIPT_REPO/scripts
@@ -31,15 +33,13 @@ export SCRIPT_DIR=$SCRIPT_REPO/scripts
3133
COCHLEA=$1
3234
# image channel, e.g. CTBP2 or RibA
3335
IMAGE_CHANNEL=$2
34-
# segmentation name, as it appears in MoBIE, e.g. synapses_v3
35-
IHC_SEG=$3
3636

3737
export INPUT_PATH="$COCHLEA"/images/ome-zarr/"$IMAGE_CHANNEL".ome.zarr
38-
export MASK_PATH="$COCHLEA"/images/ome-zarr/"$IHC_SEG".ome.zarr
3938

4039
# data on NHR
4140
# export INPUT_PATH=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/"$COCHLEA"/"$DATA"
4241
# export INPUT_KEY="setup$STAIN_CHANNEL/timepoint0/s0"
42+
INPUT_KEY="s0"
4343

4444
export OUTPUT_FOLDER=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/predictions/"$COCHLEA"/synapses_v3
4545

@@ -52,10 +52,10 @@ export MODEL=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/train
5252
echo "OUTPUT_FOLDER $OUTPUT_FOLDER"
5353
echo "MODEL $MODEL"
5454

55-
python ~/flamingo-tools/scripts/synapse_marker_detection/run_prediction.py \
55+
SCRIPT="$SCRIPT_DIR"/synapse_marker_detection/run_prediction.py
56+
python $SCRIPT \
5657
--input "$INPUT_PATH" \
5758
--input_key "$INPUT_KEY" \
5859
--output_folder "$OUTPUT_FOLDER" \
5960
--model $MODEL \
6061
--s3
61-

reproducibility/templates_transfer/mobie_image_template.sbatch

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/bin/bash
22
#SBATCH --job-name=mobie_image
3-
#SBATCH -t 01:00:00 # estimated time, adapt to your needs
3+
#SBATCH -t 12:00:00 # estimated time, adapt to your needs
44
#SBATCH [email protected] # change this to your mailaddress
55
#SBATCH --mail-type=FAIL # send mail when the job fails
66

@@ -10,8 +10,8 @@
1010
#SBATCH --mem 180G
1111

1212
source ~/.bashrc
13-
source ~/miniconda3/bin/activate
14-
source activate mobie
13+
# source activate sam
14+
micromamba activate sam
1515

1616
# Run the script
1717

reproducibility/templates_transfer/s3_seg_template.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ mobie.add_remote_metadata -i $MOBIE_DIR -s $SERVICE_ENDPOINT -b $BUCKET_NAME
1313

1414
rclone --progress copyto "$MOBIE_DIR"/"$COCHLEA"/dataset.json cochlea-lightsheet:cochlea-lightsheet/"$COCHLEA"/dataset.json
1515
rclone --progress copyto "$MOBIE_DIR"/"$COCHLEA"/images/ome-zarr/"$SEG_CHANNEL".ome.zarr cochlea-lightsheet:cochlea-lightsheet/"$COCHLEA"/images/ome-zarr/"$SEG_CHANNEL".ome.zarr
16+
# TODO enable to also sync the whole thing and project.json
1617
# take care that segmentation tables containing evaluations (tonotopic mapping, marker labels, etc.) might be overwritten
1718
rclone --progress copyto "$MOBIE_DIR"/"$COCHLEA"/tables/"$SEG_CHANNEL" cochlea-lightsheet:cochlea-lightsheet/"$COCHLEA"/tables/"$SEG_CHANNEL"
1819

reproducibility/tonotopic_mapping/repro_tonotopic_mapping.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def repro_tonotopic_mapping(
4242

4343
cochlea_str = "-".join(cochlea.split("_"))
4444
seg_str = "-".join(seg_channel.split("_"))
45+
os.makedirs(output_dir, exist_ok=True)
4546
output_table_path = os.path.join(output_dir, f"{cochlea_str}_{seg_str}.tsv")
4647

4748
s3_path = os.path.join(f"{cochlea}", "tables", f"{seg_channel}", "default.tsv")

0 commit comments

Comments
 (0)