diff --git a/nextflow_examples/.gitignore b/nextflow_examples/.gitignore new file mode 100644 index 0000000..e9cf1d4 --- /dev/null +++ b/nextflow_examples/.gitignore @@ -0,0 +1,4 @@ +work +output +.nextflow +.nextflow.log* \ No newline at end of file diff --git a/nextflow_examples/01_cardio_tiny_dataset/README.md b/nextflow_examples/01_cardio_tiny_dataset/README.md new file mode 100644 index 0000000..cd75164 --- /dev/null +++ b/nextflow_examples/01_cardio_tiny_dataset/README.md @@ -0,0 +1,26 @@ +# Run example 01 through Nextflow + +This example only works for the test dataset at `10.5281_zenodo.7057076`. Download it first by installing `zenodo-get` in your Python environment and running `fetch_test_data_from_zenodo_2x2.sh`. + +### Nextflow setup +Check the README in the nextflow_examples folder for correct Nextflow setup & task installation + +### Necessary example setup +1. Adapt the paths at the top of the `run_nextflow.nf` script. Folder-paths need to end with a /: + - `task_root` needs to point to the fractal-tasks-core folder containing the task (see installation instructions). + - `fractal_demos_folder` needs to point to the fractal-demos folder +2. Download the example image data: Go to ../../examples/01_cardio_tiny_dataset/, follow the download instructions there. +3. Copy or create the measurement args_measurement.json file into the `extra_params` folder. If you have run the 01_cardio_tiny_dataset through Fractal using Fractal demos, copy it from ../../examples/01_cardio_tiny_dataset/Parameters. Otherwise, create it by running the relevant code from ../../examples/01_cardio_tiny_dataset/run_example.sh (the echo line). 
For example: + ``` + echo "{\"level\": 0, \"input_ROI_table\": \"well_ROI_table\", \"workflow_file\": \"`pwd`/../../examples/01_cardio_tiny_dataset/regionprops_from_existing_labels_feature.yaml\", \"input_specs\": {\"dapi_img\": {\"type\": \"image\", \"wavelength_id\": \"A01_C01\"}, \"label_img\": {\"type\": \"label\", \"label_name\": \"nuclei\"}}, \"output_specs\": {\"regionprops_DAPI\": {\"type\": \"dataframe\",\"table_name\": \"nuclei\"}}}" > extra_params/args_measurement.json + ``` +4. Run nextflow: `nextflow run run_nextflow.nf` + + +### Known limitations +1. Currently, everything is tested for 1 well only +2. Currently, nextflow & the tasks run in the same environment and all tasks run in the same conda environment. This can be generalized further +3. I haven't figured out where the task logs go so far. + + +Tested with fractal-tasks-core 0.9.0 diff --git a/nextflow_examples/01_cardio_tiny_dataset/extra_params/copy_ome_zarr.json b/nextflow_examples/01_cardio_tiny_dataset/extra_params/copy_ome_zarr.json new file mode 100644 index 0000000..6171849 --- /dev/null +++ b/nextflow_examples/01_cardio_tiny_dataset/extra_params/copy_ome_zarr.json @@ -0,0 +1,3 @@ +{ + "suffix": "mip" +} \ No newline at end of file diff --git a/nextflow_examples/01_cardio_tiny_dataset/run_nextflow.nf b/nextflow_examples/01_cardio_tiny_dataset/run_nextflow.nf new file mode 100644 index 0000000..063193f --- /dev/null +++ b/nextflow_examples/01_cardio_tiny_dataset/run_nextflow.nf @@ -0,0 +1,171 @@ +// Parameters need to be set by the user +// task_root = "/Users/joel/opt/miniconda3/envs/nextflow-fractal/lib/python3.9/site-packages/fractal_tasks_core/" +// def fractal_demos_folder = "/Users/joel/Library/CloudStorage/Dropbox/Joel/FMI/Code/fractal/fractal-demos/" +task_root = "/Users/joel/mambaforge/envs/nextflow-fractal/lib/python3.9/site-packages/fractal_tasks_core/" +def fractal_demos_folder = "/Users/joel/Dropbox/Joel/FMI/Code/fractal/fractal-demos/" + +// static parameters 
+helper_function = fractal_demos_folder + "nextflow_examples/helper_functions/" + +// Data sources +// input_path = fractal_demos_folder + "examples/images/10.5281_zenodo.7057076" +input_path = fractal_demos_folder + "examples/images/10.5281_zenodo.7059515" +output_path = fractal_demos_folder + "nextflow_examples/01_cardio_tiny_dataset/output" + +process create_ome_zarr { + tag "${sample}" + debug true + + input: + path(input_parameters) + + output: + path("metadata_out.json") + path("*_component.txt") + + script: + """ + # Remove old output folder + rm -rf ${output_path} + # Create the necessary input json files + python ${helper_function}json_helper.py --save_path task_params.json --args_path ${input_parameters} --input_path ${input_path} --output_path ${output_path} + python ${task_root}create_ome_zarr.py -j task_params.json --metadata-out metadata_out.json + python ${helper_function}create_component_files.py --metadata_path metadata_out.json + """ +} + +process yokogawa_to_ome_zarr { + tag "${sample}" + debug true + + input: + path(metadata_path) + path(component) + + output: + path("metadata_out.json") + + script: + """ + component_str=`cat $component` + # Create the necessary input json files + python ${helper_function}json_helper.py --save_path task_params.json --input_path ${output_path} --output_path ${output_path} --metadata_path ${metadata_path} --component \${component_str} + python ${task_root}yokogawa_to_ome_zarr.py -j task_params.json --metadata-out metadata_diff.json + # Add the metadata to the existing metadata + python ${helper_function}combine_metadata.py --metadata_old ${metadata_path} --metadata_diff metadata_diff.json --save_path metadata_out.json + """ +} + + +process copy_ome_zarr { + tag "${sample}" + debug true + + input: + path(metadata_path) + path(input_parameters) + + output: + path("new_metadata_out.json") + path("mip_*_component.txt") + + script: + """ + # Create the necessary input json files + python 
${helper_function}json_helper.py --save_path task_params.json --args_path ${input_parameters} --input_path ${output_path} --output_path ${output_path} --metadata_path ${metadata_path} + python ${task_root}copy_ome_zarr.py -j task_params.json --metadata-out metadata_diff.json + python ${helper_function}combine_metadata.py --metadata_old ${metadata_path} --metadata_diff metadata_diff.json --save_path new_metadata_out.json + python ${helper_function}create_component_files.py --metadata_path new_metadata_out.json --filename_prefix mip + """ +} + +process maximum_intensity_projection { + tag "${sample}" + debug true + + input: + path(metadata_path) + path(component) + + output: + path("metadata_out.json") + + script: + """ + component_str=`cat $component` + python ${helper_function}json_helper.py --save_path task_params.json --input_path ${output_path} --output_path ${output_path} --metadata_path ${metadata_path} --component \${component_str} + python ${task_root}maximum_intensity_projection.py -j task_params.json --metadata-out metadata_diff.json + python ${helper_function}combine_metadata.py --metadata_old ${metadata_path} --metadata_diff metadata_diff.json --save_path metadata_out.json + """ +} + +process cellpose_segmentation { + tag "${sample}" + debug true + + input: + path(metadata_path) + path(component) + path(input_parameters) + + output: + path("metadata_out.json") + + script: + """ + component_str=`cat $component` + # Create the necessary input json files + python ${helper_function}json_helper.py --save_path task_params.json --args_path ${input_parameters} --input_path ${output_path} --output_path ${output_path} --metadata_path ${metadata_path} --component \${component_str} + python ${task_root}cellpose_segmentation.py -j task_params.json --metadata-out metadata_diff.json + #touch metadata_out.json + python ${helper_function}combine_metadata.py --metadata_old ${metadata_path} --metadata_diff metadata_diff.json --save_path metadata_out.json + """ +} + 
+process napari_workflows_wrapper { + tag "${sample}" + debug true + + input: + path(metadata_path) + path(component) + path(input_parameters) + + output: + path("metadata_out.json") + + script: + """ + component_str=`cat $component` + # Create the necessary input json files + python ${helper_function}json_helper.py --save_path task_params.json --args_path ${input_parameters} --input_path ${output_path} --output_path ${output_path} --metadata_path ${metadata_path} --component \${component_str} + python ${task_root}napari_workflows_wrapper.py -j task_params.json --metadata-out metadata_diff.json + # napari workflows doesn't currently create a metadata diff (see https://github.com/fractal-analytics-platform/fractal-tasks-core/issues/357) + touch metadata_out.json + """ +} + +workflow { + def parameter_folder = fractal_demos_folder + "examples/01_cardio_tiny_dataset/Parameters/" + // Parameter files as input + create_ome_zarr_params = Channel.value(parameter_folder + "args_create_ome_zarr.json") + copy_ome_zarr_params = Channel.value(fractal_demos_folder + "nextflow_examples/01_cardio_tiny_dataset/extra_params/copy_ome_zarr.json") + cellpose_params = Channel.value(parameter_folder + "args_cellpose_segmentation.json") + measurement_params = Channel.value(fractal_demos_folder + "nextflow_examples/01_cardio_tiny_dataset/extra_params/args_measurement.json") + + // For example 02 + // def parameter_folder = fractal_demos_folder + "examples/02_cardio_small/Parameters/" + // create_ome_zarr_params = Channel.value(parameter_folder + "create_zarr_structure.json") + // copy_ome_zarr_params = Channel.value(fractal_demos_folder + "nextflow_examples/02_cardio_small/extra_params/copy_ome_zarr.json") + // cellpose_params = Channel.value(parameter_folder + "cellpose_segmentation.json") + // measurement_params = Channel.value(fractal_demos_folder + "nextflow_examples/02_cardio_small/extra_params/args_measurement.json") + + // Take outputs from one task as input of the next where 
possible + create_ome_zarr_out = create_ome_zarr(create_ome_zarr_params) + yoko_out = yokogawa_to_ome_zarr(create_ome_zarr_out[0], create_ome_zarr_out[1]) + copy_out = copy_ome_zarr(yoko_out, copy_ome_zarr_params) + mip_out = maximum_intensity_projection(copy_out[0], copy_out[1]) + cellpose_out = cellpose_segmentation(mip_out, copy_out[1], cellpose_params) + metadata_mip = napari_workflows_wrapper(cellpose_out, copy_out[1], measurement_params) + +} diff --git a/nextflow_examples/01_cardio_tiny_dataset/run_nextflow_better_metadata_handling.nf b/nextflow_examples/01_cardio_tiny_dataset/run_nextflow_better_metadata_handling.nf new file mode 100644 index 0000000..5f808f9 --- /dev/null +++ b/nextflow_examples/01_cardio_tiny_dataset/run_nextflow_better_metadata_handling.nf @@ -0,0 +1,180 @@ +// Parameters need to be set by the user +// task_root = "/Users/joel/opt/miniconda3/envs/nextflow-fractal/lib/python3.9/site-packages/fractal_tasks_core/" +// def fractal_demos_folder = "/Users/joel/Library/CloudStorage/Dropbox/Joel/FMI/Code/fractal/fractal-demos/" +task_root = "/Users/joel/mambaforge/envs/nextflow-fractal/lib/python3.9/site-packages/fractal_tasks_core/" +def fractal_demos_folder = "/Users/joel/Dropbox/Joel/FMI/Code/fractal/fractal-demos/" + +// static parameters +helper_function = fractal_demos_folder + "nextflow_examples/helper_functions/" + +// Data sources +// input_path = fractal_demos_folder + "examples/images/10.5281_zenodo.7057076" +input_path = fractal_demos_folder + "examples/images/10.5281_zenodo.7059515" +output_path = fractal_demos_folder + "nextflow_examples/01_cardio_tiny_dataset/output" + +process create_ome_zarr { + tag "${sample}" + debug true + + input: + path(input_parameters) + + output: + path("metadata_out.json") + path("*_component.txt") + + script: + """ + # Remove old output folder + rm -rf ${output_path} + # Create the necessary input json files + python ${helper_function}json_helper.py --save_path task_params.json --args_path 
${input_parameters} --input_path ${input_path} --output_path ${output_path} + python ${task_root}create_ome_zarr.py -j task_params.json --metadata-out metadata_out.json + python ${helper_function}create_component_files.py --metadata_path metadata_out.json + """ +} + +process yokogawa_to_ome_zarr { + tag "${sample}" + debug true + + input: + path(metadata_path) + path(component) + + output: + path("dummy.txt") + + script: + """ + component_str=`cat $component` + # Create the necessary input json files + python ${helper_function}json_helper.py --save_path task_params.json --input_path ${output_path} --output_path ${output_path} --metadata_path ${metadata_path} --component \${component_str} + python ${task_root}yokogawa_to_ome_zarr.py -j task_params.json --metadata-out metadata_diff.json + touch dummy.txt + # Add the metadata to the existing metadata + # python ${helper_function}combine_metadata.py --metadata_old ${metadata_path} --metadata_diff metadata_diff.json --save_path metadata_out.json + """ +} + + +process copy_ome_zarr { + tag "${sample}" + debug true + + input: + path(metadata_path) + path(input_parameters) + val dummy + + output: + path("new_metadata_out.json") + path("mip_*_component.txt") + + script: + """ + # Create the necessary input json files + python ${helper_function}json_helper.py --save_path task_params.json --args_path ${input_parameters} --input_path ${output_path} --output_path ${output_path} --metadata_path ${metadata_path} + python ${task_root}copy_ome_zarr.py -j task_params.json --metadata-out metadata_diff.json + python ${helper_function}combine_metadata.py --metadata_old ${metadata_path} --metadata_diff metadata_diff.json --save_path new_metadata_out.json + python ${helper_function}create_component_files.py --metadata_path new_metadata_out.json --filename_prefix mip + """ +} + +process maximum_intensity_projection { + tag "${sample}" + debug true + + input: + path(metadata_path) + path(component) + + output: + path("dummy.txt") + + 
script: + """ + component_str=`cat $component` + python ${helper_function}json_helper.py --save_path task_params.json --input_path ${output_path} --output_path ${output_path} --metadata_path ${metadata_path} --component \${component_str} + python ${task_root}maximum_intensity_projection.py -j task_params.json --metadata-out metadata_diff.json + touch dummy.txt + #python ${helper_function}combine_metadata.py --metadata_old ${metadata_path} --metadata_diff metadata_diff.json --save_path metadata_out.json + """ +} + +process cellpose_segmentation { + tag "${sample}" + debug true + + input: + path(metadata_path) + path(component) + path(input_parameters) + val dummy + + output: + path("dummy.txt") + + script: + """ + component_str=`cat $component` + # Create the necessary input json files + python ${helper_function}json_helper.py --save_path task_params.json --args_path ${input_parameters} --input_path ${output_path} --output_path ${output_path} --metadata_path ${metadata_path} --component \${component_str} + python ${task_root}cellpose_segmentation.py -j task_params.json --metadata-out metadata_diff.json + #python ${helper_function}combine_metadata.py --metadata_old ${metadata_path} --metadata_diff metadata_diff.json --save_path metadata_out.json + touch dummy.txt + """ +} + +process napari_workflows_wrapper { + tag "${sample}" + debug true + + input: + path(metadata_path) + path(component) + path(input_parameters) + val dummy + + output: + path("dummy.txt") + + script: + """ + component_str=`cat $component` + # Create the necessary input json files + python ${helper_function}json_helper.py --save_path task_params.json --args_path ${input_parameters} --input_path ${output_path} --output_path ${output_path} --metadata_path ${metadata_path} --component \${component_str} + python ${task_root}napari_workflows_wrapper.py -j task_params.json --metadata-out metadata_diff.json + # napari workflows doesn't currently create a metadata diff (see 
https://github.com/fractal-analytics-platform/fractal-tasks-core/issues/357) + touch dummy.txt + """ +} + +workflow { + def parameter_folder = fractal_demos_folder + "examples/01_cardio_tiny_dataset/Parameters/" + // Parameter files as input + create_ome_zarr_params = Channel.value(parameter_folder + "args_create_ome_zarr.json") + copy_ome_zarr_params = Channel.value(fractal_demos_folder + "nextflow_examples/01_cardio_tiny_dataset/extra_params/copy_ome_zarr.json") + cellpose_params = Channel.value(parameter_folder + "args_cellpose_segmentation.json") + measurement_params = Channel.value(fractal_demos_folder + "nextflow_examples/01_cardio_tiny_dataset/extra_params/args_measurement.json") + + // For example 02 + // def parameter_folder = fractal_demos_folder + "examples/02_cardio_small/Parameters/" + // create_ome_zarr_params = Channel.value(parameter_folder + "create_zarr_structure.json") + // copy_ome_zarr_params = Channel.value(fractal_demos_folder + "nextflow_examples/02_cardio_small/extra_params/copy_ome_zarr.json") + // cellpose_params = Channel.value(parameter_folder + "cellpose_segmentation.json") + // measurement_params = Channel.value(fractal_demos_folder + "nextflow_examples/02_cardio_small/extra_params/args_measurement.json") + + // Generate the metadata & components from the plate-level tasks, don't update them in other tasks + create_ome_zarr_out = create_ome_zarr(create_ome_zarr_params) + metadata = create_ome_zarr_out[0] + component_list = create_ome_zarr_out[1] + dummy = yokogawa_to_ome_zarr(metadata, component_list) + copy_out = copy_ome_zarr(metadata, copy_ome_zarr_params, dummy) + metadata_mip = copy_out[0] + component_list_mip = copy_out[1] + dummy2 = maximum_intensity_projection(metadata_mip, component_list_mip) + dummy3 = cellpose_segmentation(metadata_mip, component_list_mip, cellpose_params, dummy2) + metadata_mip = napari_workflows_wrapper(metadata_mip, component_list_mip, measurement_params, dummy3) + +} diff --git 
a/nextflow_examples/README.md b/nextflow_examples/README.md new file mode 100644 index 0000000..9432e2b --- /dev/null +++ b/nextflow_examples/README.md @@ -0,0 +1,11 @@ +# Installation +1. Create a conda environment: `conda create --name nextflow-fractal python=3.9 -y` +2. Activate the environment: `conda activate nextflow-fractal` +3. Install Nextflow: `conda install -c bioconda nextflow -y` +4. (On Macs: Install imagecodecs separately: `conda install imagecodecs -y`) +5. Install fractal-tasks-core in the version you want: `pip install fractal-tasks-core` +6. (Install nomkl to avoid `OMP: Error #15: Initializing libomp.dylib, but found libomp.dylib already initialized.`: `conda install nomkl -y`) +7. Get the base folder where the fractal-tasks-core Python files are installed (e.g. `/Users/joel/opt/miniconda3/envs/nextflow-fractal/lib/python3.9/site-packages/fractal_tasks_core/` => in the conda environment) or download the tasks separately to a known folder. This folder will be needed in the Nextflow scripts. 
+ + + diff --git a/nextflow_examples/helper_functions/combine_metadata.py b/nextflow_examples/helper_functions/combine_metadata.py new file mode 100644 index 0000000..22605ef --- /dev/null +++ b/nextflow_examples/helper_functions/combine_metadata.py @@ -0,0 +1,33 @@ +import json_helper +import json +import argparse + +def combine_metadata(metadata_old, metadata_diff, save_path): + metadata_dict = json_helper.load_json(metadata_old) + metadata_diff_dict = json_helper.load_json(metadata_diff) + for key, value in metadata_diff_dict.items(): + metadata_dict[key] = value + + with open(save_path, 'w') as outfile: + outfile.write(json.dumps(metadata_dict, indent=4)) + +def parse_args(): + parser = argparse.ArgumentParser( + prog='Json Helper', + description='Combines JSON files for Fractal', + ) + parser.add_argument('--metadata_old') + parser.add_argument('--metadata_diff') + parser.add_argument('--save_path') + + args=parser.parse_args() + return args + + +if __name__ == '__main__': + inputs = parse_args() + combine_metadata( + inputs.metadata_old, + inputs.metadata_diff, + inputs.save_path, + ) \ No newline at end of file diff --git a/nextflow_examples/helper_functions/create_component_files.py b/nextflow_examples/helper_functions/create_component_files.py new file mode 100644 index 0000000..bc49fc2 --- /dev/null +++ b/nextflow_examples/helper_functions/create_component_files.py @@ -0,0 +1,35 @@ +import json_helper +import json +import argparse + +def create_component_files(metadata_path, filename_prefix=None): + # Creating tiny text files just containing the name of a component + # Silly way to do things, but couldn't find a better way to pass arguments + # between Nextflow steps + metadata_dict = json_helper.load_json(metadata_path) + for i, component in enumerate(metadata_dict['image']): + if filename_prefix: + filename = f"{filename_prefix}_{i}_component.txt" + else: + filename = f"{i}_component.txt" + with open(filename, 'w') as outfile: + outfile.write(component) 
+ +def parse_args(): + parser = argparse.ArgumentParser( + prog='Component Writer', + description='Stores each component in its own file', + ) + parser.add_argument('--metadata_path') + parser.add_argument('--filename_prefix') + + args=parser.parse_args() + return args + + +if __name__ == '__main__': + inputs = parse_args() + create_component_files( + inputs.metadata_path, + inputs.filename_prefix + ) \ No newline at end of file diff --git a/nextflow_examples/helper_functions/json_helper.py b/nextflow_examples/helper_functions/json_helper.py new file mode 100644 index 0000000..7278b71 --- /dev/null +++ b/nextflow_examples/helper_functions/json_helper.py @@ -0,0 +1,63 @@ +import json +import argparse + +def load_json(path): + with open(path, 'r') as json_file: + data = json.load(json_file) + + return data + +def combine_parameters(params_dict, input_path, output_path, metadata_path=None, component = None): + if metadata_path: + metadata_dict = load_json(metadata_path) + else: + metadata_dict={} + + new_dict = { + "input_paths": [input_path], + "output_path": output_path, + "metadata": metadata_dict + } + + if component: + new_dict["component"] = component + + combined_dict = new_dict | params_dict + + return combined_dict + +def create_combined_json(save_path, args_path, input_path, output_path, metadata_path=None, component = None): + if args_path: + params_dict = load_json(args_path) + else: + params_dict = {} + combined_dict = combine_parameters(params_dict, input_path, output_path, metadata_path, component) + with open(save_path, 'w') as outfile: + outfile.write(json.dumps(combined_dict, indent=4)) + +def parse_args(): + parser = argparse.ArgumentParser( + prog='Json Helper', + description='Combines JSON files for Fractal', + ) + parser.add_argument('--save_path') + parser.add_argument('--args_path') + parser.add_argument('--input_path') + parser.add_argument('--output_path') + parser.add_argument('--metadata_path') + parser.add_argument('--component') + + 
args=parser.parse_args() + return args + + +if __name__ == '__main__': + inputs = parse_args() + create_combined_json( + inputs.save_path, + inputs.args_path, + inputs.input_path, + inputs.output_path, + inputs.metadata_path, + inputs.component + )