khersameesh24
diff --git a/‎.nf-core.yml‎
Lines changed: 10 additions & 0 deletions b/‎.nf-core.yml‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 19 additions & 4 deletions b/‎README.md‎
Lines changed: 19 additions & 4 deletions
diff --git a/‎assets/config/xenium.toml‎
Lines changed: 15 additions & 0 deletions b/‎assets/config/xenium.toml‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎assets/example_samplesheet.csv‎
Lines changed: 2 additions & 0 deletions b/‎assets/example_samplesheet.csv‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎assets/samplesheet.csv‎
Lines changed: 2 additions & 1 deletion b/‎assets/samplesheet.csv‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎conf/modules.config‎
Lines changed: 2 additions & 51 deletions b/‎conf/modules.config‎
Lines changed: 2 additions & 51 deletions
diff --git a/‎conf/test.config‎
Lines changed: 15 additions & 5 deletions b/‎conf/test.config‎
Lines changed: 15 additions & 5 deletions
diff --git a/‎conf/test_full.config‎
Lines changed: 1 addition & 0 deletions b/‎conf/test_full.config‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/usage.md‎
Lines changed: 67 additions & 33 deletions b/‎docs/usage.md‎
Lines changed: 67 additions & 33 deletions
diff --git a/‎modules/local/baysor/create_dataset/main.nf‎
Lines changed: 1 addition & 3 deletions b/‎modules/local/baysor/create_dataset/main.nf‎
Lines changed: 1 addition & 3 deletions
@@ -1,3 +1,13 @@
+lint:
+  actions_ci: false
+  files_exist:
+    - .github/workflows/awsfulltest.yml
+    - .github/workflows/awstest.yml
+  files_unchanged:
+    - .gitignore
+    - assets/nf-core-spatialxe_logo_light.png
+    - docs/images/nf-core-spatialxe_logo_dark.png
+    - docs/images/nf-core-spatialxe_logo_light.png
 nf_core_version: 3.2.1
 repository_type: pipeline
 template:
 
@@ -40,20 +40,35 @@ On release, automated continuous integration tests run the pipeline on a full-si
 
 ```csv
 sample,bundle,image
-test_sample,/path/to/xenium-bundle/,/path/to/morphology.ome.tif
+test_sample,/path/to/xenium-bundle,/path/to/morphology.ome.tif
 ```
 
 Now, you can run the pipeline using:
 
 <!-- TODO nf-core: update the following command to include all required parameters for a minimal example -->
 
+## Run image-based segmentation mode <br>
+
+`CELLPOSE -> BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC`
+
+```bash
+nextflow run nf-core/spatialxe \
+   -profile <docker/singularity/.../institute> \
+   --input samplesheet.csv \
+   --outdir <OUTDIR> \
+   --mode image
+```
+
+## Run coordinate-based segmentation mode <br>
+
+`PROSEG -> PROSEG2BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC`
+
 ```bash
 nextflow run nf-core/spatialxe \
    -profile <docker/singularity/.../institute> \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
-   --imgage_based \
-   --segmentation cellpose
+   --mode coordinate
 ```
 
 > [!WARNING]
@@ -69,7 +84,7 @@ For more details about the output files and reports, please refer to the
 
 ## Credits
 
-nf-core/spatialxe was originally written by [Sameesh Kher](https://github.com/khersameesh24) and  [Florian Heyl](https://github.com/heylf).
+nf-core/spatialxe was originally written by [Sameesh Kher](https://github.com/khersameesh24) and [Florian Heyl](https://github.com/heylf).
 
 We thank the following people for their extensive assistance in the development of this pipeline:
 
 
@@ -0,0 +1,15 @@
+[data]
+x = "x_location"
+y = "y_location"
+z = "z_location"
+gene = "feature_name"
+min_molecules_per_gene = 10
+exclude_genes = "NegControl*,BLANK_*,antisense_*"
+min_molecules_per_cell = 50
+
+[segmentation]
+unassigned_prior_label = "UNASSIGNED"
+prior_segmentation_confidence = 0.5
+
+[plotting]
+min_pixels_per_cell = 10
@@ -0,0 +1,2 @@
+sample,bundle,image
+xenium_prime_mouse_ileum,/home/user/raw_data/xenium/Xenium_Prime_Mouse_Ileum_tiny_outs,/home/user/raw_data/xenium/Xenium_Prime_Mouse_Ileum_tiny_outs/morphology.ome.tif
@@ -1 +1,2 @@
-test_run,https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.tar.gz,morphology.ome.tif
+sample,bundle,image
+test_run,https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.tar.gz,
@@ -46,73 +46,27 @@ process {
             path: { "${params.outdir}/baysor/run" },
             mode: params.publish_dir_mode,
         ]
-        version = "0.7.1"
-        baysor_xenium_config =
-        """
-        [data]
-        x = \\"x_location\\"
-        y = \\"y_location\\"
-        z = \\"z_location\\"
-        gene = \\"feature_name\\"
-        min_molecules_per_gene = 10
-        exclude_genes = \\"NegControl*,BLANK_*,antisense_*\\"
-        min_molecules_per_cell = 50
-
-        [segmentation]
-        unassigned_prior_label = \\"UNASSIGNED\\"
-        prior_segmentation_confidence = 0.5
-
-        [plotting]
-        min_pixels_per_cell = 10
-        """
     }
 
     withName: BAYSOR_SEGFREE {
         publishDir = [
             path: { "${params.outdir}/baysor/segfree" },
             mode: params.publish_dir_mode,
         ]
-        version = "0.7.1"
-        baysor_xenium_config =
-        """
-        [data]
-        x = \\"x_location\\"
-        y = \\"y_location\\"
-        z = \\"z_location\\"
-        gene = \\"feature_name\\"
-        min_molecules_per_cell = 50
-
-        [plotting]
-        min_pixels_per_cell = 10
-        """
     }
 
     withName: BAYSOR_CREATE_DATASET {
         publishDir = [
             path: { "${params.outdir}/baysor/create_dataset" },
             mode: params.publish_dir_mode,
         ]
-        version = "0.7.1"
     }
 
     withName: BAYSOR_PREVIEW {
         publishDir = [
             path: { "${params.outdir}/baysor/preview" },
             mode: params.publish_dir_mode,
         ]
-        version = "0.7.1"
-        baysor_xenium_config =
-        """
-        [data]
-        x = \\"x_location\\"
-        y = \\"y_location\\"
-        z = \\"z_location\\"
-        gene = \\"feature_name\\"
-        min_molecules_per_cell = 50
-
-        [plotting]
-        min_pixels_per_cell = 10
-        """
     }
 
     withName: SEGGER_CREATE_DATASET {
@@ -122,7 +76,6 @@ process {
         ]
         tile_width = "120"
         tile_height = "120"
-        version = "0.1.0"
     }
 
     withName: SEGGER_TRAIN {
@@ -133,8 +86,7 @@ process {
         batch_size = 4                             // larger batch size can speed up training, but requires more memory
         devices = 4                                // Use multiple GPUs by increasing the devices parameter to further accelerate training
         max_epochs = 200                           // increasing #epochs can improve model performance with more learning cycles, but extends training time
-        ext.args = "--init_emb 8 --hidden_channels 32 --num_tx_tokens 500 --out_channels 8 --heads 2 --num_mid_layers 2 --strategy auto --precision 16-mixed"
-        version = "0.1.0"
+        ext.args = { "--init_emb 8 --hidden_channels 32 --num_tx_tokens 500 --out_channels 8 --heads 2 --num_mid_layers 2 --strategy auto --precision 16-mixed" }
     }
 
     withName: SEGGER_PREDICT {
@@ -144,7 +96,6 @@ process {
         ]
         batch_size = 1                            // larger batch size can speed up training, but requires more memory
         cc_analysis = "false"                     // to control connected component analysis
-        version = "0.1.0"
     }
 
     withName: PARQUET_TO_CSV {
@@ -180,7 +131,7 @@ process {
             path: { "${params.outdir}/cellpose" },
             mode: params.publish_dir_mode,
         ]
-        ext.args = "--diameter 9 --channel_axis 0 --save_flows"
+        ext.args = { "--pretrained_model nuclei --diameter 9 --channel_axis 0 --save_flows" }
     }
 
 }
@@ -11,19 +11,29 @@
 */
 
 process {
-    resourceLimits = [
+
+    withLabel: process_high {
+        resourceLimits = [
         cpus: 8,
-        memory: '16.GB',
+        memory: '8.GB',
         time: '1.h'
-    ]
+        ]
+    }
+
+    withName: CELLPOSE {
+        resourceLimits = [
+            cpus: 4,
+            memory: '8.GB'
+        ]
+    }
 }
 
 params {
     config_profile_name        = 'Test profile'
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
-        // Input data
+    // Input data
     input  = "${projectDir}/assets/samplesheet.csv"
     outdir = 'results'
-
+    mode   = 'image'
 }
@@ -17,4 +17,5 @@ params {
     // Input data
     input  = "${projectDir}/assets/samplesheet.csv"
     outdir = 'results'
+    mode   = 'image'
 }
@@ -6,58 +6,93 @@
 
 ## Introduction
 
-<!-- TODO nf-core: Add documentation about anything specific to running your pipeline. For general topics, please point to (and add to) the main nf-core website. -->
-
 ## Samplesheet input
 
-You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
+You will need to create a samplesheet with information about the sample you would like to analyse before running the pipeline. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
+
+```csv title="samplesheet.csv"
+sample,bundle,image
+breast_cancer,/path/to/xenium/bundle,/path/to/morphology.ome.tif
+```
+
+| Column   | Description                                                                                                                                                            |
+| -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `sample` | `Required`. Custom sample name. It is recommended to follow the same name from the output of the Xenium Onboard Analysis (XOA). Avoid using spaces in the sample name. |
+| `bundle` | `Required`. Full path to the Xenium bundle, output of the Xenium Onboard Analysis.                                                                                     |
+| `image`  | `Optional`. Full path to morphology.ome.tif. If not provided, the morphology.ome.tif from the bundle is considered.                                                    |
+
+An [example samplesheet](../assets/example_samplesheet.csv) has been provided with the pipeline.
+
+#### Using the samplesheet
 
 ```bash
 --input '[path to samplesheet file]'
 ```
 
-### Multiple runs of the same sample
+## Running the pipeline
+
+The typical command for running the pipeline is as follows:
 
-The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
+#### Image-based segmentation mode
 
-```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz
-```
+This runs the default image mode:<br>
+`CELLPOSE ➔ BAYSOR ➔ XR-IMPORT-SEGMENTATION ➔ SPATIALDATA ➔ QC`
 
-### Full samplesheet
+```bash
+nextflow run nf-core/spatialxe \
+        --input ./samplesheet.csv \
+        --outdir ./results \
+        --mode image \
+        -profile <docker/singularity/...>
+```
 
-The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below.
+#### Coordinate-based (transcripts-based) segmentation mode
 
-A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
+This runs the default coordinate mode:<br>
+`PROSEG ➔ PROSEG2BAYSOR ➔ XR-IMPORT-SEGMENTATION ➔ SPATIALDATA ➔ QC`
 
-```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
-CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
-TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,
-TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
+```bash
+nextflow run nf-core/spatialxe \
+        --input ./samplesheet.csv \
+        --outdir ./results \
+        --mode coordinate \
+        -profile <docker/singularity/...>
 ```
 
-| Column    | Description                                                                                                                                                                            |
-| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample`  | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
-| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
+### Image-based Segmentation methods (--mode image): <br>
 
-An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
+- cellpose
+- baysor
+- xeniumranger
 
-## Running the pipeline
+### Coordinate-based (transcripts-based) Segmentation methods (--mode coordinate): <br>
 
-The typical command for running the pipeline is as follows:
+- proseg
+- baysor
+- segger
+
+#### Run Segmentation with the methods mentioned above: <br>
+
+For example, to run proseg segmentation, use the `coordinate` mode and the `proseg` segmentation method:
+
+```bash
+nextflow run nf-core/spatialxe \
+        --input ./samplesheet.csv \
+        --outdir ./results \
+        --mode coordinate \
+        --segmentation proseg \
+        -profile <docker/singularity/...>
+```
+
+For example, to run cellpose segmentation, use the `image` mode and the `cellpose` segmentation method:
 
 ```bash
-nextflow run nf-core/spatialxe --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker
+nextflow run nf-core/spatialxe \
+        --input ./samplesheet.csv \
+        --outdir ./results \
+        --mode image \
+        --segmentation cellpose \
+        -profile <docker/singularity/...>
 ```
 
 This will launch the pipeline with the configuration profile passed to `-profile` (for example, `docker`). See below for more information about profiles.
@@ -89,7 +124,6 @@ with:
 ```yaml title="params.yaml"
 input: './samplesheet.csv'
 outdir: './results/'
-genome: 'GRCh37'
 <...>
 ```
 
 
@@ -20,7 +20,6 @@ process BAYSOR_CREATE_DATASET {
     if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
         error "BAYSOR_CREATE_DATASET module does not support Conda. Please use Docker / Singularity / Podman instead."
     }
-    def VERSION = "${task.version}"
 
     template 'create_dataset.py'
 
@@ -29,14 +28,13 @@ process BAYSOR_CREATE_DATASET {
     if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
         error "BAYSOR_CREATE_DATASET module does not support Conda. Please use Docker / Singularity / Podman instead."
     }
-    def VERSION = "${task.version}"
 
     """
     touch sampled_transcripts.csv
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        Baysor-Preview Create Dataset: $VERSION
+        baysor: 0.7.1
     END_VERSIONS
     """
 }
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+sample,bundle,image`
	`2`	`+xenium_prime_mouse_ileum,/home/user/raw_data/xenium/Xenium_Prime_Mouse_Ileum_tiny_outs,/home/user/raw_data/xenium/Xenium_Prime_Mouse_Ileum_tiny_outs/morphology.ome.tif`
Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
`1`		`-test_run,https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.tar.gz,morphology.ome.tif`
	`1`	`+sample,bundle,image`
	`2`	`+test_run,https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.tar.gz,`
Original file line number	Diff line number	Diff line change
`@@ -17,4 +17,5 @@ params {`
`17`	`17`	`// Input data`
`18`	`18`	`input = "${projectDir}/assets/samplesheet.csv"`
`19`	`19`	`outdir = 'results'`
	`20`	`+ mode = 'image'`
`20`	`21`	`}`
Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,6 @@ process BAYSOR_CREATE_DATASET {`
`20`	`20`	`if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {`
`21`	`21`	`error "BAYSOR_CREATE_DATASET module does not support Conda. Please use Docker / Singularity / Podman instead."`
`22`	`22`	`}`
`23`		`- def VERSION = "${task.version}"`
`24`	`23`
`25`	`24`	`template 'create_dataset.py'`
`26`	`25`
`@@ -29,14 +28,13 @@ process BAYSOR_CREATE_DATASET {`
`29`	`28`	`if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {`
`30`	`29`	`error "BAYSOR_CREATE_DATASET module does not support Conda. Please use Docker / Singularity / Podman instead."`
`31`	`30`	`}`
`32`		`- def VERSION = "${task.version}"`
`33`	`31`
`34`	`32`	`"""`
`35`	`33`	`touch sampled_transcripts.csv`
`36`	`34`
`37`	`35`	`cat <<-END_VERSIONS > versions.yml`
`38`	`36`	`"${task.process}":`
`39`		`- Baysor-Preview Create Dataset: $VERSION`
	`37`	`+ baysor: 0.7.1`
`40`	`38`	`END_VERSIONS`
`41`	`39`	`"""`
`42`	`40`	`}`