// Source: STITCH/nextflow.config.Visium.human at main · dimi-lab/STITCH · GitHub
// Default configs

params {
  // Pipeline parameter defaults for this configuration (generic_data_type is "Visium";
  // the bundled reference files below are the human ones).
  // Flag-style parameters are written as the strings "0"/"1"; on/off switches use true/false.

  // Generic parameters
  generic_workflowpath = "${projectDir}"
  // Full path to samplesheet.tsv file
  generic_samplesheet = "NA"
  // Author name to add to the final report
  generic_authorname = "Liu,Leo"
  // One of scRNAseq, Visium, VisiumHD, Seeker, Trekker, Stereoseq
  generic_data_type = "Visium"
  // Full path to a .txt file containing a list of genes, one gene per line
  // Set to "NA" to disable
  generic_feature_list = "NA"
  // Full path to output directory
  // NOTE: the default below is a relative placeholder, not a full path; replace it before running
  generic_output_dir = "path/to/output/"
  // Full path to gene-level annotation file. This is used to add feature-level metadata
  generic_geneinfo = "${projectDir}/docs/human_gene_info_2020A.tsv"
  // Full path to rRNA gene list file. This is used to calculate rRNA%. Set to NA to disable calculation or filtering based on rRNA%
  generic_rrna_list = "${projectDir}/docs/human_rRNA_genes.tsv"

  // Parallel parameters
  // Used by DECONVOLUTION, INTEGRATESAMPLES and MERGESAMPLES
  // Strategy can be either multisession or multicore
  generic_parallel_strategy = "multisession"
  generic_nworkers = 5

  // QC
  qc_qc_only = true
  // "0" or "1" to indicate whether to perform ambient RNA removal/correction using SoupX
  // Only applicable to scRNAseq
  qc_ambient_RNA_removal_flag = "0"
  // "0" or "1" to indicate whether to perform doublet removal using scDblFinder
  // Only applicable to scRNAseq
  qc_doublet_removal_flag = "0"
  // "0" or "1" to indicate whether to apply adaptive cutoff identification (based on IQR)
  qc_adaptive_cutoff_flag = "0"
  // Cutoff for percentage of mitochondria concentration
  // Cells with values higher than the cutoff will be removed
  qc_mt_cutoff = 40
  // Cutoff for percentage of hemoglobin concentration
  // Cells with values higher than the cutoff will be removed
  qc_hb_cutoff = 20
  // Cutoff for percentage of rRNA concentration
  // Cells with values higher than the cutoff will be removed
  qc_rrna_cutoff = 40
  // Cutoff for total number of detectable genes/features
  // Cells with values lower than the cutoff will be removed
  qc_nFeature_cutoff = 50
  // Cutoff for total number of UMI counts
  // Cells with values lower than the cutoff will be removed
  qc_nCount_cutoff = 200
  // Cutoff for number of cells with expression for a feature/gene
  // Genes/features with values lower than the cutoff will be removed
  qc_nCell_cutoff = 10
  // Seeker only: background removal parameters
  // Chip size in um (3000 for 3x3 mm, 10000 for 10x10 mm)
  qc_seeker_chip_size_um = 10000
  // Minimum log10(UMI) threshold for background removal
  qc_seeker_min_log10_umi = 1.4
  // Coarse neighbourhood width in um
  qc_seeker_m = 40
  // Fine neighbourhood width in um
  qc_seeker_n = 100
  // Minimum bead count per coarse neighbourhood
  qc_seeker_min_beads_coarse = 5
  // Minimum bead count per fine neighbourhood
  qc_seeker_min_beads_fine = 10

  // Normalization
  // Normalization method for dimension reduction and clustering, either SCT, LogNormalize, scran, SpaNorm, or TFIDF. SpaNorm is not available for the scRNAseq data type.
  norm_norm_dimreduc = "LogNormalize"
  // Normalization method for differential testing, either SCT, LogNormalize, scran, SpaNorm, or TFIDF. SpaNorm is not available for the scRNAseq data type.
  norm_norm_diff = "LogNormalize"
  // Adjustment method for SpaNorm, can be one of logpac, pearson, meanbio, medbio, or NA (scRNAseq)
  norm_spanorm_adj_method = "NA"
  // Cell cycle
  // "0" or "1" to indicate whether to estimate and correct for cell-cycle effect.
  norm_cellcycle_correction_flag = "0"
  // Full path to gene list (gene symbols) for cell-cycle S-phase, one gene per line
  // Required if norm_cellcycle_correction_flag is set to "1"
  norm_genelist_S_phase = "${projectDir}/docs/S_genes_human.tsv"
  // Full path to gene list (gene symbols) for cell-cycle G2M-phase, one gene per line
  // Required if norm_cellcycle_correction_flag is set to "1"
  norm_genelist_G2M_phase = "${projectDir}/docs/G2M_genes_human.tsv"

  // Analysis to identify spatially variable genes (SVGs)
  // Only applicable to spatial data
  // Method to use for SVG identification, either markvariogram, moransi, SparkX or HVG
  cluster_svg_method = "moransi"

  // Deconvolution-analysis parameters
  // Deconvolution analysis only applies to spatial data
  // Only one of deconvolution and mapping analysis can be enabled, not both
  decon_deconvolution_analysis = false
  // Full path to reference data, either a Seurat object in .rds format or an anndata object in .h5ad format
  decon_reference_path = "NA"
  // Assay of the reference used for deconvolution
  decon_reference_assay = "RNA"
  // Assay of the query used for deconvolution
  decon_query_assay = "Spatial"
  // Character indicating reference data, e.g. cell type, to use for deconvolution
  decon_refdata = "NA"
  // Doublet mode for RCTD, can be either doublet, multi or full
  decon_rctd_doublet_mode = "full"
  // Optional gene list used for deconvolution, one gene per line
  // Set to "NA" to use the default strategy used by RCTD
  decon_rctd_gene_list_reg = "NA"

  // Mapping-analysis parameters
  // Mapping analysis applies to both scRNAseq and spatial data
  // Only one of deconvolution and mapping analysis can be enabled, not both
  // Mapping is performed using transfer anchors from Seurat
  map_mapping_analysis = false
  // Full path to reference Seurat object in .rds format
  map_reference_path = "NA"
  // Assay to use to perform mapping for reference
  map_reference_assay = "RNA"
  // Assay to use to perform mapping for query
  // NOTE(review): decon_query_assay above is "Spatial" for this Visium config; confirm "RNA" is intended here
  map_query_assay = "RNA"
  // Character or characters separated by ',' indicating reference data to use for mapping
  map_refdata = "NA"
  // Name of dimensional reduction to use from the reference
  map_reference_reduction = "pca"
  // Name of normalization method used: LogNormalize or SCT
  // SCT assay for integrated reference is currently not compatible
  map_seurat_normalization_method = "LogNormalize"
  // "0" or "1" indicating whether to add a prediction assay to the Seurat object
  map_seurat_prediction_assay = "1"
  // DimReduc object to use from the reference data to project UMAP
  map_seurat_reduction_model = "umap"

  // Strategy to combine samples
  // Whether to perform merge-based analysis
  combine_merge_analysis = false
  // Whether to perform integration-based analysis
  combine_integration_analysis = true
  // If combine_merge_analysis is enabled, whether to stop after merge-based analysis. Could be useful if you want to evaluate parameters like resolution.
  combine_merge_only = false
  // If combine_integration_analysis is enabled, whether to stop after integration-based analysis. Could be useful if you want to evaluate parameters like resolution.
  combine_integration_only = false

  // Integration strategy
  // cca, rpca, harmony, fastmnn, scvi or cellcharter
  combine_integration_method = "harmony"
  // "0" or "1" to indicate whether to perform the sketch-based workflow. Recommended for a large number of samples/cells
  combine_sketch_flag = "0"
  // "0" or "1" to indicate whether to use bpcells. Recommended for a large number of samples/cells. Note that if bpcells is enabled, only LogNormalize can be used as the normalization method.
  combine_bpcells_flag = "0"

  // Clustering
  // Resolution parameter used to identify number of clusters
  cluster_resolution = 0.8
  // Method used for visualization, either tsne or umap
  cluster_vismethod = "umap"
  // Method for embeddings, set to 'NA' to use default PCA. For scRNA-seq
  // can be set to 'geneformer'; for spatial data, can be set to one of 'banksy', 'stagate', 'spatialpca', 'spatialleiden', 'cellcharter'
  cluster_embed_method = "NA"
  // Method for clustering
  // Can be one of 'louvain', 'leiden'
  cluster_cluster_method = "leiden"
  // Lambda parameter for Banksy (note the parameter name is spelled "lamda"); larger values yield more spatially coherent domains. For Visium, recommend to use 0.2 for domain segmentation. For Visium HD, use 0.2 for cell typing, and 0.8 for domain segmentation
  // Only applicable to spatial data
  cluster_banksy_lamda = 0.2
  // k_geom parameter for Banksy, larger values will yield larger domains. Recommend to use 18 for Visium data
  // Only applicable to spatial data
  cluster_banksy_k_geom = 18

  // Differential expression
  // Identity to group the cells to perform DE comparisons
  // Could be one of "cluster" (unsupervised clusters), "decon_cell_type" (if deconvolution analysis is enabled), "map_cell_type" (if mapping-based analysis is enabled)
  diff_idents = "cluster"
  // "0" or "1" indicating whether to perform pseudo-bulk
  // based analysis. Could be helpful if there are many samples per condition
  diff_pseudobulk_flag = "0"
  // Character value specifying control group for differential expression analysis
  // Set to NA to disable
  diff_control_var = "NA"
  // Character value specifying case group for differential expression analysis
  // Set to NA to disable
  diff_case_var = "NA"
  // Covariates to adjust for when performing differential analysis between conditions
  // Values should be column names from the sampleinfo file
  diff_covariate_list = "NA"
  // Denotes which statistical test to use
  diff_test = "wilcox"
  // Fold change cutoff to identify differentially expressed genes
  diff_fc = 2
  // p value cutoff to identify differentially expressed genes
  diff_pval = 0.01
  // "0" or "1" to indicate whether to use Bonferroni adjusted p value
  diff_pval_flag = "1"
  // Percentage of expression cutoff to identify differentially expressed genes
  diff_pct=20
}

profiles {
    // Execution profiles; select one on the command line with `-profile <name>`.

    // Default profile (local execution)
    local {
      process {
        executor = 'local'
        // Default memory request for every process
        memory = '100 GB'
        // Per-process override. Process-specific settings need a withName
        // selector; a plain `process.INTEGRATESAMPLES.memory` assignment is
        // not a selector and is not applied to the process.
        withName: 'INTEGRATESAMPLES' {
          memory = '200 GB'
        }
      }
      workDir = './work'
    }

    // SLURM execution profile
    slurm {
      process {
        executor = 'slurm'
        queue = 'cpu-short'
        // CPUs and memory are requested through the scheduler options below.
        // Alternative using Nextflow-managed memory requests (disabled):
        //memory = '100 GB'
        //withName: 'INTEGRATESAMPLES' { memory = '200 GB' }
        clusterOptions = '--cpus-per-task 10  --mem 200G'
        time = '6h'
      }
      workDir = './work'
    }
}

// Load the settings defined in common.config from the pipeline project directory
includeConfig "${projectDir}/common.config"