-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnextflow.config.Visium.human
More file actions
218 lines (204 loc) · 9.91 KB
/
nextflow.config.Visium.human
File metadata and controls
218 lines (204 loc) · 9.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
// Default configs
// Pipeline parameters and their default values for this configuration.
// Flags written as "0"/"1" are strings; true/false values are booleans.
params {
// Generic parameters
generic_workflowpath = "${projectDir}"
// Full path to samplesheet.tsv file
generic_samplesheet = "NA"
// Author name to add to the final report
generic_authorname = "Liu,Leo"
// One of scRNAseq, Visium, VisiumHD, Seeker, Trekker, Stereoseq
generic_data_type = "Visium"
// Full path to a .txt file containing a list of genes, one gene per line
// Set to "NA" to disable
generic_feature_list = "NA"
// Full path to output directory
// NOTE(review): the default below is a relative placeholder, not a full path - replace it before running
generic_output_dir = "path/to/output/"
// Full path to gene-level annotation file. This is used to add feature-level metadata
generic_geneinfo = "${projectDir}/docs/human_gene_info_2020A.tsv"
// Full path to rRNA gene list file. This is used to calculate rRNA%. Set to NA to disable calculation or filtering based on rRNA%
generic_rrna_list = "${projectDir}/docs/human_rRNA_genes.tsv"
// Parallel parameters
// Used by DECONVOLUTION, INTEGRATESAMPLES and MERGESAMPLES
// Strategy can be either multisession or multicore
generic_parallel_strategy = "multisession"
generic_nworkers = 5
// QC
// NOTE(review): presumably stops the workflow after the QC step when true - confirm against the workflow code
qc_qc_only = true
// "0" or "1" to indicate whether to perform ambient RNA removal/correction using SoupX
// Only applicable to scRNAseq
qc_ambient_RNA_removal_flag = "0"
// "0" or "1" to indicate whether to perform doublet removal using scDblFinder
// Only applicable to scRNAseq
qc_doublet_removal_flag = "0"
// "0" or "1" to indicate whether to apply adaptive cutoff identification (based on IQR)
qc_adaptive_cutoff_flag = "0"
// Cutoff for percentage of mitochondrial content
// Cells with values higher than the cutoff will be removed
qc_mt_cutoff = 40
// Cutoff for percentage of hemoglobin content
// Cells with values higher than the cutoff will be removed
qc_hb_cutoff = 20
// Cutoff for percentage of rRNA content
// Cells with values higher than the cutoff will be removed
qc_rrna_cutoff = 40
// Cutoff for total number of detectable genes/features
// Cells with values lower than the cutoff will be removed
qc_nFeature_cutoff = 50
// Cutoff for total number of UMI counts
// Cells with values lower than the cutoff will be removed
qc_nCount_cutoff = 200
// Cutoff for number of cells with expression for a feature/gene
// Genes/features with values lower than the cutoff will be removed
qc_nCell_cutoff = 10
// Seeker only: background removal parameters
// Chip size in um (3000 for 3x3 mm, 10000 for 10x10 mm)
qc_seeker_chip_size_um = 10000
// Minimum log10(UMI) threshold for background removal
qc_seeker_min_log10_umi = 1.4
// Coarse neighbourhood width in um
// NOTE(review): the coarse width (40) is smaller than the fine width (100) below - confirm the two values are not swapped
qc_seeker_m = 40
// Fine neighbourhood width in um
qc_seeker_n = 100
// Minimum bead count per coarse neighbourhood
qc_seeker_min_beads_coarse = 5
// Minimum bead count per fine neighbourhood
qc_seeker_min_beads_fine = 10
// Normalization
// Normalization method for dimension reduction and clustering, either SCT, LogNormalize, scran, SpaNorm, or TFIDF. SpaNorm is not available for scRNAseq data type.
norm_norm_dimreduc = "LogNormalize"
// Normalization method for differential testing, either SCT, LogNormalize, scran, SpaNorm, or TFIDF. SpaNorm is not available for scRNAseq data type.
norm_norm_diff = "LogNormalize"
// Adjustment method for SpaNorm, can be one of logpac, pearson, meanbio, medbio, or NA (scRNAseq)
norm_spanorm_adj_method = "NA"
// Cell cycle
// "0" or "1" to indicate whether to estimate and correct for cell-cycle effect.
norm_cellcycle_correction_flag = "0"
// Full path to gene list (gene symbols) for cell-cycle S-phase, one gene per line
// Required if norm_cellcycle_correction_flag is set to "1"
norm_genelist_S_phase = "${projectDir}/docs/S_genes_human.tsv"
// Full path to gene list (gene symbols) for cell-cycle G2M-phase, one gene per line
// Required if norm_cellcycle_correction_flag is set to "1"
norm_genelist_G2M_phase = "${projectDir}/docs/G2M_genes_human.tsv"
// Analysis to identify spatially variable genes (SVGs)
// Only applicable to spatial data
// Method to use for SVG identification, either markvariogram, moransi, SparkX or HVG
cluster_svg_method = "moransi"
// Deconvolution-analysis parameters
// Deconvolution analysis only applies to spatial data
// Only one of deconvolution and mapping analysis can be enabled, not both
decon_deconvolution_analysis = false
// Full path to reference data, either a Seurat object in .rds format or an AnnData object in .h5ad format
decon_reference_path = "NA"
// Assay of the reference used for deconvolution
decon_reference_assay = "RNA"
// Assay of the query used for deconvolution
decon_query_assay = "Spatial"
// Character indicating reference data, e.g. cell type, to use for deconvolution
decon_refdata = "NA"
// Doublet mode for RCTD, can be either doublet, multi or full
decon_rctd_doublet_mode = "full"
// Optional gene list used for deconvolution, one gene per line
// Set to "NA" to use the default strategy used by RCTD
decon_rctd_gene_list_reg = "NA"
// Mapping-analysis parameters
// Mapping analysis applies to both scRNAseq and spatial data
// Only one of deconvolution and mapping analysis can be enabled, not both
// Mapping is performed using transfer anchors from Seurat
map_mapping_analysis = false
// Full path to reference Seurat object in .rds format
map_reference_path = "NA"
// Assay to use to perform mapping for reference
map_reference_assay = "RNA"
// Assay to use to perform mapping for query
map_query_assay = "RNA"
// Character or characters separated by ',' indicating reference data to use for mapping
map_refdata = "NA"
// Name of dimensional reduction to use from the reference
map_reference_reduction = "pca"
// Name of normalization method used: LogNormalize or SCT
// SCT assay for integrated reference is currently not compatible
map_seurat_normalization_method = "LogNormalize"
// "0" or "1" indicating whether to add prediction assay to the Seurat object
map_seurat_prediction_assay = "1"
// DimReduc object to use from the reference data to project UMAP
map_seurat_reduction_model = "umap"
// Strategy to combine samples
// Whether to perform merge-based analysis
combine_merge_analysis = false
// Whether to perform integration-based analysis
combine_integration_analysis = true
// If combine_merge_analysis is enabled, whether to stop after merge-based analysis. Could be useful if you want to evaluate parameters like resolution.
combine_merge_only = false
// If combine_integration_analysis is enabled, whether to stop after integration-based analysis. Could be useful if you want to evaluate parameters like resolution.
combine_integration_only = false
// Integration strategy
// cca, rpca, harmony, fastmnn, scvi or cellcharter
combine_integration_method = "harmony"
// "0" or "1" to indicate whether to perform sketch-based workflow. Recommended for a large number of samples/cells
combine_sketch_flag = "0"
// "0" or "1" to indicate whether to use bpcells. Recommended for a large number of samples/cells. Note that if bpcells is enabled, only LogNormalize can be used as normalization method.
combine_bpcells_flag = "0"
// Clustering
// Resolution parameter used to identify number of clusters
cluster_resolution = 0.8
// Method used for visualization, either tsne or umap
cluster_vismethod = "umap"
// Method for embeddings, set to 'NA' to use default PCA. For scRNA-seq
// can be set to 'geneformer'; for spatial data, can be set to one of 'banksy', 'stagate', 'spatialpca', 'spatialleiden', 'cellcharter'
cluster_embed_method = "NA"
// Method for clustering
// Can be one of 'louvain', 'leiden'
cluster_cluster_method = "leiden"
// Lambda parameter for Banksy, larger values yield more spatially coherent domains. For Visium, recommend to use 0.2 for domain segmentation. For Visium HD, use 0.2 for cell typing, and 0.8 for domain segmentation
// Only applicable to spatial data
// NOTE(review): the key is spelled "lamda" (sic); presumably the workflow reads this exact name, so do not rename it here without checking
cluster_banksy_lamda = 0.2
// k_geom parameter for Banksy, larger values will yield larger domains. Recommend to use 18 for Visium data
// Only applicable to spatial data
cluster_banksy_k_geom = 18
// Differential expression
// Identity to group the cells to perform DE comparisons
// Could be one of "cluster" (unsupervised clusters), "decon_cell_type" (if deconvolution analysis is enabled), "map_cell_type" (if mapping-based analysis is enabled)
diff_idents = "cluster"
// "0" or "1" indicating whether to perform pseudo-bulk
// based analysis. Could be helpful if there are many samples per condition
diff_pseudobulk_flag = "0"
// Character value specifying control group for differential expression analysis
// Set to NA to disable
diff_control_var = "NA"
// Character value specifying case group for differential expression analysis
// Set to NA to disable
diff_case_var = "NA"
// Covariates to adjust for when performing differential analysis between conditions
// Values should be column names from the sampleinfo file
diff_covariate_list = "NA"
// Denotes which statistical test to use
diff_test = "wilcox"
// Fold change cutoff to identify differentially expressed genes
diff_fc = 2
// p value cutoff to identify differentially expressed genes
diff_pval = 0.01
// "0" or "1" to indicate whether to use Bonferroni adjusted p value
diff_pval_flag = "1"
// Percentage of expression cutoff to identify differentially expressed genes
diff_pct=20
}
// Execution profiles. Select one on the command line with `-profile local` or `-profile slurm`.
// The `process` scope is written in block syntax throughout: the Nextflow documentation
// advises against mixing dot syntax and block syntax for the same scope inside a profile.
profiles {
    // Local execution on the machine that launches Nextflow
    local {
        process {
            executor = 'local'
            // Default memory request applied to every process
            memory = '100 GB'
            // Larger memory request for the INTEGRATESAMPLES process only.
            // A `withName` selector is required for per-process settings; the previous
            // `process.INTEGRATESAMPLES.memory` form is not a process selector, so the
            // 200 GB request never reached the process.
            withName: 'INTEGRATESAMPLES' {
                memory = '200 GB'
            }
        }
        workDir = './work'
    }
    // SLURM execution profile
    slurm {
        process {
            executor = 'slurm'
            queue = 'cpu-short'
            // CPUs and memory are requested for every job through raw scheduler options
            // instead of the `cpus`/`memory` directives.
            clusterOptions = '--cpus-per-task 10 --mem 200G'
            time = '6h'
            // Directive-based alternative, kept disabled while clusterOptions carries the request:
            // memory = '100 GB'
            // withName: 'INTEGRATESAMPLES' { memory = '200 GB' }
        }
        workDir = './work'
    }
}
// Load the additional settings defined in common.config, located in the pipeline project directory.
includeConfig "${projectDir}/common.config"