-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparams.config
More file actions
49 lines (49 loc) · 7.15 KB
/
params.config
File metadata and controls
49 lines (49 loc) · 7.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
// User-editable parameters for a brioche run: required inputs, BLAST options, probe/hit filtering thresholds and SLURM settings.
params {
//============================Required parameters=======================================================================
mode = 'prod' //Set mode to 'prod' to run on a new dataset; otherwise brioche will run on the test data
notifications = true //Enable or disable email notifications; set to false to disable
emailaddress = "" //Replace with your e-mail address to receive notifications
resultsdir = "" //Path where brioche results are saved; default is <launch directory>/brioche-results
genomefasta = "${projectDir}/Data/CDCFrontier_GA_v10_Chrom_3_7.fa" //Replace with the absolute path to the fasta file for your assembly
restrict2chrom = "" //Restrict search to this chromosome
chromstoexclude = "" //Comma-separated list of chromosomes to exclude, e.g. "chr1,chr2,chrun"
genomename = 'CDCFrontier_GA_v10' //Name of the genome, used in naming output files
probename = 'AVRGRDC_Pulses_v1' //Name of the chip/marker set, used in naming output files
istarget3primeend = 'TRUE' //Is the target at the 3' end of the probe (only relevant for SNP chip data)? 'TRUE' or 'FALSE'
targetdesign = "${projectDir}/Data/AVRGRDC_Pulses_v1_20006795X370754_A2_Chickpea-target-new-format.tsv" //csv or tsv table with columns ID, ProbeSequence, Target bp position, Target
markercharacter = 'D' //Character used to replace the target marker in the probe sequence
buildblastdbonly = false //Set to true if you only want to build a BLAST database
usetargetchrom = 'No' // Set to either Yes or No. Set to Yes to use a list of known target chromosomes to prefer markers which fall on certain chromosomes. When set to Yes, data from chromchrommatch and markertargetsites are used to assist with optimal marker location identification
usesharedmarkersmap = 'No' // Set to either Yes or No. Set to Yes to use a list of known closely located markers per marker (similarmarkersmap) to identify whether target markers are aligned properly (target must be on the same chromosome as X fraction of the close markers given)
useldedgemap = 'Yes' // Set to either Yes or No. Set to Yes to incorporate raw pairwise LD outputs to inform on the top marker site.
usegeneticmap = 'Yes' // Set to either Yes or No. Set to Yes to incorporate genetic mapping outputs to inform on the top marker chromosome. Requires chromchrommatch to be provided as well.
sharedmarkersmapcutoff = '0.5' // (not in use yet) Fraction of the markers from similarmarkersmap that must agree for a given marker mapping to be accepted as correct, e.g. 1 is 100%, 0.5 is 50% of the provided markers
// NOTE(review): the five comments below ask for absolute paths, but the defaults are bare file names while
// useldedgemap and usegeneticmap default to 'Yes' - confirm how these paths are resolved or replace them before running.
chromchrommatch = "Chromesome-chromosome_mappings.csv" // Absolute file path to a pairwise table matching the names of known target chromosomes to the names of the chromosomes in the reference genome, e.g. rows "ChrA,Chr1" and "Scaffold2,Chr2". NOTE(review): the default file name is spelled "Chromesome" - confirm it matches the file on disk
markertargetsites = "targetchromsknownmarkers.csv" // Absolute path to a 2-column table of marker and target chromosome for each marker to prefer in filtering, e.g. "marker1,ChrA"
similarmarkersmap = "linkedSNPs.csv" // Absolute path to a multi-column table with the first column being the target marker and all subsequent columns being known nearby markers
geneticmap = "genetic_mapping_input.csv" // Absolute path to a multi-column table: the first column is the target marker, the second is the number of unique chromosomes the marker was mapped to genetically, and the following 10 are the name and hits for each chromosome amongst an arbitrary number of genetic mappings which were tested
ldedgemap = "examplelinkagedisequalibriumfile.ld" // Absolute path to a pairwise LD edge output table (PLINK, TASSEL or Haploview long tables) of linkage disequilibrium scores, e.g. columns CHR_A BP_A SNP_A CHR_B BP_B SNP_B R2 Dprime P
//============================Options for BLAST=======================================================================
evalue = 0.05 //The BLAST E-value is the number of expected hits of similar quality (score) that could be found just by chance.
dust = 'no' //Filtering option to filter the query sequence with DUST (format: 'yes', 'level window linker', or 'no' to disable)
otherblastoptions = '' //Other parameters to pass to BLAST, in the format "-param1 val1 -param2 val2". Here you can experiment with dropping the word size (-word_size) to more reliably return hits for smaller and more diverged markers
query_chunk_size = '20000' // Maximum number of sequences to be input into blastn at a time. Fasta sequences will be split into subprocesses and run. This also acts as the split size for downstream R scripts (ideal is < 50,000 for blastn and downstream processes).
//================================XT data with target SNP at end of probe===============================================
minlength = 40 //Minimum length of HSP to be considered fully hybridized
extendablebps = 3 //Number of matching base pairs from the 3' end for a probe to be considered extendable
maxgaps = 0 // New option to implement
//================================Filtering options====================================================================
coverage = 80 //Option to filter any hits with coverage less than the provided percentage
pident = 92 //Option to filter any hits with pident less than the provided percentage
maximumhits = 10 //Filter probes with more hits than maximumhits - change in logic
localdupdist = '100000' // Length in bp of the region (both upstream and downstream, each way) to consider for local duplication, e.g. if the value is set to 100000, search 100000 bp upstream and 100000 bp downstream of the top marker hit for additional high-identity matches indicating a local duplication of this region in the reference genome being mapped to. This can be reported as additional data when performing in silico genotyping
keeplocalduppos = 'yes' // Keep markers where a local duplicate was detected if the allele states are identical. Will report the lowest sstart location as the marker position.
keepduplicates = 'FALSE' // REDUNDANT FEATURE!!!! SUPERSEDED BY SEVERAL MORE EXPANSIVE SYSTEMS AND OPTIONS. Whether to retain or filter markers with multiple hits, see maximumhits
//================================Internal parameters=============================================================
mappingstate = "Onset" // Do not change this parameter! This is an internal carry-over parameter to assist with in silico genotyping. Onset = first round, so use the user-provided orientations; Ongoing = second or later round, so use the internal ProjectDIR orientation file.
//================================slurm options====================================================================
hpcaccount = 'user' //Account to be used on basc; change this to the account associated with the project etc
shortqueue = 'shortrun' //Queueing option for jobs with short run times
longqueue = 'batch' //Queueing option for jobs with long run times
queuesize = 100 //Number of jobs submitted to the slurm queue at any given time
}