-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.yaml
More file actions
72 lines (63 loc) · 2.44 KB
/
config.yaml
File metadata and controls
72 lines (63 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# General Configuration
######################
# Presumably the directory that holds the input samples (inferred from the
# key name); it is an empty string in this file.
# NOTE(review): confirm how the consuming workflow treats an empty value.
sample_directory: ''
######################
# Information about reference
#############################
# NOTE: YAML does not expand a leading "~" inside a quoted string; these
# paths reach the consumer verbatim, so expansion is up to the workflow.

# Reference FASTA; the path points into a GRCh38-2.1.0 directory.
reference: '~/reference/GRCh38-2.1.0/genome.fa'

# Gene-panel BED files. Judging by the file names, the first is the sorted
# region list and the second the exon list — TODO confirm with the workflow.
bed_file: '~/genes/2021_Gene_Pane_LR-GRCh38_challenging_mrg_genes_included_less_than_90percent_GIAB_v4_sorted.bed'
exon_file: '~/genes/2021_Gene_Pane_LR-GRCh38_challenging_mrg_genes_included_less_than_90percent_GIAB_v4_exones.bed'
#############################
# SNVs Configuration
####################
# Name used for the SNV directory (presumably the output folder — confirm).
SNVs_dir_name: 'SNVs'

# Thread count; the key name suggests it is handed to the Clair caller.
clair_threads: 5

# Sequencing technology label for the reads.
# NOTE(review): the accepted values are defined by the consumer — verify.
reads_technology: 'hifi'

# BED file restricting the SNV step to a set of regions (per the key name).
bed_region: '~/genes/2021_Gene_Pane_LR-GRCh38_challenging_mrg_genes_included_less_than_90percent_GIAB_v4_stat.bed'

# Presumably an allele-frequency threshold — TODO confirm meaning and units.
af: 0.25
####################
# SVs Configuration
####################
# Thread count for the structural-variant step; the key name suggests it is
# passed to Sniffles — confirm against the workflow rule that reads it.
sniffles_threads: 5
####################
# Bam Coverage
###################
# Thread count; the key name suggests it is handed to mosdepth.
mosdepth_threads: 5

# Prefix for coverage outputs (per the key name); empty in this file.
coverage_prefix: ''

# NOTE(review): presumably a padding, in bases, applied around regions when
# computing coverage — the unit and usage are not visible here; confirm.
buffer: 10000
###################
# Benchmark
################
# SNVs
# Gene directory used by the SNV benchmark (presumably one file per gene —
# confirm). Commented-out entries are alternative locations kept for reference.
# genes_dir: "~/scripts/snakefiles/genes"
genes_dir: "~/scripts/snakefiles/genes_273"
# genes_dir: "/stornext/snfs4/next-gen/scratch/medhat/projects/infertile_man/genes/genes_bed"
output_dir_name: "SNVs_benchmark_dir"
# Truth-set VCF for the SNV benchmark. It lives in the same
# ~/scripts/truthset directory as gold_variant_bed; the path must start with
# "~/scripts/", not "~_scripts/", which would name a relative directory.
# gold_vcf: "~/scripts/snakefiles/SNVs_truthset/HG002_dipcall_original.SNV.decompose.vcf.gz"
gold_vcf: "~/scripts/truthset/HG002_GRCh38_CMRG_smallvar_v1.00.vcf.gz"
# gold_vcf: "~/truthset/NA12878/SNVs/HG001_GRCh38_1_22_v4.2.1_benchmark_CMRG.vcf.gz"
# Same variants as gold_vcf, converted to BED; used when calculating coverage
# (rule: VariantCoverage).
gold_variant_bed: "~/scripts/truthset/HG002_GRCh38_CMRG_smallvar_v1.00_variants.bed"
# Reference in .sdf form (absolute path).
# NOTE(review): presumably the SDF consumed by RTG Tools — confirm.
REF_38: "/users/sedlazec/ccdg_tech/reference/GRCh38-2.1.0/genome_mainchrs.sdf"
# TSV listing genes that have no benchmark (per the file name).
genes_without_benchmark: "~/genes/genes_without_benchmark.tsv"
SNVs_bench_threads: 5
SNVs_min_quality: 0
SNVs_split_file_name: "SNVs_bench_split"
# Presumably the minimum coverage an SNV needs to be benchmarked — confirm.
min_snv_coverage_to_benchmark: 8
# Extra options appended to the RTG command (per the key name); empty by
# default. Example of a value that has been used: "--squash-ploidy"
extra_rtg: ""
samples_to_compare: "--sample HG002,HG002"
# SVs
# Truth-set VCF and matching BED for the structural-variant benchmark.
# NOTE(review): "~/home/..." is an unusual prefix (a "home" folder inside the
# home directory) — verify that these paths resolve as intended.
GOLD_SV: '~/home/projects/medical_region/truth_set/NA24385/SVs_medical/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.vcf.gz'
BED_SV: '~/home/projects/medical_region/truth_set/NA24385/SVs_medical/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.bed'

# Directory name for the SV benchmark output (per the key name).
SV_OUT_DIR: 'sv_benchmark'

# Extra command-line options for the SV benchmarking tool.
# Do not add --giabreport: it raises errors.
# NOTE(review): the flags look like Truvari bench options — confirm.
extra: '--passonly --pctsim=0 --multimatch'
# Coverage
# Labels and thresholds for the coverage part of the benchmark.
# NOTE(review): the meanings below are inferred from key names — confirm
# against the rules that read them.

# Name/label used for minimum-coverage outputs.
covmin: 'covmin'

# Presumably the minimum mapping quality for a read to be counted.
MAPQ: 20

# Three coverage thresholds.
covmin_value1: 8
covmin_value2: 10
covmin_value3: 20

# Name/label used for zero-coverage outputs.
cov0: 'cov0'

# Directory name for generated plots.
plot_dir: 'plots'

# If this is a SAM FLAG bitmask, 3076 = 4 + 1024 + 2048
# (unmapped + duplicate + supplementary) — TODO confirm how it is applied.
read_flag: 3076

# Presumably a percentage cutoff related to uncovered genes — confirm.
percentage_uncovered_genes: 50
################