@@ -8,6 +8,30 @@ base_calling_model: "resources/models/rna004_130bps_sup@v5.1.0"
88# a BWA index will be built if it does not exist for this fasta file
99fasta : " resources/ref/sacCer3-mature-tRNAs-dual-adapt-v2.fa"
1010
11+ # Adapter sequences for tRNA reference building/validation
12+ # These must match what the Remora charging model was trained on.
13+ # The charging classification uses the CCAGGC 6-mer junction:
14+ # CCA = tRNA 3' end (last 3 bases of mature tRNA)
15+ # GGC = first 3 bases of 3' adapter
16+ # Therefore, the 3' adapter MUST start with GGC for classification to work.
17+ adapters :
18+ # 5' adapter prepended to tRNA (23bp, the first tRNA base is included as variable N)
19+ five_prime : " CCTAAGAGCAAGAAGAAGCCTGG"
20+ # 3' adapter appended after tRNA CCA end (40bp, starts with GGC)
21+ three_prime : " GGCTTCTTCTTGCTCTTCCAACCTTGCCTTAAAAAAAAAA"
22+
23+ # Reference validation/building mode
24+ # The pipeline validates that the reference FASTA has proper adapter structure
25+ # before alignment. This ensures the CCAGGC junction exists for charging classification.
26+ #
27+ # Modes:
28+ # validate: Check existing adapted reference (default)
29+ # build: Create adapted reference from raw tRNA sequences
30+ reference :
31+ mode : " validate"
32+ # For build mode only: path to raw tRNA FASTA (without adapters, must end in CCA)
33+ raw_fasta : null
34+
1135# If a kmer table is provided then the pipeline will use get_signal_metrics.py to extract metrics using remora
1236# from: https://github.com/nanoporetech/kmer_models/tree/master/rna004
1337remora_kmer_table : " resources/kmers/9mer_levels_v1.txt"
@@ -24,41 +48,41 @@ dorado_model: rna004_130bps_sup@v5.1.0
2448# see https://github.com/comprna/modkitopt
2549# these params improve F1 by 51% (m6A) and 1251% (pseU) compared to defaults
2650modkit :
27- # global threshold for canonical base confidence
28- filter_threshold : 0.5
29- # per-modification pass thresholds (mod code or ChEBI ID : threshold)
30- # a = N6-methyladenosine (m6A)
31- # m = 5-methylcytosine (m5C)
32- # 17802 = pseudouridine (pseU)
33- # 17596 = inosine
34- mod_thresholds :
35- a : 0.99
36- m : 0.99
37- " 17802 " : 0.995
38- " 17596 " : 0.99
51+ # global threshold for canonical base confidence
52+ filter_threshold : 0.5
53+ # per-modification pass thresholds (mod code or ChEBI ID : threshold)
54+ # a = N6-methyladenosine (m6A)
55+ # m = 5-methylcytosine (m5C)
56+ # 17802 = pseudouridine (pseU)
57+ # 17596 = inosine
58+ mod_thresholds :
59+ a : 0.99
60+ m : 0.99
61+ " 17802 " : 0.995
62+ " 17596 " : 0.99
3963
4064# additional options for particular commands
4165opts :
42- # additional options for dorado basecalling
43- # XXX place modified bases first as the arg parser gets confused
44- # XXX add `-v` for verbose logging
45- dorado : " --modified-bases pseU m5C inosine_m6A --emit-moves "
66+ # additional options for dorado basecalling
67+ # XXX place modified bases first as the arg parser gets confused
68+ # XXX add `-v` for verbose logging
69+ dorado : " --modified-bases pseU m5C inosine_m6A --emit-moves "
4670
47- # additional options for bwa alignment
48- # based on Novoa lab optimising bwa for tRNA alignment
49- # the -h 20 option is used to increase the number of secondary alignments reported in the XA tag
50- bwa : " -W 13 -k 6 -T 20 -x ont2d"
71+ # additional options for bwa alignment
72+ # based on Novoa lab optimising bwa for tRNA alignment
73+ # the -h 20 option can be added to increase the number of secondary alignments reported in the XA tag (it is not set in the string below)
74+ bwa : " -W 13 -k 6 -T 20 -x ont2d"
5175
52- # requires positive strand alignment
53- # requires at least 1 5' adapter base
54- # requires 1 3' adapter base in the discriminating adapter region between charged and uncharged (v2 adapters).
55- bam_filter : " -5 24 -3 23 -s"
76+ # requires positive strand alignment
77+ # requires at least 1 5' adapter base
78+ # requires 1 3' adapter base in the discriminating adapter region between charged and uncharged (v2 adapters).
79+ bam_filter : " -5 24 -3 23 -s"
5680
57- # requires positive strand alignment and excludes non-primary alignments
58- coverage : " --filterRNAstrand 'reverse' --samFlagExclude 256"
81+ # requires positive strand alignment and excludes non-primary alignments
82+ coverage : " --filterRNAstrand 'reverse' --samFlagExclude 256"
5983
60- # pass additional options to get_signal_metrics.py script which uses Remora to calculate metrics
61- remora : " "
84+ # pass additional options to get_signal_metrics.py script which uses Remora to calculate metrics
85+ remora : " "
6286
6387# WarpDemuX demultiplexing (optional, disabled by default)
6488#
7195#
7296# WDX4_tRNA_rna004_v1_0 has improved recovery (+3-7%) compared to WDX4b_tRNA_rna004_v1_0.
7397warpdemux :
74- enabled : false
75- barcode_kit : " WDX4_tRNA_rna004_v1_0"
76- save_boundaries : true
77- threads : 8
98+ enabled : false
99+ barcode_kit : " WDX4_tRNA_rna004_v1_0"
100+ save_boundaries : true
101+ threads : 8
0 commit comments