Skip to content

Commit 3c7f2ea

Browse files
authored
Merge pull request #149 from PolinaBevad/f_fix_sv_realignment_disable_option
Fix sv realignment - added delete duplicates option
2 parents 87abedb + c221314 commit 3c7f2ea

File tree

9 files changed

+39
-7
lines changed

9 files changed

+39
-7
lines changed

Readme.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,9 @@ Variant frequency is more than 10% for the non-monomer MSI and 25% for the monom
431431
- `-Y|--ref-extension INT`
432432
Extension, in bp, of the reference used to build the lookup table. Defaults to 1200 bp. Increasing the number will slow down the program.
433433
The main purpose is to call large indels with 1000 bp that can be missed by discordant mate pairs.
434-
434+
- `--deldupvar`
435+
Turn on deletion of duplicate variants in the output, which can appear because VarDict processes regions linearly. Variants in this mode are
436+
considered and output only if the start position of the variant is inside the region of interest.
435437
## Output columns
436438

437439
1. Sample - sample name

src/main/java/com/astrazeneca/vardict/Configuration.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,11 @@ public class Configuration {
217217
*/
218218
public boolean disableSV = false; //-U
219219

220+
/**
221+
* Turn on deletion of duplicate variants that can appear because VarDict processes regions linearly.
222+
*/
223+
public boolean deleteDuplicateVariants = false;
224+
220225
/**
221226
* The minimum distance between two SV clusters in term of read length
222227
*/

src/main/java/com/astrazeneca/vardict/Main.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ private void run(CommandLine cmd) throws ParseException, IOException {
140140
conf.disableSV = cmd.hasOption("U");
141141
conf.uniqueModeSecondInPairEnabled = cmd.hasOption("UN");
142142
conf.uniqueModeAlignmentEnabled = cmd.hasOption("u");
143+
conf.deleteDuplicateVariants = cmd.hasOption("deldupvar");
143144

144145
conf.INSSIZE = getIntValue(cmd, "w", 300);
145146
conf.INSSTD = getIntValue(cmd, "W", 100);
@@ -197,6 +198,7 @@ private static Options buildOptions() {
197198
options.addOption("u", false, "Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using forward read only.");
198199
options.addOption("UN", false, "Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using first read only.");
199200
options.addOption("chimeric", false, "Indicate to turn off chimeric reads filtering.");
201+
options.addOption("deldupvar", false, "Turn on deleting of duplicate variants. Variants in this mode are considered and outputted only if start position of variant is inside the region interest.");
200202
options.addOption("U", "nosv", false, "Turn off structural variant calling.");
201203

202204
options.addOption(OptionBuilder.withArgName("bit")

src/main/java/com/astrazeneca/vardict/modules/ToVarsBuilder.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,16 +89,18 @@ public static Tuple.Tuple2<Integer, Map<Integer, Vars>> toVars(Region region,
8989
if (v.isEmpty()) {
9090
continue;
9191
}
92-
//Skip if position is outside region of interest
93-
if (v.sv == null) {
92+
93+
//Apply the region bounds check when the position has no structural variant or when the delete duplicates option is on
94+
if (v.sv == null || conf.deleteDuplicateVariants) {
95+
//Skip if start position is outside region of interest
9496
if (p < region.start || p > region.end) {
9597
continue;
9698
}
97-
//skip position if it has no coverage
98-
if (!cov.containsKey(p)) {
99-
continue;
100-
}
99+
}
101100

101+
//Skip position if it has no coverage (except SVs)
102+
if (v.sv == null && !cov.containsKey(p)) {
103+
continue;
102104
}
103105

104106
Set<String> vk = new HashSet<String>(v.keySet());

testdata/fastas/hg38.fa.csv

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Simple,hg38.fa,DelOutOfBound.chr1_150578117_150578617.bam,chr1,150578117,150578617,-f 0.001 --deldupvar
2+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578429 150578429 T C 4965 5 2527 2432 1 4 T/C 0.0010 2;2 48.4 1 45.0 0 60.0 10.000 0.0010 0 1 2.000 3 2.2 5 4965 GCCGTCGCTGAAAACATGGA CATCACTCGAGACAACGATT chr1:150578118-150578617 SNV 0 0
3+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578557 150578557 C T 1965 2 591 1372 1 1 C/T 0.0010 2;2 41.5 1 42.0 1 60.0 4.000 0.0010 0 1 3.000 1 1.0 2 1965 ACCGGCGCAAGATTCGCCTG CCACCCGCGGCGGGAAAATC chr1:150578118-150578617 SNV 0 0
4+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578606 150578606 A G 1560 3 506 1051 0 3 A/G 0.0019 2;0 37.0 0 45.0 0 60.0 6.000 0.0019 0 0 4.000 1 2.0 3 1558 GATTTACAGAACTCAGGTTG CCCCACTTGAAATTGACATC chr1:150578118-150578617 SNV 0 0
5+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578577 150578577 C T 1847 2 542 1303 1 1 C/T 0.0011 2;2 28.0 1 45.0 0 60.0 4.000 0.0011 0 0 1.000 1 1.0 2 1847 CCCACCCGCGGCGGGAAAAT GCTACTGGGATTTACAGAAC chr1:150578118-150578617 SNV 0 0
6+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578149 150578149 C T 1673 2 1306 365 2 0 C/T 0.0012 2;0 63.5 1 45.0 0 60.0 4.000 0.0012 0 2 1.000 1 1.0 2 1671 AGAGCCTGCAAACAGCCGTG GTCATAAAAACCTTTAGATA chr1:150578118-150578617 SNV 0 0
7+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578151 150578151 T C 1683 2 1308 373 1 1 T/C 0.0012 2;2 38.0 1 45.0 0 60.0 4.000 0.0012 0 0 1.000 1 1.0 2 1682 AGCCTGCAAACAGCCGTGCG CATAAAAACCTTTAGATATC chr1:150578118-150578617 SNV 0 0
8+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578182 150578183 TA T 2219 3 1580 636 2 1 A/-1 0.0014 2;2 62.3 1 45.0 0 60.0 6.000 0.0014 0 7 8.000 1 0.0 3 2219 TTTAGATATCCCCACCTCTC AAAAAAATCCTTCATTCTAC chr1:150578118-150578617 Deletion 0 0
9+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578182 150578182 T TA 2182 6 1414 565 5 1 T/+1 0.0027 2;2 33.3 1 47.0 1 62.8 12.000 0.0030 0.0005 8 9.000 1 0.3 6 1982 TTTAGATATCCCCACCTCTC AAAAAAAATCCTTCATTCTA chr1:150578118-150578617 Insertion 0 0
10+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Simple,hg38.fa,DelOutOfBound.chr1_150578117_150578617.bam,chr1,150578117,150578617,-f 0.001
2+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578429 150578429 T C 4965 5 2527 2432 1 4 T/C 0.0010 2;2 48.4 1 45.0 0 60.0 10.000 0.0010 0 1 2.000 3 2.2 5 4965 GCCGTCGCTGAAAACATGGA CATCACTCGAGACAACGATT chr1:150578118-150578617 SNV 0 0
3+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578557 150578557 C T 1965 2 591 1372 1 1 C/T 0.0010 2;2 41.5 1 42.0 1 60.0 4.000 0.0010 0 1 3.000 1 1.0 2 1965 ACCGGCGCAAGATTCGCCTG CCACCCGCGGCGGGAAAATC chr1:150578118-150578617 SNV 0 0
4+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578606 150578606 A G 1560 3 506 1051 0 3 A/G 0.0019 2;0 37.0 0 45.0 0 60.0 6.000 0.0019 0 0 4.000 1 2.0 3 1558 GATTTACAGAACTCAGGTTG CCCCACTTGAAATTGACATC chr1:150578118-150578617 SNV 0 0
5+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578577 150578577 C T 1847 2 542 1303 1 1 C/T 0.0011 2;2 28.0 1 45.0 0 60.0 4.000 0.0011 0 0 1.000 1 1.0 2 1847 CCCACCCGCGGCGGGAAAAT GCTACTGGGATTTACAGAAC chr1:150578118-150578617 SNV 0 0
6+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150577490 150578242 TCCTAGAAAACAAAAAAGAAAAGCACATTTTCAAGTATGGGTTTCTGCTCTCTTACAACACTACTATCCAGAGAGAAGGTAAATTAAAATATCTAAGGTTTCCCCCTAAAGCAATAGTAAACCAATTATTATTCACCCCGGGGCAAAAAAAGAAAATAAGTTCTTCAGTTTCTTAAGCGGGGACCCATGATAATTAAACTTCATTTTACAGTACTATCTTGGTCTTATTAGGATTTATCCTAACAAAAGGACTTCCAGAGTTCCCATGAGGTAAAAGACGAAAACCCTTATTCTCAAATCTCCACCACCTAATATTTCAGTAACGCCTCAATTGCGCAGATCAGTTTTTCTCAACGTAGTTAAGTACATCCTGCATCAGATCTGGGTTTGTTTAAAACGCAGGCTCCACCCCATATCTTCTCCATTAAAACAGGCACCGCCTTAGGAATTATTTCAGCTATCCTCCCCTGGTGACTTATTTTTTCATGGTTTGAATCCACTGAAGAGTATTCAACTGAATTGGAACAAAAAATTGCCTAGAGAAAACCAAGATTTTTGATACAATTCAAGACCGCCATACCACTATCTCTAAGGATCCACTACTTAAATCAACATCAAATAAACAATGGTCCTTTAGTTAGAGCCTGCAAACAGCCGTGCGTCATAAAAACCTTTAGATATCCCCACCTCTCTAAAAAAAATCCTTCATTCTACTTCCACTCCAAGGCCCCTTTCATCCTTAAGGCAAACTTA T 3528 1377 1307 1553 768 609 C/-752 0.3903 2;2 37.2 1 44.9 1 60.0 2754.000 0.3257 0.3903 2 1.000 1 0.1 1377 4228 GGAAGAACTCCACAAACCCA CCCAGCCTCTTTGTTTAACT chr1:150578118-150578617 Deletion 0 713-722-1
7+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578149 150578149 C T 1673 2 1306 365 2 0 C/T 0.0012 2;0 63.5 1 45.0 0 60.0 4.000 0.0012 0 2 1.000 1 1.0 2 1671 AGAGCCTGCAAACAGCCGTG GTCATAAAAACCTTTAGATA chr1:150578118-150578617 SNV 0 0
8+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578151 150578151 T C 1683 2 1308 373 1 1 T/C 0.0012 2;2 38.0 1 45.0 0 60.0 4.000 0.0012 0 0 1.000 1 1.0 2 1682 AGCCTGCAAACAGCCGTGCG CATAAAAACCTTTAGATATC chr1:150578118-150578617 SNV 0 0
9+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578182 150578183 TA T 2219 3 1580 636 2 1 A/-1 0.0014 2;2 62.3 1 45.0 0 60.0 6.000 0.0014 0 7 8.000 1 0.0 3 2219 TTTAGATATCCCCACCTCTC AAAAAAATCCTTCATTCTAC chr1:150578118-150578617 Deletion 0 0
10+
DelOutOfBound.chr1_150578117_150578617 testbed chr1 150578182 150578182 T TA 2182 6 1414 565 5 1 T/+1 0.0027 2;2 33.3 1 47.0 1 62.8 12.000 0.0030 0.0005 8 9.000 1 0.3 6 1982 TTTAGATATCCCCACCTCTC AAAAAAAATCCTTCATTCTA chr1:150578118-150578617 Insertion 0 0

0 commit comments

Comments
 (0)