Skip to content

Commit 466ceae

Browse files
committed
Support for conversion from tags using localized alleles (e.g. LPL, LAD)
1 parent 2abc298 commit 466ceae

File tree

8 files changed

+348
-38
lines changed

8 files changed

+348
-38
lines changed

NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ Changes affecting specific commands:
6868
printing "Requested allele outside valid range". This is now fixed by taking into account
6969
the actual number of ALT alleles.
7070

71+
* bcftools +tag2tag
72+
73+
- Support for conversion from tags using localized alleles (e.g. LPL, LAD) to the family of
74+
standard tags (PL, AD)
75+
7176
* bcftools +trio-dnm2
7277

7378
- Extend --strictly-novel to exclude cases where the non-Mendelian allele

doc/bcftools.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2861,7 +2861,8 @@ By default, appropriate system directories are searched for installed plugins.
28612861
<http://samtools.github.io/bcftools/howtos/plugin.split-vep.html> for more.
28622862

28632863
*tag2tag*::
2864-
Convert between similar tags, such as GL,PL,GP or QR,QA,QS.
2864+
Convert between similar tags, such as GL,PL,GP or QR,QA,QS, or from tags with localized alleles, e.g. LPL,LAD.
2865+
See <http://samtools.github.io/bcftools/howtos/plugin.tag2tag.html> for more.
28652866

28662867
*trio-dnm2*::
28672868
screen variants for possible de-novo mutations in trios

plugins/tag2tag.c

Lines changed: 284 additions & 37 deletions
Large diffs are not rendered by default.

test/tag2tag.LPL.1.1.vcf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
##fileformat=VCFv4.2
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
4+
##FORMAT=<ID=LAD,Number=.,Type=Integer,Description="Localized field: Allelic Depths">
5+
##FORMAT=<ID=LPL,Number=.,Type=Integer,Description="Local normalized, Phred-scaled likelihoods for genotypes as in original gVCF (without allele reordering)">
6+
##FORMAT=<ID=LAA,Number=.,Type=Integer,Description="Mapping of alt allele index from original gVCF to msVCF, comma-separated, 1-based (each value is the allele index in the msVCF)">
7+
##contig=<ID=chr,length=123456>
8+
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled genotype likelihoods">
9+
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths">
10+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT smpl1 smpl2
11+
chr 1 . A C,G . . . GT:LAD:LPL:LAA:AD:PL 0/0:31:0:.:31,.,.:0,.,.,.,.,. 0/1:17,16:86,0,57:1:17,16,.:86,0,57,.,.,.
12+
chr 2 . G A,T . . . GT:LAD:LPL:LAA:AD:PL 0/2:13,12:82,0,55:2:13,.,12:82,.,.,0,.,55 0/0:34:0:.:34,.,.:0,.,.,.,.,.
13+
chr 3 . T G . . . GT:LAD:LPL:LAA:AD:PL 0/0:29:0:.:29,.:0,.,. 0/1:13,8:81,0,53:1:13,8:81,0,53
14+
chr 4 . A C . . . GT:LAD:LPL:LAA:AD:PL 1/1:0,27:88,61,0:1:0,27:88,61,0 0/1:18,19:85,0,52:1:18,19:85,0,52
15+
chr 5 . T TA,TAA,TAAA,TAAAA,TAAAAA,TAAAAAA,TAAAAAAA . . . GT:LAD:LPL:LAA:AD:PL 5/2:0,8,13:305,339,82,220,0,61:5,2:0,.,13,.,.,8,.,.:305,.,.,220,.,61,.,.,0,.,.,.,.,.,.,339,.,.,.,.,82,.,.,.,.,.,.,.,.,.,.,.,.,.,.,. 2/2:0,25:90,56,0:2:0,.,25,.,.,.,.,.:90,.,.,56,.,0,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.

test/tag2tag.LPL.1.2.vcf

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
##fileformat=VCFv4.2
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
4+
##contig=<ID=chr,length=123456>
5+
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled genotype likelihoods">
6+
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths">
7+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT smpl1 smpl2
8+
chr 1 . A C,G . . . GT:AD:PL 0/0:31,.,.:0,.,.,.,.,. 0/1:17,16,.:86,0,57,.,.,.
9+
chr 2 . G A,T . . . GT:AD:PL 0/2:13,.,12:82,.,.,0,.,55 0/0:34,.,.:0,.,.,.,.,.
10+
chr 3 . T G . . . GT:AD:PL 0/0:29,.:0,.,. 0/1:13,8:81,0,53
11+
chr 4 . A C . . . GT:AD:PL 1/1:0,27:88,61,0 0/1:18,19:85,0,52
12+
chr 5 . T TA,TAA,TAAA,TAAAA,TAAAAA,TAAAAAA,TAAAAAAA . . . GT:AD:PL 5/2:0,.,13,.,.,8,.,.:305,.,.,220,.,61,.,.,0,.,.,.,.,.,.,339,.,.,.,.,82,.,.,.,.,.,.,.,.,.,.,.,.,.,.,. 2/2:0,.,25,.,.,.,.,.:90,.,.,56,.,0,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.

test/tag2tag.LPL.1.3.vcf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
##fileformat=VCFv4.2
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
4+
##FORMAT=<ID=LAD,Number=.,Type=Integer,Description="Localized field: Allelic Depths">
5+
##FORMAT=<ID=LPL,Number=.,Type=Integer,Description="Local normalized, Phred-scaled likelihoods for genotypes as in original gVCF (without allele reordering)">
6+
##FORMAT=<ID=LAA,Number=.,Type=Integer,Description="Mapping of alt allele index from original gVCF to msVCF, comma-separated, 1-based (each value is the allele index in the msVCF)">
7+
##contig=<ID=chr,length=123456>
8+
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled genotype likelihoods">
9+
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths">
10+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT smpl1 smpl2
11+
chr 1 . A C,G . . . GT:AD:PL 0/0:31,0,0:0,255,255,255,255,255 0/1:17,16,0:86,0,57,255,255,255
12+
chr 2 . G A,T . . . GT:AD:PL 0/2:13,0,12:82,255,255,0,255,55 0/0:34,0,0:0,255,255,255,255,255
13+
chr 3 . T G . . . GT:AD:PL 0/0:29,0:0,255,255 0/1:13,8:81,0,53
14+
chr 4 . A C . . . GT:AD:PL 1/1:0,27:88,61,0 0/1:18,19:85,0,52
15+
chr 5 . T TA,TAA,TAAA,TAAAA,TAAAAA,TAAAAAA,TAAAAAAA . . . GT:LAD:LPL:LAA 5/2:0,8,13:305,339,82,220,0,61:5,2 2/2:0,25:90,56,0:2

test/tag2tag.LPL.1.vcf

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
##fileformat=VCFv4.2
2+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
3+
##FORMAT=<ID=LAD,Number=.,Type=Integer,Description="Localized field: Allelic Depths">
4+
##FORMAT=<ID=LPL,Number=.,Type=Integer,Description="Local normalized, Phred-scaled likelihoods for genotypes as in original gVCF (without allele reordering)">
5+
##FORMAT=<ID=LAA,Number=.,Type=Integer,Description="Mapping of alt allele index from original gVCF to msVCF, comma-separated, 1-based (each value is the allele index in the msVCF)">
6+
##contig=<ID=chr,length=123456>
7+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT smpl1 smpl2
8+
chr 1 . A C,G . . . GT:LAD:LPL:LAA 0/0:31:0:. 0/1:17,16:86,0,57:1
9+
chr 2 . G A,T . . . GT:LAD:LPL:LAA 0/2:13,12:82,0,55:2 0/0:34:0:.
10+
chr 3 . T G . . . GT:LAD:LPL:LAA 0/0:29:0:. 0/1:13,8:81,0,53:1
11+
chr 4 . A C . . . GT:LAD:LPL:LAA 1/1:0,27:88,61,0:1 0/1:18,19:85,0,52:1
12+
chr 5 . T TA,TAA,TAAA,TAAAA,TAAAAA,TAAAAAA,TAAAAAAA . . . GT:LAD:LPL:LAA 5/2:0,8,13:305,339,82,220,0,61:5,2 2/2:0,25:90,56,0:2

test/test.pl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,9 @@
608608
run_test(\&test_vcf_plugin,$opts,in=>'view.GL',out=>'view.PL.vcf',cmd=>'+tag2tag --no-version',args=>'-- -r --gl-to-pl');
609609
run_test(\&test_vcf_plugin,$opts,in=>'view.GL',out=>'view.GL-GP.vcf',cmd=>'+tag2tag --no-version',args=>'-- --gl-to-gp');
610610
run_test(\&test_vcf_plugin,$opts,in=>'view.GP',out=>'view.GT.vcf',cmd=>'+tag2tag --no-version',args=>'-- -r --gp-to-gt -t 0.2');
611+
run_test(\&test_vcf_plugin,$opts,in=>'tag2tag.LPL.1',out=>'tag2tag.LPL.1.1.vcf',cmd=>'+tag2tag --no-version',args=>'-- --LXX-to-XX');
612+
run_test(\&test_vcf_plugin,$opts,in=>'tag2tag.LPL.1',out=>'tag2tag.LPL.1.2.vcf',cmd=>'+tag2tag --no-version',args=>'-- --LXX-to-XX -r');
613+
run_test(\&test_vcf_plugin,$opts,in=>'tag2tag.LPL.1',out=>'tag2tag.LPL.1.3.vcf',cmd=>'+tag2tag --no-version',args=>'-- --LXX-to-XX -r -d AD:0,PL:255 -s 3');
611614
run_test(\&test_vcf_plugin,$opts,in=>'query.variantkey',out=>'query.add-variantkey.vcf',cmd=>'+add-variantkey',args=>'');
612615
run_test(\&test_vcf_plugin,$opts,in=>'query.variantkey',out=>'variantkey-hex.out',cmd=>'+variantkey-hex',args=>'test/');
613616
run_test(\&test_vcf_plugin,$opts,in=>'query.nucleotide',out=>'query.allele-length.tsv',cmd=>'+allele-length',args=>'');

0 commit comments

Comments
 (0)