Skip to content

Commit eda84ab

Browse files
committed
Make reheader --fai aware of long contigs
by switching to the 64-bit version of the faidx_seq_len() function. Resolves #1959
1 parent b7b2a32 commit eda84ab

File tree

6 files changed

+43
-5
lines changed

6 files changed

+43
-5
lines changed

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ Changes affecting specific commands:
8484

8585
- Fix `-H` header output in formatting expressions containing newlines
8686

87+
* bcftools reheader
88+
89+
- Make `-f, --fai` aware of long contigs not representable by a 32-bit integer (#1959)
90+
8791
* bcftools +split-vep
8892

8993
- Prevent a segfault when `-i/-e` use a VEP subfield not included in `-f` or `-c` (#1877)

reheader.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ static char *copy_and_update_contig_line(faidx_t *fai, char *line, void *chr_see
6868
kstring_t key = {0,0,0}, val = {0,0,0}, tmp = {0,0,0};
6969
char *chr_name = NULL, *p, *q = line + 9; // skip ##contig=
7070
char *end = q;
71-
int nopen = 1, chr_len = 0;
71+
int nopen = 1;
72+
hts_pos_t chr_len = 0;
7273
while ( *end && *end!='\n' ) end++;
7374
while ( *q && *q!='\n' && nopen>0 )
7475
{
@@ -118,7 +119,7 @@ static char *copy_and_update_contig_line(faidx_t *fai, char *line, void *chr_see
118119
if ( !strcmp("ID",key.s) )
119120
{
120121
if ( khash_str2int_has_key(chr_seen,val.s) ) continue;
121-
chr_len = faidx_seq_len(fai, val.s);
122+
chr_len = faidx_seq_len64(fai, val.s);
122123
if ( chr_len==-1 )
123124
{
124125
free(val.s); free(key.s); free(tmp.s);
@@ -136,7 +137,7 @@ static char *copy_and_update_contig_line(faidx_t *fai, char *line, void *chr_see
136137
if ( quoted ) kputc('"',&tmp);
137138
}
138139
if ( !chr_name ) return end;
139-
ksprintf(dst,"##contig=<ID=%s,length=%d%s>",chr_name,chr_len,tmp.l ? tmp.s : "");
140+
ksprintf(dst,"##contig=<ID=%s,length=%"PRIhts_pos"%s>",chr_name,chr_len,tmp.l ? tmp.s : "");
140141
free(key.s); free(val.s); free(tmp.s);
141142
return q;
142143
}
@@ -211,7 +212,7 @@ static void update_from_fai(args_t *args)
211212
for (i=0; i<n; i++)
212213
{
213214
if ( khash_str2int_has_key(chr_seen,faidx_iseq(fai,i)) ) continue;
214-
ksprintf(&hdr_txt_new,"##contig=<ID=%s,length=%d>\n",faidx_iseq(fai,i),faidx_seq_len(fai,faidx_iseq(fai,i)));
215+
ksprintf(&hdr_txt_new,"##contig=<ID=%s,length=%"PRIhts_pos">\n",faidx_iseq(fai,i),faidx_seq_len64(fai,faidx_iseq(fai,i)));
215216
}
216217
kputs(tmp+1,&hdr_txt_new);
217218

@@ -699,7 +700,7 @@ int main_reheader(int argc, char *argv[])
699700
int c;
700701
args_t *args = (args_t*) calloc(1,sizeof(args_t));
701702
args->argc = argc; args->argv = argv;
702-
703+
703704
static struct option loptions[] =
704705
{
705706
{"temp-prefix",1,0,'T'},

test/reheader.3.fai

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
1 2364278061 6 2364278061 2364278062
2+
2 2317450362 7159592787 2317450362 2317450363
3+
3 2291775479 9477043156 2291775479 2291775480
4+
4 2192534405 11768818642 2192534405 2192534406
5+
5 2148190925 13961353054 2148190925 2148190926
6+
6 2107674557 16109543986 2107674557 2107674558
7+
7 2082167746 18217218550 2082167746 2082167747
8+
8 2081484518 20299386303 2081484518 2081484519
9+
9 2024734096 22380870828 2024734096 2024734097
10+
10 1752849333 2364278075 1752849333 1752849334
11+
11 1650012615 4117127416 1650012615 1650012616
12+
12 1392452741 5767140039 1392452741 1392452742

test/reheader.3.vcf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
##fileformat=VCFv4.3
2+
##contig=<ID=1,length=1>
3+
#CHROM POS ID REF ALT QUAL FILTER INFO
4+
1 22 . A G . . .

test/reheader.6.out

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
##fileformat=VCFv4.3
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##contig=<ID=1,length=2364278061>
4+
##contig=<ID=2,length=2317450362>
5+
##contig=<ID=3,length=2291775479>
6+
##contig=<ID=4,length=2192534405>
7+
##contig=<ID=5,length=2148190925>
8+
##contig=<ID=6,length=2107674557>
9+
##contig=<ID=7,length=2082167746>
10+
##contig=<ID=8,length=2081484518>
11+
##contig=<ID=9,length=2024734096>
12+
##contig=<ID=10,length=1752849333>
13+
##contig=<ID=11,length=1650012615>
14+
##contig=<ID=12,length=1392452741>
15+
#CHROM POS ID REF ALT QUAL FILTER INFO
16+
1 22 . A G . . .

test/test.pl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,7 @@
729729
run_test(\&test_vcf_reheader,$opts,in=>'empty',out=>'reheader.empty.out',header=>'reheader.empty.hdr');
730730
run_test(\&test_vcf_reheader,$opts,in=>'reheader.2',out=>'reheader.5.out',args=>'-f {PATH}/reheader.fai',nostdin=>1);
731731
run_test(\&test_vcf_reheader,$opts,in=>'reheader.2',out=>'reheader.5.out',args=>'-h {PATH}/reheader.2.hdr -f {PATH}/reheader.fai',nostdin=>1);
732+
run_test(\&test_vcf_reheader,$opts,in=>'reheader.3',out=>'reheader.6.out',args=>'-f {PATH}/reheader.3.fai',nostdin=>1);
732733
run_test(\&test_rename_chrs,$opts,in=>'annotate');
733734
run_test(\&test_vcf_convert,$opts,in=>'convert',out=>'convert.gs.gt.gen',args=>'-g -,.');
734735
run_test(\&test_vcf_convert,$opts,in=>'convert',out=>'convert.gs.gt.ids.gen',args=>'-g -,. --vcf-ids');

0 commit comments

Comments
 (0)