Skip to content

Commit 6c2c1e9

Browse files
committed
Release 1.18
2 parents 116a87c + 6b699b5 commit 6c2c1e9

File tree

157 files changed

+5922
-2754
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

157 files changed

+5922
-2754
lines changed

INSTALL

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,10 @@ Alpine Linux
232232
Note: To install gsl-dev, it may be necessary to enable the "community"
233233
repository in /etc/apk/repositories.
234234

235+
Note: some older Alpine versions use libressl-dev rather than openssl-dev.
236+
235237
doas apk update # Ensure the package list is up to date
236-
doas apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev libressl-dev gsl-dev perl-dev
238+
doas apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev openssl-dev gsl-dev perl-dev
237239

238240
OpenSUSE
239241
--------

LICENSE

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -723,11 +723,12 @@ Public License instead of this License. But first, please read
723723

724724
-----------------------------------------------------------------------------
725725

726-
LICENSE FOR VariantKey (https://github.com/Genomicsplc/variantkey)
726+
LICENSE FOR VariantKey (https://github.com/tecnickcom/variantkey)
727727

728728
The MIT License
729729

730730
Copyright (c) 2017-2018 GENOMICS plc
731+
Copyright (c) 2018-2023 Nicola Asuni - Tecnick.com
731732

732733
Permission is hereby granted, free of charge, to any person obtaining a copy
733734
of this software and associated documentation files (the "Software"), to deal

Makefile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ OBJS = main.o vcfindex.o tabix.o \
4242
regidx.o smpl_ilist.o csq.o vcfbuf.o \
4343
mpileup.o bam2bcf.o bam2bcf_indel.o bam2bcf_iaux.o read_consensus.o bam_sample.o \
4444
vcfsort.o cols.o extsort.o dist.o abuf.o \
45-
ccall.o em.o prob1.o kmin.o str_finder.o
45+
ccall.o em.o prob1.o kmin.o str_finder.o gff.o
4646
PLUGIN_OBJS = vcfplugin.o
4747

4848
prefix = /usr/local
@@ -104,7 +104,7 @@ endif
104104

105105
include config.mk
106106

107-
PACKAGE_VERSION = 1.17
107+
PACKAGE_VERSION = 1.18
108108

109109
# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
110110
# description of the working tree: either a release tag with the same value
@@ -246,7 +246,7 @@ vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htsli
246246
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h)
247247
vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h) $(filter_h)
248248
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) regidx.h $(bcftools_h) vcmp.h $(htslib_khash_h)
249-
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h abuf.h
249+
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h abuf.h gff.h
250250
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h) $(smpl_ilist_h)
251251
vcfroh.o: vcfroh.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) HMM.h $(smpl_ilist_h) $(filter_h)
252252
vcfcnv.o: vcfcnv.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(bcftools_h) HMM.h rbuf.h
@@ -289,6 +289,7 @@ vcfbuf.o: vcfbuf.c $(htslib_vcf_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcf
289289
abuf.o: abuf.c $(htslib_vcf_h) $(bcftools_h) rbuf.h abuf.h
290290
extsort.o: extsort.c $(bcftools_h) extsort.h kheap.h
291291
smpl_ilist.o: smpl_ilist.c $(bcftools_h) $(smpl_ilist_h)
292+
gff.o: gff.c gff.h regidx.h
292293
csq.o: csq.c $(htslib_hts_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) regidx.h kheap.h $(smpl_ilist_h) rbuf.h
293294

294295
# test programs

NEWS

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,121 @@
1+
## Release 1.18 (25th July 2023)
2+
3+
4+
Changes affecting the whole of bcftools, or multiple commands:
5+
6+
* Support automatic indexing while writing BCF and VCF.gz via the new `--write-index` option
7+
8+
9+
Changes affecting specific commands:
10+
11+
* bcftools annotate
12+
13+
- The `-m, --mark-sites` option can now be used to mark all sites without the
14+
need to provide the `-a` file (#1861)
15+
16+
- Fix a bug where the `-m` function did not respect the `--min-overlap` option (#1869)
17+
18+
- Fix a bug where an update of INFO/END resulted in an assertion error (#1957)
19+
20+
* bcftools concat
21+
22+
- New option `--drop-genotypes`
23+
24+
* bcftools consensus
25+
26+
- Support higher-ploidy genotypes with `-H, --haplotype` (#1892)
27+
28+
- Allow `--mark-ins` and `--mark-snv` with a character, similarly to `--mark-del`
29+
30+
* bcftools convert
31+
32+
- Support for conversion from tab-delimited files (CHROM,POS,REF,ALT) to sites-only VCFs
33+
34+
* bcftools csq
35+
36+
- New `--unify-chr-names` option to automatically unify different chromosome
37+
naming conventions in the input GFF, fasta and VCF files (e.g. "chrX" vs "X")
38+
39+
- More versatility in parsing various flavors of GFF
40+
41+
- A new `--dump-gff` option to help with debugging and investigating the internals
42+
of GFF parsing
43+
44+
- When printing consequences in nonsense mediated decay transcripts, include 'NMD_transcript'
45+
in the consequence part of the annotation. This is to make filtering easier and analogous to
46+
VEP annotations. For example the consequence annotation
47+
3_prime_utr|PCGF3|ENST00000430644|NMD
48+
is newly printed as
49+
3_prime_utr&NMD_transcript|PCGF3|ENST00000430644|NMD
50+
51+
* bcftools gtcheck
52+
53+
- Add stats for the number of sites matched in the GT-vs-GT, GT-vs-PL, etc. modes. This
54+
information is important for interpretation of the discordance score, as only the
55+
GT-vs-GT matching can be interpreted as the number of mismatching genotypes.
56+
57+
* bcftools +mendelian2
58+
59+
- Fix in command line argument parsing, the `-p` and `-P` options were not
60+
functioning (#1906)
61+
62+
* bcftools merge
63+
64+
- New `-M, --missing-rules` option to control the behavior of merging of vector tags
65+
to prevent mixtures of known and missing values in tags when desired
66+
67+
- Use values pertaining to the unknown allele (<*> or <NON_REF>) when available
68+
to prevent mixtures of known and missing values (#1888)
69+
70+
- Revamped line matching code to fix problems in gVCF merging where split gVCF blocks
71+
would not update genotypes (#1891, #1164).
72+
73+
* bcftools mpileup
74+
75+
- Fix a bug in --indels-v2.0 which caused an endless loop when CIGAR operator 'H' or 'P'
76+
was encountered
77+
78+
* bcftools norm
79+
80+
- The `-m, --multiallelics +` mode now preserves phasing (#1893)
81+
82+
- Symbolic <DEL.*> alleles are now normalized too (#1919)
83+
84+
- New `-g, --gff-annot` option to right-align indels in forward transcripts to follow
85+
HGVS 3'rule (#1929)
86+
87+
* bcftools query
88+
89+
- Force newline character in formatting expression when not given explicitly
90+
91+
- Fix `-H` header output in formatting expressions containing newlines
92+
93+
* bcftools reheader
94+
95+
- Make `-f, --fai` aware of long contigs not representable by 32-bit integer (#1959)
96+
97+
* bcftools +split-vep
98+
99+
- Prevent a segfault when `-i/-e` use a VEP subfield not included in `-f` or `-c` (#1877)
100+
101+
- New `-X, --keep-sites` option complementing the existing `-x, --drop-sites` option
102+
103+
- Force newline character in formatting expression when not given explicitly
104+
105+
- Fix a subtle ambiguity: identical rows must be returned when `-s` is applied regardless
106+
of `-f` containing the `-a` VEP tag itself or not.
107+
108+
* bcftools stats
109+
110+
- Collect new VAF (variant allele frequency) statistics from FORMAT/AD field
111+
112+
- When counting transitions/transversions, consider also alternate het genotypes
113+
114+
* plot-vcfstats
115+
116+
- Add three new VAF plots
117+
118+
1119
## Release 1.17 (21st February 2023)
2120

3121

bcftools.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* bcftools.h -- utility function declarations.
22
3-
Copyright (C) 2013-2022 Genome Research Ltd.
3+
Copyright (C) 2013-2023 Genome Research Ltd.
44
55
Author: Petr Danecek <[email protected]>
66
@@ -49,6 +49,9 @@ void error(const char *format, ...) HTS_NORETURN HTS_FORMAT(HTS_PRINTF_FMT, 1, 2
4949
// newline will be added by the function.
5050
void error_errno(const char *format, ...) HTS_NORETURN HTS_FORMAT(HTS_PRINTF_FMT, 1, 2);
5151

52+
// For on the fly index creation with --write-index
53+
int init_index(htsFile *fh, bcf_hdr_t *hdr, char *fname, char **idx_fname);
54+
5255
void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *cmd);
5356
const char *hts_bcf_wmode(int file_type);
5457
const char *hts_bcf_wmode2(int file_type, const char *fname);

cigar_state.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,12 @@ static inline int cstate_seek_fwd(cigar_state_t *cs, hts_pos_t *pos_ptr, int tri
107107
cs->icig++;
108108
continue;
109109
}
110+
if ( op==BAM_CHARD_CLIP || op==BAM_CPAD )
111+
{
112+
cs->icig++;
113+
continue;
114+
}
115+
error("FIXME: not ready for CIGAR operator %d\n",op);
110116
}
111117
// the read starts after pos
112118
if ( trim_left )
@@ -175,6 +181,12 @@ static inline int cstate_seek_op_fwd(cigar_state_t *cs, hts_pos_t pos, int seek_
175181
cs->icig++;
176182
continue;
177183
}
184+
if ( op==BAM_CHARD_CLIP || op==BAM_CPAD )
185+
{
186+
cs->icig++;
187+
continue;
188+
}
189+
error("FIXME: not ready for CIGAR operator %d\n",op);
178190
}
179191
return cs->icig < cs->ncig ? -1 : -2;
180192
}

consensus.c

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@
5454
#define PICK_SHORT 8
5555
#define PICK_IUPAC 16
5656

57-
#define TO_UPPER 0
58-
#define TO_LOWER 1
57+
#define TO_UPPER 1
58+
#define TO_LOWER 2
5959

6060
typedef struct
6161
{
@@ -324,7 +324,7 @@ static void init_region(args_t *args, char *line)
324324
{
325325
char *ss, *se = line;
326326
while ( *se && !isspace(*se) && *se!=':' ) se++;
327-
int from = 0, to = 0;
327+
hts_pos_t from = 0, to = 0;
328328
char tmp = 0, *tmp_ptr = NULL;
329329
if ( *se )
330330
{
@@ -356,7 +356,14 @@ static void init_region(args_t *args, char *line)
356356
args->fa_frz_mod = -1;
357357
args->fa_case = -1;
358358
args->vcf_rbuf.n = 0;
359-
bcf_sr_seek(args->files,line,args->fa_ori_pos);
359+
360+
kstring_t str = {0,0,0};
361+
if ( from==0 ) from = 1;
362+
if ( to==0 ) to = HTS_POS_MAX;
363+
ksprintf(&str,"%s:%"PRIhts_pos"-%"PRIhts_pos,line,from,to);
364+
bcf_sr_set_regions(args->files,line,0);
365+
free(str.s);
366+
360367
if ( tmp_ptr ) *tmp_ptr = tmp;
361368
fprintf(args->fp_out,">%s%s\n",args->chr_prefix?args->chr_prefix:"",line);
362369
if ( args->chain_fname )
@@ -466,25 +473,37 @@ static char *mark_del(char *ref, int rlen, char *alt, int mark)
466473
static void mark_ins(char *ref, char *alt, char mark)
467474
{
468475
int i, nref = strlen(ref), nalt = strlen(alt);
469-
if ( mark=='l' )
476+
if ( mark==TO_LOWER )
470477
for (i=nref; i<nalt; i++) alt[i] = tolower(alt[i]);
471-
else
478+
else if ( mark==TO_UPPER )
472479
for (i=nref; i<nalt; i++) alt[i] = toupper(alt[i]);
480+
else if ( mark )
481+
for (i=nref; i<nalt; i++) alt[i] = mark;
473482
}
474483
static void mark_snv(char *ref, char *alt, char mark)
475484
{
476485
int i, nref = strlen(ref), nalt = strlen(alt);
477486
int n = nref < nalt ? nref : nalt;
478-
if ( mark=='l' )
487+
if ( mark==TO_LOWER )
479488
{
480489
for (i=0; i<n; i++)
481490
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = tolower(alt[i]);
482491
}
483-
else
492+
else if ( mark==TO_UPPER)
484493
{
485494
for (i=0; i<n; i++)
486495
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = toupper(alt[i]);
487496
}
497+
else if ( mark==TO_UPPER)
498+
{
499+
for (i=0; i<n; i++)
500+
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = toupper(alt[i]);
501+
}
502+
else if ( mark )
503+
{
504+
for (i=0; i<n; i++)
505+
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = mark;
506+
}
488507
}
489508
static void iupac_init(args_t *args, bcf1_t *rec)
490509
{
@@ -1099,19 +1118,18 @@ static void usage(args_t *args)
10991118
fprintf(stderr, " -f, --fasta-ref FILE Reference sequence in fasta format\n");
11001119
fprintf(stderr, " -H, --haplotype WHICH Choose which allele to use from the FORMAT/GT field, note\n");
11011120
fprintf(stderr, " the codes are case-insensitive:\n");
1102-
fprintf(stderr, " 1: first allele from GT, regardless of phasing\n");
1103-
fprintf(stderr, " 2: second allele from GT, regardless of phasing\n");
1121+
fprintf(stderr, " N: N={1,2,3,..} is the index of the allele from GT, regardless of phasing (e.g. \"2\")\n");
11041122
fprintf(stderr, " R: REF allele in het genotypes\n");
11051123
fprintf(stderr, " A: ALT allele\n");
11061124
fprintf(stderr, " I: IUPAC code for all genotypes\n");
11071125
fprintf(stderr, " LR,LA: longer allele and REF/ALT if equal length\n");
11081126
fprintf(stderr, " SR,SA: shorter allele and REF/ALT if equal length\n");
1109-
fprintf(stderr, " 1pIu,2pIu: first/second allele for phased and IUPAC code for unphased GTs\n");
1127+
fprintf(stderr, " NpIu: index of the allele for phased and IUPAC code for unphased GTs (e.g. \"2pIu\")\n");
11101128
fprintf(stderr, " -i, --include EXPR Select sites for which the expression is true (see man page for details)\n");
11111129
fprintf(stderr, " -I, --iupac-codes Output IUPAC codes based on FORMAT/GT, use -s/-S to subset samples\n");
1112-
fprintf(stderr, " --mark-del CHAR Instead of removing sequence, insert CHAR for deletions\n");
1113-
fprintf(stderr, " --mark-ins uc|lc Highlight insertions in uppercase (uc) or lowercase (lc), leaving the rest as is\n");
1114-
fprintf(stderr, " --mark-snv uc|lc Highlight substitutions in uppercase (uc) or lowercase (lc), leaving the rest as is\n");
1130+
fprintf(stderr, " --mark-del CHAR Instead of removing sequence, insert character CHAR for deletions\n");
1131+
fprintf(stderr, " --mark-ins uc|lc|CHAR Highlight insertions in uppercase (uc), lowercase (lc), or use CHAR, leaving the rest as is\n");
1132+
fprintf(stderr, " --mark-snv uc|lc|CHAR Highlight substitutions in uppercase (uc), lowercase (lc), or use CHAR, leaving the rest as is\n");
11151133
fprintf(stderr, " -m, --mask FILE Replace regions according to the next --mask-with option. The default is --mask-with N\n");
11161134
fprintf(stderr, " --mask-with CHAR|uc|lc Replace with CHAR (skips overlapping variants); change to uppercase (uc) or lowercase (lc)\n");
11171135
fprintf(stderr, " -M, --missing CHAR Output CHAR instead of skipping a missing genotype \"./.\"\n");
@@ -1163,13 +1181,15 @@ int main_consensus(int argc, char *argv[])
11631181
{
11641182
case 1 : args->mark_del = optarg[0]; break;
11651183
case 2 :
1166-
if ( !strcasecmp(optarg,"uc") ) args->mark_ins = 'u';
1167-
else if ( !strcasecmp(optarg,"lc") ) args->mark_ins = 'l';
1184+
if ( !strcasecmp(optarg,"uc") ) args->mark_ins = TO_UPPER;
1185+
else if ( !strcasecmp(optarg,"lc") ) args->mark_ins = TO_LOWER;
1186+
else if ( !optarg[1] && optarg[0]>32 && optarg[0]<127 ) args->mark_ins = optarg[0];
11681187
else error("The argument is not recognised: --mark-ins %s\n",optarg);
11691188
break;
11701189
case 3 :
1171-
if ( !strcasecmp(optarg,"uc") ) args->mark_snv = 'u';
1172-
else if ( !strcasecmp(optarg,"lc") ) args->mark_snv = 'l';
1190+
if ( !strcasecmp(optarg,"uc") ) args->mark_snv = TO_UPPER;
1191+
else if ( !strcasecmp(optarg,"lc") ) args->mark_snv = TO_LOWER;
1192+
else if ( !optarg[1] && optarg[0]>32 && optarg[0]<127 ) args->mark_snv = optarg[0];
11731193
else error("The argument is not recognised: --mark-snv %s\n",optarg);
11741194
break;
11751195
case 'p': args->chr_prefix = optarg; break;
@@ -1211,7 +1231,8 @@ int main_consensus(int argc, char *argv[])
12111231
{
12121232
char *tmp;
12131233
args->haplotype = strtol(optarg, &tmp, 10);
1214-
if ( tmp==optarg || *tmp ) error("Error: Could not parse --haplotype %s, expected numeric argument\n", optarg);
1234+
if ( tmp==optarg || (*tmp && strcasecmp(tmp,"pIu")) ) error("Error: Could not parse \"--haplotype %s\", expected number of number followed with \"pIu\"\n", optarg);
1235+
if ( *tmp ) args->allele |= PICK_IUPAC;
12151236
if ( args->haplotype <=0 ) error("Error: Expected positive integer with --haplotype\n");
12161237
}
12171238
break;

0 commit comments

Comments
 (0)