Skip to content

Commit 580b52c

Browse files
committed
Release 1.15
2 parents 5f1bf7a + 310cd8c commit 580b52c

File tree

110 files changed

+3025
-885
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

110 files changed

+3025
-885
lines changed

.appveyor.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ install:
2626
- set MSYSTEM=MINGW64
2727
- set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%PATH%
2828
- set MINGWPREFIX=x86_64-w64-mingw32
29-
- "sh -lc \"pacman -S --noconfirm --needed base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2 mingw-w64-x86_64-xz mingw-w64-x86_64-curl\""
29+
- "sh -lc \"pacman -S --noconfirm --needed base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-autotools mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2 mingw-w64-x86_64-xz mingw-w64-x86_64-curl\""
3030

3131
# The user may have e.g. jkbonfield/bcftools branch FOO and an associated
3232
# jkbonfield/htslib branch FOO. If so use that related htslib, obtained by

.cirrus.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,11 @@ ubuntu_task:
109109
<< : *TEST
110110

111111

112-
# CentOS
113-
centos_task:
114-
name: centos-gcc
112+
# Rocky Linux
113+
rockylinux_task:
114+
name: rockylinux-gcc
115115
container:
116-
image: centos:latest
116+
image: rockylinux:latest
117117
cpu: 2
118118
memory: 1G
119119

INSTALL

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,16 +218,22 @@ Note: libcurl4-openssl-dev can be used as an alternative to libcurl4-gnutls-dev.
218218
RedHat / CentOS
219219
---------------
220220

221+
Note: To install gsl-devel, it may be necessary to enable the "crb" repository.
222+
dnf --enablerepo=crb install gsl-devel
223+
221224
sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel gsl-devel perl-ExtUtils-Embed
222225

226+
Note: On some versions, Perl FindBin will need to be installed to make the tests work.
227+
sudo yum install perl-FindBin
228+
223229
Alpine Linux
224230
------------
225231

226232
Note: To install gsl-dev, it may be necessary to enable the "community"
227233
repository in /etc/apk/repositories.
228234

229-
sudo apk update # Ensure the package list is up to date
230-
sudo apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev libressl-dev gsl-dev perl-dev
235+
doas apk update # Ensure the package list is up to date
236+
doas apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev libressl-dev gsl-dev perl-dev
231237

232238
OpenSUSE
233239
--------
@@ -240,4 +246,23 @@ MacOS, assuming Xcode is installed:
240246
xz
241247
gsl (optional)
242248

249+
Windows MSYS2/MINGW64
250+
---------------------
251+
252+
The configure script must be used as without it the compilation will
253+
likely fail.
254+
255+
Follow MSYS2 installation instructions at
256+
https://www.msys2.org/wiki/MSYS2-installation/
257+
258+
Then relaunch the MSYS2 shell using the "MSYS2 MinGW x64" executable.
259+
Once in that environment (check $MSYSTEM equals "MINGW64") install the
260+
compilers using pacman -S and the following package list:
261+
262+
base-devel mingw-w64-x86_64-toolchain
263+
mingw-w64-x86_64-libdeflate mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2
264+
mingw-w64-x86_64-xz mingw-w64-x86_64-curl mingw-w64-x86_64-autotools
265+
mingw-w64-x86_64-tools-git
266+
267+
(The last is only needed for building libraries compatible with MSVC.)
243268

Makefile

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ OBJS = main.o vcfindex.o tabix.o \
3838
vcfstats.o vcfisec.o vcfmerge.o vcfquery.o vcffilter.o filter.o vcfsom.o \
3939
vcfnorm.o vcfgtcheck.o vcfview.o vcfannotate.o vcfroh.o vcfconcat.o \
4040
vcfcall.o mcall.o vcmp.o gvcf.o reheader.o convert.o vcfconvert.o tsv2vcf.o \
41-
vcfcnv.o HMM.o consensus.o ploidy.o bin.o hclust.o version.o \
41+
vcfcnv.o vcfhead.o HMM.o consensus.o ploidy.o bin.o hclust.o version.o \
4242
regidx.o smpl_ilist.o csq.o vcfbuf.o \
4343
mpileup.o bam2bcf.o bam2bcf_indel.o bam_sample.o \
4444
vcfsort.o cols.o extsort.o dist.o abuf.o \
@@ -104,7 +104,7 @@ endif
104104

105105
include config.mk
106106

107-
PACKAGE_VERSION = 1.14
107+
PACKAGE_VERSION = 1.15
108108

109109
# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
110110
# description of the working tree: either a release tag with the same value
@@ -217,7 +217,7 @@ bcftools: $(OBJS) $(HTSLIB)
217217

218218
plugins: $(PLUGINS)
219219

220-
bcftools_h = bcftools.h $(htslib_hts_defs_h) $(htslib_vcf_h)
220+
bcftools_h = bcftools.h $(htslib_hts_defs_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h)
221221
call_h = call.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) vcmp.h
222222
variantkey_h = variantkey.h hex.h
223223
convert_h = convert.h $(htslib_vcf_h)
@@ -240,15 +240,16 @@ vcfplugin.o: vcfplugin.c config.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h)
240240
vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(bcftools_h) $(call_h) $(prob1_h) $(ploidy_h) $(gvcf_h) regidx.h $(vcfbuf_h)
241241
vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_thread_pool_h) $(bcftools_h)
242242
vcfconvert.o: vcfconvert.c $(htslib_faidx_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kseq_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
243-
vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) rbuf.h
243+
vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) rbuf.h regidx.h
244244
vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kbitset_h) $(htslib_hts_os_h) $(bcftools_h) extsort.h
245245
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h)
246246
vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h) $(filter_h)
247247
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) regidx.h $(bcftools_h) vcmp.h $(htslib_khash_h)
248248
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h abuf.h
249-
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h)
249+
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h) $(smpl_ilist_h)
250250
vcfroh.o: vcfroh.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) HMM.h $(smpl_ilist_h) $(filter_h)
251251
vcfcnv.o: vcfcnv.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(bcftools_h) HMM.h rbuf.h
252+
vcfhead.o: vcfhead.c $(htslib_kstring_h) $(htslib_vcf_h) $(bcftools_h)
252253
vcfsom.o: vcfsom.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h)
253254
vcfsort.o: vcfsort.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_hts_os_h) kheap.h $(bcftools_h)
254255
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) bin.h dist.h

NEWS

Lines changed: 104 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,106 @@
1+
## Release 1.15 (21st February 2022)
2+
3+
4+
* New `bcftools head` subcommand for conveniently displaying the headers
5+
of a VCF or BCF file. Without any options, this is equivalent to
6+
`bcftools view --header-only --no-version` but more succinct and memorable.
7+
8+
* The `-T, --targets-file` option had the following bug originating in HTSlib code:
9+
when an uncompressed file with multiple columns CHR,POS,REF was provided, the
10+
REF would be interpreted as 0 gigabases (#1598)
11+
12+
Changes affecting specific commands:
13+
14+
* bcftools annotate
15+
16+
- In addition to `--rename-annots`, which requires a file with name mappings,
17+
it is now possible to do the same on the command line `-c NEW_TAG:=OLD_TAG`
18+
19+
- Add new option --min-overlap which allows to specify the minimum required
20+
overlap of intersecting regions
21+
22+
- Allow to transfer ALT from VCF with or without replacement using
23+
bcftools annotate -a annots.vcf.gz -c ALT file.vcf.gz
24+
bcftools annotate -a annots.vcf.gz -c +ALT file.vcf.gz
25+
26+
* bcftools convert
27+
28+
- Revamp of `--gensample`, `--hapsample` and `--haplegendsample` family of options
29+
which includes the following changes:
30+
31+
- New `--3N6` option to output/input the new version of the .gen file format,
32+
see https://www.cog-genomics.org/plink/2.0/formats#gen
33+
34+
- Deprecate the `--chrom` option in favor of `--3N6`. A simple `cut` command
35+
can be used to convert from the new 3*M+6 column format to the format printed
36+
with `--chrom` (`cut -d' ' -f1,3-`).
37+
38+
- The CHROM:POS_REF_ALT IDs which are used to detect strand swaps are required
39+
and must appear either in the "SNP ID" column or the "rsID" column. The column
40+
is autodetected for `--gensample2vcf`, can be the first or the second for
41+
`--hapsample2vcf` (depending on whether the `--vcf-ids` option is given), must be
42+
the first for `--haplegendsample2vcf`.
43+
44+
* bcftools csq
45+
46+
- Allow GFF files with phase column unset
47+
48+
* bcftools filter
49+
50+
- New `--mask`, `--mask-file` and `--mask-overlap` options to soft filter
51+
variants in regions (#1635)
52+
53+
* bcftools +fixref
54+
55+
- The `-m id` option now works also for non-dbSNP ids, i.e. not just `rsINT`
56+
57+
- New `-m flip-all` mode for flipping all sites, including ambiguous A/T and C/G sites
58+
59+
* bcftools isec
60+
61+
- Prevent segfault on sites filtered with -i/-e in all files (#1632)
62+
63+
* bcftools mpileup
64+
65+
- More flexible read filtering using the options
66+
--ls, --skip-all-set .. skip reads with all of the FLAG bits set
67+
--ns, --skip-any-set .. skip reads with any of the FLAG bits set
68+
--lu, --skip-all-unset .. skip reads with all of the FLAG bits unset
69+
--nu, --skip-any-unset .. skip reads with any of the FLAG bits unset
70+
71+
The existing synonymous options will continue to function but their use
72+
is discouraged
73+
--rf, --incl-flags STR|INT Required flags: skip reads with mask bits unset
74+
--ff, --excl-flags STR|INT Filter flags: skip reads with mask bits set
75+
76+
* bcftools query
77+
78+
- Make the `--samples` and `--samples-file` options work also in the `--list-samples`
79+
mode. Add a new `--force-samples` option which allows to proceed even when some of
80+
the requested samples are not present in the VCF (#1631)
81+
82+
* bcftools +setGT
83+
84+
- Fix a bug in `-t q -e EXPR` logic applied on FORMAT fields, sites with all
85+
samples failing the expression EXPR were incorrectly skipped. This problem
86+
affected only the use of `-e` logic, not the `-i` expressions (#1607)
87+
88+
* bcftools sort
89+
90+
- make use of the TMPDIR environment variable when defined
91+
92+
* bcftools +trio-dnm2
93+
94+
- The --use-NAIVE mode now also adds the de novo allele in FORMAT/VA
95+
96+
197
## Release 1.14 (22nd October 2021)
298

399

4100
Changes affecting the whole of bcftools, or multiple commands:
5101

6102
* New `--regions-overlap` and `--targets-overlap` options which address
7-
a long-standing design problem with subsetting VCF files by region.
103+
a long-standing design problem with subsetting VCF files by region.
8104
BCFtools recognizes two sets of options, one for streaming (`-t/-T`) and
9105
one for index-jumping (`-r/-R`). They behave differently, the first
10106
includes only records with POS coordinate within the regions, the other
@@ -32,11 +128,11 @@ Changes affecting specific commands:
32128
by using `-c INFO/END`.
33129

34130
- add a new '.' modifier to control whether missing values should be carried
35-
over from a tab-delimited file or not. For example:
131+
over from a tab-delimited file or not. For example:
36132

37133
-c TAG .. adds TAG if the source value is not missing. If TAG
38134
exists in the target file, it will be overwritten
39-
135+
40136
-c .TAG .. adds TAG even if the source value is missing. This
41137
can overwrite non-missing values with a missing value
42138
and can create empty VCF fields (`TAG=.`)
@@ -165,7 +261,7 @@ Changes affecting specific commands:
165261
* bcftools +fill-tags:
166262

167263
- Generalization and better support for custom functions that allow
168-
adding new INFO tags based on arbitrary `-i, --include` type of
264+
adding new INFO tags based on arbitrary `-i, --include` type of
169265
expressions. For example, to calculate a missing INFO/DP annotation
170266
from FORMAT/AD, it is possible to use:
171267

@@ -229,7 +325,7 @@ Changes affecting specific commands:
229325

230326
- Atomization of AD and QS tags now correctly updates occurrences of duplicate
231327
alleles within different haplotypes
232-
328+
233329
- Fix a bug in atomization of Number=A,R tags
234330

235331
* bcftools reheader:
@@ -241,7 +337,7 @@ Changes affecting specific commands:
241337
- A wider range of genotypes can be set by the plugin by allowing
242338
specifying custom genotypes. For example, to force a heterozygous
243339
genotype it is now possible to use expressions like:
244-
340+
245341
c:'m|M'
246342
c:0/1
247343
c:0
@@ -253,7 +349,7 @@ Changes affecting specific commands:
253349
- Better handling of ambiguous keys such as INFO/AF and CSQ/AD. The
254350
`-p, --annot-prefix` option is now applied before doing anything else
255351
which allows its use with `-f, --format` and `-c, --columns` options.
256-
352+
257353
- Some consequence field names may not constitute a valid tag name, such
258354
as "pos(1-based)". Field names are now trimmed to exclude brackets.
259355

@@ -383,7 +479,7 @@ Changes affecting specific commands:
383479

384480
* bcftools csq:
385481

386-
- Fix a bug wich caused incorrect FORMAT/BCSQ formatting at sites with too
482+
- Fix a bug which caused incorrect FORMAT/BCSQ formatting at sites with too
387483
many per-sample consequences
388484

389485
- Fix a bug which incorrectly handled the --ncsq parameter and could clash

bam_sample.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/* bam_sample.c -- group data by sample.
22
33
Copyright (C) 2010, 2011 Broad Institute.
4-
Copyright (C) 2013, 2016-2018 Genome Research Ltd.
4+
Copyright (C) 2013, 2016-2022 Genome Research Ltd.
55
66
Author: Heng Li <[email protected]>, Petr Danecek <[email protected]>
77
@@ -281,7 +281,7 @@ int bam_smpl_add_samples(bam_smpl_t *bsmpl, char *list, int is_file)
281281

282282
int i, nsamples = 0;
283283
char **samples = hts_readlist(list, is_file, &nsamples);
284-
if ( !nsamples ) return 0;
284+
if ( !samples || !nsamples ) return 0;
285285

286286
kstring_t ori = {0,0,0};
287287
kstring_t ren = {0,0,0};
@@ -328,7 +328,7 @@ int bam_smpl_add_readgroups(bam_smpl_t *bsmpl, char *list, int is_file)
328328

329329
int i, nrows = 0;
330330
char **rows = hts_readlist(list, is_file, &nrows);
331-
if ( !nrows ) return 0;
331+
if ( !rows || !nrows ) return 0;
332332

333333
kstring_t fld1 = {0,0,0};
334334
kstring_t fld2 = {0,0,0};

bcftools.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* bcftools.h -- utility function declarations.
22
3-
Copyright (C) 2013-2021 Genome Research Ltd.
3+
Copyright (C) 2013-2022 Genome Research Ltd.
44
55
Author: Petr Danecek <[email protected]>
66
@@ -28,6 +28,7 @@ THE SOFTWARE. */
2828
#include <stdarg.h>
2929
#include <htslib/hts_defs.h>
3030
#include <htslib/vcf.h>
31+
#include <htslib/synced_bcf_reader.h>
3132
#include <math.h>
3233

3334
#define FT_TAB_TEXT 0 // custom tab-delimited text file
@@ -50,9 +51,11 @@ void error_errno(const char *format, ...) HTS_NORETURN HTS_FORMAT(HTS_PRINTF_FMT
5051

5152
void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *cmd);
5253
const char *hts_bcf_wmode(int file_type);
53-
const char *hts_bcf_wmode2(int file_type, char *fname);
54-
void set_wmode(char dst[8], int file_type, char *fname, int compression_level); // clevel: 0-9 with or zb type, -1 unset
54+
const char *hts_bcf_wmode2(int file_type, const char *fname);
55+
void set_wmode(char dst[8], int file_type, const char *fname, int compression_level); // clevel: 0-9 with or zb type, -1 unset
5556
char *init_tmp_prefix(const char *prefix);
57+
int read_AF(bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq);
58+
int parse_overlap_option(const char *arg);
5659

5760
void *smalloc(size_t size); // safe malloc
5861

bin.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* The MIT License
22
3-
Copyright (c) 2016 Genome Research Ltd.
3+
Copyright (c) 2016-2022 Genome Research Ltd.
44
55
Author: Petr Danecek <[email protected]>
66
@@ -43,6 +43,7 @@ bin_t *bin_init(const char *list_def, float min, float max)
4343
int is_file = strchr(list_def,',') ? 0 : 1;
4444
int i, nlist;
4545
char **list = hts_readlist(list_def, is_file, &nlist);
46+
if ( !list ) error("Error: failed to read %s\n",list_def);
4647
bin->nbins = nlist;
4748
bin->bins = (float*) malloc(sizeof(float)*nlist);
4849
for (i=0; i<nlist; i++)

consensus.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -733,11 +733,14 @@ static void apply_variant(args_t *args, bcf1_t *rec)
733733
if ( rec->rlen > args->fa_buf.l - idx )
734734
{
735735
rec->rlen = args->fa_buf.l - idx;
736-
alen = strlen(alt_allele);
737-
if ( alen > rec->rlen )
736+
if ( alt_allele[0]!='<' )
738737
{
739-
alt_allele[rec->rlen] = 0;
740-
fprintf(stderr,"Warning: trimming variant starting at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
738+
alen = strlen(alt_allele);
739+
if ( alen > rec->rlen )
740+
{
741+
fprintf(stderr,"Warning: trimming variant \"%s\" starting at %s:%"PRId64"\n", alt_allele,bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
742+
alt_allele[rec->rlen] = 0;
743+
}
741744
}
742745
}
743746
if ( idx>=args->fa_buf.l )
@@ -749,7 +752,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
749752
// TODO: symbolic deletions probably need more work above with PICK_SHORT|PICK_LONG
750753

751754
if ( strcasecmp(alt_allele,"<DEL>") && strcasecmp(alt_allele,"<*>") && strcasecmp(alt_allele,"<NON_REF>") )
752-
error("Symbolic alleles other than <DEL>, <*> or <NON_REF> are currently not supported, e.g. %s at %s:%"PRId64".\n"
755+
error("Symbolic alleles other than <DEL>, <*> or <NON_REF> are currently not supported, e.g. \"%s\" at %s:%"PRId64".\n"
753756
"Please use filtering expressions to exclude such sites, for example by running with: -e 'ALT~\"<.*>\"'\n",
754757
alt_allele,bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
755758
if ( !strcasecmp(alt_allele,"<DEL>") )

0 commit comments

Comments
 (0)