Skip to content

Commit 02ee548

Browse files
committed
Release 1.20
2 parents bb75b76 + 67974ca commit 02ee548

File tree

119 files changed

+6825
-923
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

119 files changed

+6825
-923
lines changed

LICENSE

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ the INSTALL document), the use of this software is governed by the GPL license.
99

1010
The MIT/Expat License
1111

12-
Copyright (C) 2012-2023 Genome Research Ltd.
12+
Copyright (C) 2012-2024 Genome Research Ltd.
1313

1414
Permission is hereby granted, free of charge, to any person obtaining a copy
1515
of this software and associated documentation files (the "Software"), to deal
@@ -772,3 +772,28 @@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
772772
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
773773
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
774774
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
775+
776+
-----------------------------------------------------------------------------
777+
778+
License for edlib.[ch]
779+
780+
The MIT License (MIT)
781+
782+
Copyright (c) 2014 Martin Šošić
783+
784+
Permission is hereby granted, free of charge, to any person obtaining a copy of
785+
this software and associated documentation files (the "Software"), to deal in
786+
the Software without restriction, including without limitation the rights to
787+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
788+
the Software, and to permit persons to whom the Software is furnished to do so,
789+
subject to the following conditions:
790+
791+
The above copyright notice and this permission notice shall be included in all
792+
copies or substantial portions of the Software.
793+
794+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
795+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
796+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
797+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
798+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
799+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Makefile

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@ OBJS = main.o vcfindex.o tabix.o \
4040
vcfcall.o mcall.o vcmp.o gvcf.o reheader.o convert.o vcfconvert.o tsv2vcf.o \
4141
vcfcnv.o vcfhead.o HMM.o consensus.o ploidy.o bin.o hclust.o version.o \
4242
regidx.o smpl_ilist.o csq.o vcfbuf.o \
43-
mpileup.o bam2bcf.o bam2bcf_indel.o bam2bcf_iaux.o read_consensus.o bam_sample.o \
43+
mpileup.o bam2bcf.o bam2bcf_indel.o bam2bcf_iaux.o bam2bcf_edlib.o \
44+
read_consensus.o bam_sample.o \
4445
vcfsort.o cols.o extsort.o dist.o abuf.o \
45-
ccall.o em.o prob1.o kmin.o str_finder.o gff.o
46+
ccall.o em.o prob1.o kmin.o str_finder.o gff.o edlib.o
4647
PLUGIN_OBJS = vcfplugin.o
4748

4849
prefix = /usr/local
@@ -104,7 +105,7 @@ endif
104105

105106
include config.mk
106107

107-
PACKAGE_VERSION = 1.19
108+
PACKAGE_VERSION = 1.20
108109

109110
# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
110111
# description of the working tree: either a release tag with the same value
@@ -142,7 +143,9 @@ print-version:
142143
ifdef USE_GPL
143144
main.o : EXTRA_CPPFLAGS += -DUSE_GPL
144145
OBJS += polysomy.o peakfit.o
145-
GSL_LIBS ?= -lgsl -lcblas
146+
ifndef GSL_LIBS
147+
GSL_LIBS += -lgsl -lcblas
148+
endif
146149
endif
147150

148151
print-%:
@@ -232,6 +235,7 @@ vcfbuf_h = vcfbuf.h $(htslib_vcf_h)
232235
abuf_h = abuf.h $(htslib_vcf_h)
233236
dbuf_h = dbuf.h $(htslib_vcf_h)
234237
bam2bcf_h = bam2bcf.h $(htslib_hts_h) $(htslib_vcf_h)
238+
edlib.h = edlib.h
235239
bam_sample_h = bam_sample.h $(htslib_sam_h)
236240
cigar_state_h = cigar_state.h $(htslib_hts_h) $(htslib_sam_h)
237241
read_consensus_h = read_consensus.h $(htslib_hts_h) $(htslib_sam_h)
@@ -242,8 +246,8 @@ main.o: main.c $(htslib_hts_h) config.h version.h $(bcftools_h)
242246
vcfannotate.o: vcfannotate.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h) $(convert_h) $(smpl_ilist_h) regidx.h $(htslib_khash_h) $(dbuf_h)
243247
vcfplugin.o: vcfplugin.c config.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h)
244248
vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(bcftools_h) $(call_h) $(prob1_h) $(ploidy_h) $(gvcf_h) regidx.h $(vcfbuf_h)
245-
vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_thread_pool_h) $(bcftools_h)
246-
vcfconvert.o: vcfconvert.c $(htslib_faidx_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kseq_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
249+
vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) $(bcftools_h)
250+
vcfconvert.o: vcfconvert.c $(htslib_faidx_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kseq_h) $(htslib_hts_endian_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
247251
vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) rbuf.h regidx.h
248252
vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kbitset_h) $(htslib_hts_os_h) $(htslib_bgzf_h) $(bcftools_h) extsort.h filter.h
249253
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h)
@@ -261,10 +265,10 @@ vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfu
261265
reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(htslib_thread_pool_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) $(khash_str2str_h)
262266
tabix.o: tabix.c $(htslib_bgzf_h) $(htslib_tbx_h)
263267
ccall.o: ccall.c $(htslib_kfunc_h) $(call_h) kmin.h $(prob1_h)
264-
convert.o: convert.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(bcftools_h) $(variantkey_h) $(convert_h) $(filter_h)
268+
convert.o: convert.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(htslib_hts_endian_h) $(bcftools_h) $(variantkey_h) $(convert_h) $(filter_h)
265269
tsv2vcf.o: tsv2vcf.c $(tsv2vcf_h)
266270
em.o: em.c $(htslib_vcf_h) kmin.h $(call_h)
267-
filter.o: filter.c $(htslib_khash_str2int_h) $(htslib_hts_defs_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) config.h $(filter_h) $(bcftools_h)
271+
filter.o: filter.c $(htslib_khash_str2int_h) $(htslib_hts_defs_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) $(htslib_hts_endian_h) config.h $(filter_h) $(bcftools_h)
268272
$(CC) $(CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CPPFLAGS) $(PERL_CFLAGS) -c -o $@ $<
269273
gvcf.o: gvcf.c $(gvcf_h) $(bcftools_h)
270274
kmin.o: kmin.c kmin.h
@@ -283,6 +287,7 @@ mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(hts
283287
bam2bcf.o: bam2bcf.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h) mw.h
284288
bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) $(str_finder_h)
285289
bam2bcf_iaux.o: bam2bcf_iaux.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bcftools_h) $(bam2bcf_h) $(htslib_ksort_h) $(read_consensus_h) $(cigar_state_h)
290+
bam2bcf_edlib.o: bam2bcf_edlib.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bcftools_h) $(bam2bcf_h) $(htslib_ksort_h) $(read_consensus_h) $(cigar_state_h) $(edlib.h)
286291
read_consensus.o: read_consensus.c $(read_consensus_h) $(cigar_state_h) $(bcftools_h) kheap.h
287292
bam_sample.o: bam_sample.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(khash_str2str_h) $(bam_sample_h) $(bcftools_h)
288293
version.o: version.h version.c

NEWS

Lines changed: 103 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,99 @@
1+
## Release 1.20 (15th April 2024)
2+
3+
4+
Changes affecting the whole of bcftools, or multiple commands:
5+
6+
* Add short option -W for --write-index. The option now accepts an optional parameter
7+
which allows choosing between the TBI and CSI index formats.
8+
9+
10+
Changes affecting specific commands:
11+
12+
* bcftools consensus
13+
14+
- Add new --regions-overlap option which allows taking into account overlapping deletions
15+
that start outside of the fasta file target region.
16+
17+
* bcftools isec
18+
19+
- Add new option `-l, --file-list` to read the list of file names from a file
20+
21+
* bcftools merge
22+
23+
- Add new option `--force-single` to support single-file edge case (#2100)
24+
25+
* bcftools mpileup
26+
27+
- Add new option --indels-cns for an alternative indel calling model, which should increase
28+
the speed on long read data (thanks to using edlib) and the precision (thanks to a number
29+
of heuristics).
30+
31+
* bcftools norm
32+
33+
- Change the order of atomization and multiallelic splitting (when both -a,-m are given)
34+
from "atomize first, then split" to "split first, then atomize". This usually results
35+
in a simpler VCF representation. The previous behaviour can be achieved by explicitly
36+
streaming the output of the --atomize command into the --multiallelics splitting command.
37+
38+
- Fix Type=String multiallelic splitting for Number=A,R,G tags with incorrect number
39+
of values.
40+
41+
- Merging into multiallelic sites with `bcftools norm -m +indels` did not work. This is
42+
now fixed and the merging is now more strict about variant types, for example complex
43+
events, such as AC>TGA, are not considered as indels anymore (#2084)
44+
45+
* bcftools reheader
46+
47+
- Allow reading the input file from a stream with --fai (#2088)
48+
49+
* bcftools +setGT
50+
51+
- Support for custom genotypes based on the allele with higher depth, such
52+
as `--new-gt c:0/X` custom genotypes (#2065)
53+
54+
* bcftools +split-vep
55+
56+
- When only one of the tags is present, automatically choose INFO/BCSQ (the default
57+
tag name produced by `bcftools csq`) or INFO/CSQ (produced by VEP). When both
58+
tags are present, use the default INFO/CSQ.
59+
60+
- Transcript selection by MANE, PICK, and user-defined transcripts, for example
61+
62+
--select CANONICAL=YES
63+
--select MANE_SELECT!=""
64+
--select PolyPhen~probably_damaging
65+
66+
- Select all matching transcripts via --select, not just one
67+
68+
- Change automatic type parsing of VEP fields cDNA_position, CDS_position, and Protein_position
69+
from Integer to String, as it can be of the form "8586-8599/9231". The type Integer can be
70+
still enforced with `-c cDNA_position:int,CDS_position:int,Protein_position:int`.
71+
72+
- Recognize `-c field:str`, not just `-c field:string`, as advertised in the usage page
73+
74+
- Fix a bug which made filtering expressions containing missing values crash (#2098)
75+
76+
* bcftools stats
77+
78+
- When GT is missing but AD is present, the program determines the alternate allele from AD.
79+
However, if the AD tag has incorrect number of values, the program would exit with an error
80+
printing "Requested allele outside valid range". This is now fixed by taking into account
81+
the actual number of ALT alleles.
82+
83+
* bcftools +tag2tag
84+
85+
- Support for conversion from tags using localized alleles (e.g. LPL, LAD) to the family of
86+
standard tags (PL, AD)
87+
88+
* bcftools +trio-dnm2
89+
90+
- Extend --strictly-novel to exclude cases where the non-Mendelian allele
91+
is the reference allele. The change is motivated by the observation that
92+
this class of variants is enriched for errors (especially for indels),
93+
and better corresponds with the option name.
94+
95+
96+
197
## Release 1.19 (12th December 2023)
298

399

@@ -338,7 +434,7 @@ Changes affecting specific commands:
338434

339435
* bcftools norm
340436

341-
- New --multi-overlaps option allows to set overlapping alleles either to the
437+
- New --multi-overlaps option allows setting overlapping alleles either to the
342438
ref allele (the current default) or to a missing allele (#1764 and #1802)
343439

344440
- Fixed a bug in `-m -` which does not split missing FORMAT values correctly and
@@ -509,7 +605,7 @@ Changes affecting specific commands:
509605
- In addition to `--rename-annots`, which requires a file with name mappings,
510606
it is now possible to do the same on the command line `-c NEW_TAG:=OLD_TAG`
511607

512-
- Add new option --min-overlap which allows to specify the minimum required
608+
- Add new option --min-overlap to specify the minimum required
513609
overlap of intersecting regions
514610

515611
- Allow transferring ALT from VCF with or without replacement using
@@ -569,7 +665,7 @@ Changes affecting specific commands:
569665
* bcftools query
570666

571667
- Make the `--samples` and `--samples-file` options work also in the `--list-samples`
572-
mode. Add a new `--force-samples` option which allows to proceed even when some of
668+
mode. Add a new `--force-samples` option which enables proceeding even when some of
573669
the requested samples are not present in the VCF (#1631)
574670

575671
* bcftools +setGT
@@ -682,7 +778,7 @@ Changes affecting specific commands:
682778

683779
* bcftools mpileup:
684780

685-
- new --indel-size option which allows to increase the maximum considered
781+
- new --indel-size option which allows increasing the maximum
686782
indel size considered, large deletions in long read data are otherwise
687783
lost.
688784

@@ -903,7 +999,7 @@ Changes affecting specific commands:
903999

9041000
- New `--rename-annots` option to help fix broken VCFs (#1335)
9051001

906-
- New -C option allows to read a long list of options from a file to
1002+
- New -C option allows a long list of options to be read from a file to
9071003
prevent very long command lines.
9081004

9091005
- New `append-missing` logic allows annotations to be added for each ALT
@@ -1114,7 +1210,7 @@ Changes affecting specific commands:
11141210

11151211
- Preserve the case of the genome reference. (#1150)
11161212

1117-
- Add new `-a, --absent` option which allows to set positions with no
1213+
- Add new `-a, --absent` option which allows setting positions with no
11181214
supporting evidence to "N" (or any other character). (#848; #940)
11191215

11201216
* bcftools convert:
@@ -1162,7 +1258,7 @@ Changes affecting specific commands:
11621258
- Local alleles merging that produce LAA and LPL when requested, a draft
11631259
implementation of https://github.com/samtools/hts-specs/pull/434 (#1138)
11641260

1165-
- New `--no-index` which allows to merge unindexed files. Requires the input
1261+
- New `--no-index` which allows unindexed files to be merged. Requires the input
11661262
files to have chromosomes in the same order and consistent with the order
11671263
of sequences in the header. (PR #1253; samtools/htslib#1089)
11681264

abuf.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* The MIT License
22
3-
Copyright (c) 2021-2023 Genome Research Ltd.
3+
Copyright (c) 2021-2024 Genome Research Ltd.
44
55
Author: Petr Danecek <[email protected]>
66
@@ -418,6 +418,14 @@ static void _split_table_set_history(abuf_t *buf)
418418
{
419419
int i,j,ret;
420420
bcf1_t *rec = buf->split.rec;
421+
422+
// Don't update if the tag already exists. This is to prevent -a from overwriting -m
423+
int m = 0;
424+
char *tmp = NULL;
425+
ret = bcf_get_info_string(buf->hdr,rec,buf->split.info_tag,&tmp,&m);
426+
free(tmp);
427+
if ( ret>0 ) return;
428+
421429
buf->tmps.l = 0;
422430
ksprintf(&buf->tmps,"%s|%"PRIhts_pos"|%s|",bcf_seqname(buf->hdr,rec),rec->pos+1,rec->d.allele[0]);
423431
for (i=1; i<rec->n_allele; i++)

0 commit comments

Comments
 (0)