Skip to content

Commit 116a87c

Browse files
committed
Release 1.17
2 parents e7f638b + fac806b commit 116a87c

File tree

144 files changed

+6863
-1785
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

144 files changed

+6863
-1785
lines changed

.cirrus.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,8 @@ rockylinux_task:
139139

140140
macosx_task:
141141
name: macosx + clang
142-
osx_instance:
143-
image: catalina-base
142+
macos_instance:
143+
image: ghcr.io/cirruslabs/macos-ventura-base:latest
144144

145145
environment:
146146
CC: clang

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ the INSTALL document), the use of this software is governed by the GPL license.
99

1010
The MIT/Expat License
1111

12-
Copyright (C) 2012-2021 Genome Research Ltd.
12+
Copyright (C) 2012-2023 Genome Research Ltd.
1313

1414
Permission is hereby granted, free of charge, to any person obtaining a copy
1515
of this software and associated documentation files (the "Software"), to deal

Makefile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ OBJS = main.o vcfindex.o tabix.o \
4040
vcfcall.o mcall.o vcmp.o gvcf.o reheader.o convert.o vcfconvert.o tsv2vcf.o \
4141
vcfcnv.o vcfhead.o HMM.o consensus.o ploidy.o bin.o hclust.o version.o \
4242
regidx.o smpl_ilist.o csq.o vcfbuf.o \
43-
mpileup.o bam2bcf.o bam2bcf_indel.o bam_sample.o \
43+
mpileup.o bam2bcf.o bam2bcf_indel.o bam2bcf_iaux.o read_consensus.o bam_sample.o \
4444
vcfsort.o cols.o extsort.o dist.o abuf.o \
4545
ccall.o em.o prob1.o kmin.o str_finder.o
4646
PLUGIN_OBJS = vcfplugin.o
@@ -104,7 +104,7 @@ endif
104104

105105
include config.mk
106106

107-
PACKAGE_VERSION = 1.16
107+
PACKAGE_VERSION = 1.17
108108

109109
# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
110110
# description of the working tree: either a release tag with the same value
@@ -279,6 +279,8 @@ consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf
279279
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(htslib_hts_os_h) regidx.h $(bcftools_h) $(bam2bcf_h) $(bam_sample_h) $(gvcf_h)
280280
bam2bcf.o: bam2bcf.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h) mw.h
281281
bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) str_finder.h
282+
bam2bcf_iaux.o: bam2bcf_iaux.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) str_finder.h read_consensus.h cigar_state.h
283+
read_consensus.o: read_consensus.c read_consensus.h cigar_state.h $(htslib_hts_h) $(htslib_sam_h)
282284
bam_sample.o: bam_sample.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(khash_str2str_h) $(bam_sample_h) $(bcftools_h)
283285
version.o: version.h version.c
284286
hclust.o: hclust.c $(htslib_hts_h) $(htslib_kstring_h) $(bcftools_h) hclust.h

NEWS

Lines changed: 165 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,170 @@
1-
## Release 1.16 (18th August 2022)
1+
## Release 1.17 (21st February 2023)
2+
3+
4+
Changes affecting the whole of bcftools, or multiple commands:
5+
6+
* The -i/-e filtering expressions
7+
8+
- Error checks were added to prevent incorrect use of vector arithmetics. For example,
9+
when evaluating the sum of two vectors A and B, the resulting vector could contain
10+
nonsense values when the input vectors were not of the same length. The fix introduces
11+
the following logic:
12+
- evaluate to C_i = A_i + B_i when length(A)==length(B) and set length(C)=length(A)
13+
- evaluate to C_i = A_i + B_0 when length(B)=1 and set length(C)=length(A)
14+
- evaluate to C_i = A_0 + B_i when length(A)=1 and set length(C)=length(B)
15+
- throw an error when length(A)!=length(B) AND length(A)!=1 AND length(B)!=1
16+
17+
- Arrays in Number=R tags can be now subscripted by alleles found in FORMAT/GT. For example,
18+
19+
FORMAT/AD[GT] > 10 .. require support of more than 10 reads for each allele
20+
FORMAT/AD[0:GT] > 10 .. same as above, but in the first sample
21+
sSUM(FORMAT/AD[GT]) > 20 .. require total sample depth bigger than 20
22+
23+
* The commands `consensus -H` and `+split-vep -H`
24+
25+
- Drop unnecessary leading space in the first header column and newly print `#[1]columnName`
26+
instead of the previous `# [1]columnName` (#1856)
27+
28+
29+
Changes affecting specific commands:
30+
31+
* bcftools +allele-length
32+
33+
- Fix overflow for indels longer than 512bp and aggregate alleles equal or larger than
34+
that in the same bin (#1837)
35+
36+
* bcftools annotate
37+
38+
- Support sample reordering of annotation file (#1785)
39+
40+
- Restore lost functionality of the --pair-logic option (#1808)
41+
42+
* bcftools call
43+
44+
- Fix a bug where too many alleles passed to `-C alleles` via `-T` caused memory
45+
corruption (#1790)
46+
47+
- Fix a bug where indels constrained with `-C alleles -T` would sometimes be missed (#1706)
48+
49+
* bcftools consensus
50+
51+
- BREAKING CHANGE: the option `-I, --iupac-codes` newly outputs IUPAC codes based on FORMAT/GT
52+
of all samples. The `-s, --samples` and `-S, --samples-file` options can be used to subset
53+
samples. In order to ignore samples and consider only the REF and ALT columns (the original
54+
behavior prior to 1.17), run with `-s -` (#1828)
55+
56+
* bcftools convert
57+
58+
- Make variantkey conversion work for sites without an ALT allele (#1806)
59+
60+
* bcftools csq
61+
62+
- Fix a bug where a MNV with multiple consequences (e.g. missense + stop_gained)
63+
would report only the less severe one (#1810)
64+
65+
- GFF file parsing was made slightly more flexible, newly ids can be just 'XXX'
66+
rather than, for example, 'gene:XXX'
67+
68+
- New gff2gff perl script to fix GFF formatting differences
69+
70+
* bcftools +fill-tags
71+
72+
- More of the available annotations are now added by the `-t all` option
73+
74+
* bcftools +fixref
75+
76+
- New INFO/FIXREF annotation
77+
78+
- New -m swap mode
279

80+
* bcftools +mendelian
381

82+
- The +mendelian plugin has been deprecated and replaced with +mendelian2. The
83+
function of the plugin is the same but the command line options and the output
84+
format have changed, and for this reason it was introduced as a new plugin.
85+
86+
* bcftools mpileup
87+
88+
- Most of the annotations generated by mpileup are now optional via the
89+
`-a, --annotate` option, and several new (mostly experimental) annotations were added.
90+
91+
- New option `--indels-2.0` for an EXPERIMENTAL indel calling model. This model aims
92+
to address some known deficiencies of the current indel calling algorithm, specifically,
93+
it uses diploid reference consensus sequence. Note that in the current version it
94+
has the potential to increase sensitivity but at the cost of decreased specificity.
95+
96+
- Make the FS annotation (Fisher exact test strand bias) functional and remove it
97+
from the default annotations
98+
99+
* bcftools norm
100+
101+
- New --multi-overlaps option allows to set overlapping alleles either to the
102+
ref allele (the current default) or to a missing allele (#1764 and #1802)
103+
104+
- Fixed a bug in `-m -` which did not split missing FORMAT values correctly and
105+
could lead to empty FORMAT fields such as `::` instead of the correct `:.:` (#1818)
106+
107+
- The `--atomize` option previously would not split complex indels such as C>GGG.
108+
Newly these will be split into two records C>G and C>CGG (#1832)
109+
110+
* bcftools query
111+
112+
- Fix a rare bug where the printing of SAMPLE field with `query` was incorrectly
113+
suppressed when the `-e` option contained a sample expression while the formatting
114+
query did not. See #1783 for details.
115+
116+
* bcftools +setGT
117+
118+
- Add new `--new-gt X` option (#1800)
119+
120+
- Add new `--target-gt r:FLOAT` option to randomly select a proportion of genotypes (#1850)
121+
122+
- Fix a bug where `-t ./x` mode was advertised as selecting both phased and unphased
123+
half-missing genotypes, but was in fact selecting only unphased genotypes (#1844)
124+
125+
* bcftools +split-vep
126+
127+
- New options `-g, --gene-list` and `--gene-list-fields` which allow to prioritize
128+
consequences from a list of genes, or restrict output to the listed genes
129+
130+
- New `-H, --print-header` option to print the header with `-f`
131+
132+
- Work around a bug in the LOFTEE VEP plugin used to annotate gnomAD VCFs. There the
133+
LoF_info subfield contains commas which, in general, makes it impossible to parse the
134+
VEP subfields. The +split-vep plugin can now work with such files, replacing the offending
135+
commas with slash (/) characters. See also https://github.com/Ensembl/ensembl-vep/issues/1351
136+
137+
- Newly the `-c, --columns` option can be omitted when a subfield is used in `-i/-e` filtering
138+
expression. Note that `-c` may still have to be given when it is not possible to infer the
139+
type of the subfield. Note that this is an experimental feature.
140+
141+
* bcftools stats
142+
143+
- The per-sample stats (PSC) would not be computed when `-i/-e` filtering options and
144+
the `-s -` option were given but the expression did not include sample columns (#1835)
145+
146+
* bcftools +tag2tag
147+
148+
- Revamp of the plugin to allow wider range of tag conversions, specifically all combinations
149+
from FORMAT/GL,PL,GP to FORMAT/GL,PL,GP,GT
150+
151+
* bcftools +trio-dnm2
152+
153+
- New `-n, --strictly-novel` option to downplay alleles which violate Mendelian
154+
inheritance but are not novel
155+
156+
- Allow to set the `--pn` and `--pns` options separately for SNVs and indels and make
157+
the indel settings more strict by default
158+
159+
- Output missing FORMAT/VAF values in non-trio samples, rather than random nonsense values
160+
161+
* bcftools +variant-distance
162+
163+
- New option `-d, --direction` to choose the directionality: forward, reverse, nearest (the default)
164+
or both (#1829)
165+
166+
167+
## Release 1.16 (18th August 2022)
4168

5169
* New plugin `bcftools +variant-distance` to annotate records with distance to the
6170
nearest variant (#1690)
@@ -44,7 +208,6 @@ Changes affecting specific commands:
44208

45209
- Custom genotypes (e.g. `-n c:1/1`) now correctly override ploidy
46210

47-
48211
## Release 1.15.1 (7th April 2022)
49212

50213

abuf.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* The MIT License
22
3-
Copyright (c) 2021-2022 Genome Research Ltd.
3+
Copyright (c) 2021-2023 Genome Research Ltd.
44
55
Author: Petr Danecek <[email protected]>
66
@@ -154,22 +154,33 @@ static void _atomize_allele(abuf_t *buf, bcf1_t *rec, int ial)
154154
assert(atom);
155155
if ( altb!='-' ) kputc(altb, &atom->alt);
156156
if ( refb!='-' ) { kputc(refb, &atom->ref); atom->end++; }
157+
continue;
157158
}
158-
else
159+
buf->natoms++;
160+
hts_expand0(atom_t,buf->natoms,buf->matoms,buf->atoms);
161+
atom = &buf->atoms[buf->natoms-1];
162+
atom->ref.l = 0;
163+
atom->alt.l = 0;
164+
kputc(refb, &atom->ref);
165+
kputc(altb, &atom->alt);
166+
atom->beg = atom->end = i;
167+
atom->ial = ial;
168+
169+
if ( rlen!=alen && (i+1>=rlen || i+1>=alen) ) // the next base is an indel combined with SNV, e.g. C>GGG?
159170
{
160171
buf->natoms++;
161172
hts_expand0(atom_t,buf->natoms,buf->matoms,buf->atoms);
162173
atom = &buf->atoms[buf->natoms-1];
163174
atom->ref.l = 0;
164175
atom->alt.l = 0;
165176
kputc(refb, &atom->ref);
166-
kputc(altb, &atom->alt);
177+
kputc(refb, &atom->alt);
167178
atom->beg = atom->end = i;
168179
atom->ial = ial;
169180
}
170181
continue;
171182
}
172-
if ( i+1>=rlen || i+1>=alen ) // is the next base a deletion?
183+
if ( i+1>=rlen || i+1>=alen ) // is the next base an indel?
173184
{
174185
buf->natoms++;
175186
hts_expand0(atom_t,buf->natoms,buf->matoms,buf->atoms);
@@ -742,6 +753,8 @@ void _abuf_split(abuf_t *buf, bcf1_t *rec)
742753
_split_table_overlap(buf, j, atom);
743754
}
744755
}
756+
// _split_table_print(buf);
757+
// _split_table_print_atoms(buf);
745758
assert( !buf->rbuf.n ); // all records should be flushed first in the SPLIT mode
746759

747760
// Create the output records, transferring all annotations:

0 commit comments

Comments
 (0)