Skip to content

Commit 44deedc

Browse files
committed
Add support for optional removal of the unseen allele
The `bcftools merge -m` command can now suppress the output of the unseen allele <*> or <NON_REF> at variant sites (e.g. `-m both,*`) or all sites (e.g. `-m both,**`). The `bcftools view` has a new option `-A, --trim-unseen-allele` to remove the unseen allele at variant sites (`-A`) or all sites (`-AA`) Resolves #2023
1 parent 39a81be commit 44deedc

File tree

13 files changed

+331
-32
lines changed

13 files changed

+331
-32
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_
248248
vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kbitset_h) $(htslib_hts_os_h) $(htslib_bgzf_h) $(bcftools_h) extsort.h filter.h
249249
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h)
250250
vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h) $(filter_h)
251-
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) regidx.h $(bcftools_h) vcmp.h $(htslib_khash_h)
251+
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) regidx.h $(bcftools_h) vcmp.h $(htslib_khash_h) $(htslib_kbitset_h)
252252
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h abuf.h gff.h regidx.h
253253
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h) $(smpl_ilist_h)
254254
vcfroh.o: vcfroh.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) HMM.h $(smpl_ilist_h) $(filter_h)
@@ -257,7 +257,7 @@ vcfhead.o: vcfhead.c $(htslib_kstring_h) $(htslib_vcf_h) $(bcftools_h)
257257
vcfsom.o: vcfsom.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h)
258258
vcfsort.o: vcfsort.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_hts_os_h) kheap.h $(bcftools_h)
259259
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) bin.h dist.h
260-
vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(htslib_khash_str2int_h)
260+
vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(htslib_khash_str2int_h) $(htslib_kbitset_h)
261261
reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(htslib_thread_pool_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) $(khash_str2str_h)
262262
tabix.o: tabix.c $(htslib_bgzf_h) $(htslib_tbx_h)
263263
ccall.o: ccall.c $(htslib_kfunc_h) $(call_h) kmin.h $(prob1_h)

NEWS

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ Changes affecting specific commands:
5555

5656
- better description of fields
5757

58+
* bcftools merge
59+
60+
- Add `-m` modifiers to suppress the output of the unseen allele <*> or <NON_REF>
61+
at variant sites (e.g. `-m both,*`) or all sites (e.g. `-m both,**`)
5862

5963
* bcftools mpileup
6064

@@ -93,6 +97,11 @@ Changes affecting specific commands:
9397

9498
- Include sample name in the output header with `-H` whenever it makes sense (#1992)
9599

100+
* bcftools view
101+
102+
- Add new `-A, --trim-unseen-allele` option to remove the unseen allele <*> or <NON_REF>
103+
at variant sites (`-A`) or all sites (`-AA`)
104+
96105

97106
## Release 1.18 (25th July 2023)
98107

bcftools.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,4 +141,16 @@ static inline int bcf_double_test(double d, uint64_t value)
141141
#define bcf_double_is_missing(x) bcf_double_test((x),bcf_double_missing)
142142
#define bcf_double_is_missing_or_vector_end(x) (bcf_double_test((x),bcf_double_missing) || bcf_double_test((x),bcf_double_vector_end))
143143

144+
/*
 *  get_unseen_allele() - locate the symbolic "unseen allele" in a record
 *  @line:  record whose allele strings (line->d.allele) are inspected
 *
 *  Returns the index of the first ALT allele that is exactly "<*>",
 *  "<NON_REF>" or "<X>", or 0 if no such allele is present. The scan
 *  starts at index 1, so a return value of 0 never denotes a match.
 *
 *  NOTE(review): line->d.allele is read directly and nothing here unpacks
 *  the record, so presumably the caller must have unpacked it beforehand;
 *  confirm against the callers.
 */
static inline int get_unseen_allele(bcf1_t *line)
145+
{
146+
int i;
147+
for (i=1; i<line->n_allele; i++)
148+
{
149+
if ( !strcmp(line->d.allele[i],"<*>") ) return i;
150+
if ( !strcmp(line->d.allele[i],"<NON_REF>") ) return i;
151+
if ( !strcmp(line->d.allele[i],"<X>") ) return i;
152+
}
153+
return 0;
154+
}
155+
144156
#endif

doc/bcftools.1

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
.\" Title: bcftools
33
.\" Author: [see the "AUTHOR(S)" section]
44
.\" Generator: Asciidoctor 2.0.16
5-
.\" Date: 2023-10-23
5+
.\" Date: 2023-11-09
66
.\" Manual: \ \&
77
.\" Source: \ \&
88
.\" Language: English
99
.\"
10-
.TH "BCFTOOLS" "1" "2023-10-23" "\ \&" "\ \&"
10+
.TH "BCFTOOLS" "1" "2023-11-09" "\ \&" "\ \&"
1111
.ie \n(.g .ds Aq \(aq
1212
.el .ds Aq '
1313
.ss \n[.ss] 0
@@ -51,7 +51,7 @@ standard input (stdin) and outputs to the standard output (stdout). Several
5151
commands can thus be combined with Unix pipes.
5252
.SS "VERSION"
5353
.sp
54-
This manual page was last updated \fB2023\-10\-23 09:31 CEST\fP and refers to bcftools git version \fB1.18\-19\-g6374c6a+\fP.
54+
This manual page was last updated \fB2023\-11\-09 17:03 GMT\fP and refers to bcftools git version \fB1.18\-24\-g39a81be+\fP.
5555
.SS "BCF1"
5656
.sp
5757
The obsolete BCF1 format output by versions of samtools <= 0.1.19 is \fBnot\fP
@@ -2500,7 +2500,13 @@ in\-memory sorting and DIR is the temporary directory for external sorting. This
25002500
Stop after first record to estimate required time.
25012501
.RE
25022502
.sp
2503-
\fB\-e, \-\-error\-probability\fP \fIINT\fP
2503+
\fB\-e, \-\-exclude\fP [\fIqry\fP|\fIgt\fP]:\*(AqEXPRESSION\*(Aq
2504+
.RS 4
2505+
Exclude sites from query file (\fIqry:\fP) or genotype file (\fIgt:\fP) for which \fIEXPRESSION\fP is true.
2506+
For valid expressions see \fBEXPRESSIONS\fP.
2507+
.RE
2508+
.sp
2509+
\fB\-E, \-\-error\-probability\fP \fIINT\fP
25042510
.RS 4
25052511
Interpret genotypes and genotype likelihoods probabilistically. The value of \fIINT\fP
25062512
represents genotype quality when GT tag is used (e.g. Q=30 represents one error in 1,000 genotypes and
@@ -2510,13 +2516,20 @@ non\-zero integer can be provided).
25102516
.br
25112517
\~
25122518
.br
2513-
If \fB\-e\fP is set to 0, the discordance score can be interpreted as the number of mismatching genotypes,
2519+
If \fB\-E\fP is set to 0, the discordance score can be interpreted as the number of mismatching genotypes,
25142520
but only in the GT\-vs\-GT matching mode. See the \fB\-u, \-\-use\fP option below for additional notes and caveats.
25152521
\~
25162522
.br
25172523
\~
25182524
.br
2519-
If performance is an issue, set \fB\-e 0\fP for faster run times but less accurate results.
2525+
If performance is an issue, set \fB\-E 0\fP for faster run times but less accurate results.
2526+
\~
2527+
.br
2528+
\~
2529+
.br
2530+
Note that in previous versions of bcftools (<=1.18), this option used to be a lowercase \fB\-e\fP. It
2531+
changed to make room for the filtering option \fB\-e, \-\-exclude\fP to stay consistent across other
2532+
commands.
25202533
.RE
25212534
.sp
25222535
\fB\-g, \-\-genotypes\fP \fIFILE\fP
@@ -2529,6 +2542,12 @@ VCF/BCF file with reference genotypes to compare against
25292542
Homozygous genotypes only, useful with low coverage data (requires \fB\-g, \-\-genotypes\fP)
25302543
.RE
25312544
.sp
2545+
\fB\-i, \-\-include\fP [\fIqry\fP|\fIgt\fP]:\*(AqEXPRESSION\*(Aq
2546+
.RS 4
2547+
Include sites from query file (\fIqry:\fP) or genotype file (\fIgt:\fP) for which \fIEXPRESSION\fP is true.
2548+
For valid expressions see \fBEXPRESSIONS\fP.
2549+
.RE
2550+
.sp
25322551
\fB\-\-n\-matches\fP \fIINT\fP
25332552
.RS 4
25342553
Print only top INT matches for each sample, 0 for unlimited. Use negative value
@@ -2542,6 +2561,16 @@ Disable calculation of HWE probability to reduce memory requirements with
25422561
comparisons between very large number of sample pairs.
25432562
.RE
25442563
.sp
2564+
\fB\-o, \-\-output\fP \fIFILE\fP
2565+
.RS 4
2566+
Write to \fIFILE\fP rather than to standard output, where it is written by default.
2567+
.RE
2568+
.sp
2569+
\fB\-O, \-\-output\-type\fP \fIt\fP|\fIz\fP
2570+
.RS 4
2571+
Write a plain (\fIt\fP) or compressed (\fIz\fP) text tab\-delimited output.
2572+
.RE
2573+
.sp
25452574
\fB\-p, \-\-pairs\fP \fILIST\fP
25462575
.RS 4
25472576
A comma\-separated list of sample pairs to compare. When the \fB\-g\fP option is given, the first
@@ -2647,6 +2676,14 @@ By default, all header lines are displayed.
26472676
Also display the first \fIINT\fP variant records.
26482677
By default, no variant records are displayed.
26492678
.RE
2679+
.sp
2680+
\fB\-s, \-\-samples\fP \fIINT\fP
2681+
.RS 4
2682+
Display the first \fIINT\fP variant records including the last #CHROM header line with samples.
2683+
Running with \fB\-s 0\fP alone outputs the #CHROM header line only. Note that
2684+
the list of samples, with each sample per line, can be obtained with \f(CRbcftools query\fP using
2685+
the option \fB\-l, \-\-list\-samples\fP.
2686+
.RE
26502687
.SS "bcftools index [\fIOPTIONS\fP] \fIin.bcf\fP|\fIin.vcf.gz\fP"
26512688
.sp
26522689
Creates index for bgzip compressed VCF/BCF files for random access. CSI
@@ -2950,9 +2987,11 @@ maximum number of alternate alleles that can be included in the PL tag. The defa
29502987
is 0 which disables the feature and outputs values for all alternate alleles.
29512988
.RE
29522989
.sp
2953-
\fB\-m, \-\-merge\fP \fIsnps\fP|\fIindels\fP|\fIboth\fP|\fIsnp\-ins\-del\fP|\fIall\fP|\fInone\fP|\fIid\fP
2990+
\fB\-m, \-\-merge\fP \fIsnps\fP|\fIindels\fP|\fIboth\fP|\fIsnp\-ins\-del\fP|\fIall\fP|\fInone\fP|\fIid\fP[,\fI*\fP]
29542991
.RS 4
2955-
The option controls what types of multiallelic records can be created:
2992+
The option controls what types of multiallelic records can be created. If single asterisk
2993+
\fI*\fP is appended, the unobserved allele \fI<*>\fP or \fI<NON_REF>\fP will be removed at variant sites;
2994+
if two asterisks \fI**\fP are appended, the unobserved allele will be removed at all sites.
29562995
.RE
29572996
.sp
29582997
.if n .RS 4
@@ -2962,6 +3001,8 @@ The option controls what types of multiallelic records can be created:
29623001
\-m snps .. allow multiallelic SNP records
29633002
\-m indels .. allow multiallelic indel records
29643003
\-m both .. both SNP and indel records can be multiallelic
3004+
\-m both,* .. same as above but remove <*> (or <NON_REF>) from variant sites
3005+
\-m both,** .. same as above but remove <*> (or <NON_REF>) at all sites
29653006
\-m all .. SNP records can be merged with indel records
29663007
\-m snp\-ins\-del .. allow multiallelic SNVs, insertions, deletions, but don\*(Aqt mix them
29673008
\-m id .. merge by ID
@@ -5144,6 +5185,12 @@ Automatically index the output file
51445185
.RE
51455186
.SS "Subset options:"
51465187
.sp
5188+
\fB\-A, \-\-trim\-unseen\-allele\fP
5189+
.RS 4
5190+
remove the unseen allele \fI<*>\fP or \fI<NON_REF>\fP at variant sites when the option is given once (\-A) or
5191+
at all sites when the option is given twice (\fI\-AA\fP).
5192+
.RE
5193+
.sp
51475194
\fB\-a, \-\-trim\-alt\-alleles\fP
51485195
.RS 4
51495196
remove alleles not seen in the genotype fields from the ALT column. Note that if no alternate allele

doc/bcftools.html

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ <h2 id="_description">DESCRIPTION</h2>
5050
<div class="sect2">
5151
<h3 id="_version">VERSION</h3>
5252
<div class="paragraph">
53-
<p>This manual page was last updated <strong>2023-10-23 09:31 CEST</strong> and refers to bcftools git version <strong>1.18-19-g6374c6a+</strong>.</p>
53+
<p>This manual page was last updated <strong>2023-11-09 17:03 GMT</strong> and refers to bcftools git version <strong>1.18-24-g39a81be+</strong>.</p>
5454
</div>
5555
</div>
5656
<div class="sect2">
@@ -2200,19 +2200,29 @@ <h3 id="gtcheck">bcftools gtcheck [<em>OPTIONS</em>] [<strong>-g</strong> <em>ge
22002200
<dd>
22012201
<p>Stop after first record to estimate required time.</p>
22022202
</dd>
2203-
<dt class="hdlist1"><strong>-e, --error-probability</strong> <em>INT</em></dt>
2203+
<dt class="hdlist1"><strong>-e, --exclude</strong> [<em>qry</em>|<em>gt</em>]:'EXPRESSION'</dt>
2204+
<dd>
2205+
<p>Exclude sites from query file (<em>qry:</em>) or genotype file (<em>gt:</em>) for which <em>EXPRESSION</em> is true.
2206+
For valid expressions see <strong><a href="#expressions">EXPRESSIONS</a></strong>.</p>
2207+
</dd>
2208+
<dt class="hdlist1"><strong>-E, --error-probability</strong> <em>INT</em></dt>
22042209
<dd>
22052210
<p>Interpret genotypes and genotype likelihoods probabilistically. The value of <em>INT</em>
22062211
represents genotype quality when GT tag is used (e.g. Q=30 represents one error in 1,000 genotypes and
22072212
Q=40 one error in 10,000 genotypes) and is ignored when PL tag is used (in that case an arbitrary
22082213
non-zero integer can be provided).
22092214
&#160;<br>
22102215
&#160;<br>
2211-
If <strong>-e</strong> is set to 0, the discordance score can be interpreted as the number of mismatching genotypes,
2216+
If <strong>-E</strong> is set to 0, the discordance score can be interpreted as the number of mismatching genotypes,
22122217
but only in the GT-vs-GT matching mode. See the <strong>-u, --use</strong> option below for additional notes and caveats.
22132218
&#160;<br>
22142219
&#160;<br>
2215-
If performance is an issue, set <strong>-e 0</strong> for faster run times but less accurate results.</p>
2220+
If performance is an issue, set <strong>-E 0</strong> for faster run times but less accurate results.
2221+
&#160;<br>
2222+
&#160;<br>
2223+
Note that in previous versions of bcftools (&lt;=1.18), this option used to be a lowercase <strong>-e</strong>. It
2224+
changed to make room for the filtering option <strong>-e, --exclude</strong> to stay consistent across other
2225+
commands.</p>
22162226
</dd>
22172227
<dt class="hdlist1"><strong>-g, --genotypes</strong> <em>FILE</em></dt>
22182228
<dd>
@@ -2222,6 +2232,11 @@ <h3 id="gtcheck">bcftools gtcheck [<em>OPTIONS</em>] [<strong>-g</strong> <em>ge
22222232
<dd>
22232233
<p>Homozygous genotypes only, useful with low coverage data (requires <strong>-g, --genotypes</strong>)</p>
22242234
</dd>
2235+
<dt class="hdlist1"><strong>-i, --include</strong> [<em>qry</em>|<em>gt</em>]:'EXPRESSION'</dt>
2236+
<dd>
2237+
<p>Include sites from query file (<em>qry:</em>) or genotype file (<em>gt:</em>) for which <em>EXPRESSION</em> is true.
2238+
For valid expressions see <strong><a href="#expressions">EXPRESSIONS</a></strong>.</p>
2239+
</dd>
22252240
<dt class="hdlist1"><strong>--n-matches</strong> <em>INT</em></dt>
22262241
<dd>
22272242
<p>Print only top INT matches for each sample, 0 for unlimited. Use negative value
@@ -2233,6 +2248,14 @@ <h3 id="gtcheck">bcftools gtcheck [<em>OPTIONS</em>] [<strong>-g</strong> <em>ge
22332248
<p>Disable calculation of HWE probability to reduce memory requirements with
22342249
comparisons between very large number of sample pairs.</p>
22352250
</dd>
2251+
<dt class="hdlist1"><strong>-o, --output</strong> <em>FILE</em></dt>
2252+
<dd>
2253+
<p>Write to <em>FILE</em> rather than to standard output, where it is written by default.</p>
2254+
</dd>
2255+
<dt class="hdlist1"><strong>-O, --output-type</strong> <em>t</em>|<em>z</em></dt>
2256+
<dd>
2257+
<p>Write a plain (<em>t</em>) or compressed (<em>z</em>) text tab-delimited output.</p>
2258+
</dd>
22362259
<dt class="hdlist1"><strong>-p, --pairs</strong> <em>LIST</em></dt>
22372260
<dd>
22382261
<p>A comma-separated list of sample pairs to compare. When the <strong>-g</strong> option is given, the first
@@ -2339,6 +2362,13 @@ <h4 id="_options">Options:</h4>
23392362
<p>Also display the first <em>INT</em> variant records.
23402363
By default, no variant records are displayed.</p>
23412364
</dd>
2365+
<dt class="hdlist1"><strong>-s, --samples</strong> <em>INT</em></dt>
2366+
<dd>
2367+
<p>Display the first <em>INT</em> variant records including the last #CHROM header line with samples.
2368+
Running with <strong>-s 0</strong> alone outputs the #CHROM header line only. Note that
2369+
the list of samples, with each sample per line, can be obtained with <code>bcftools query</code> using
2370+
the option <strong>-l, --list-samples</strong>.</p>
2371+
</dd>
23422372
</dl>
23432373
</div>
23442374
</div>
@@ -2629,9 +2659,11 @@ <h3 id="merge">bcftools merge [<em>OPTIONS</em>] <em>A.vcf.gz</em> <em>B.vcf.gz<
26292659
maximum number of alternate alleles that can be included in the PL tag. The default value
26302660
is 0 which disables the feature and outputs values for all alternate alleles.</p>
26312661
</dd>
2632-
<dt class="hdlist1"><strong>-m, --merge</strong> <em>snps</em>|<em>indels</em>|<em>both</em>|<em>snp-ins-del</em>|<em>all</em>|<em>none</em>|<em>id</em></dt>
2662+
<dt class="hdlist1"><strong>-m, --merge</strong> <em>snps</em>|<em>indels</em>|<em>both</em>|<em>snp-ins-del</em>|<em>all</em>|<em>none</em>|<em>id</em>[,<em>*</em>]</dt>
26332663
<dd>
2634-
<p>The option controls what types of multiallelic records can be created:</p>
2664+
<p>The option controls what types of multiallelic records can be created. If single asterisk
2665+
<em>*</em> is appended, the unobserved allele <em>&lt;*&gt;</em> or <em>&lt;NON_REF&gt;</em> will be removed at variant sites;
2666+
if two asterisks <em>**</em> are appended, the unobserved allele will be removed at all sites.</p>
26352667
</dd>
26362668
</dl>
26372669
</div>
@@ -2641,6 +2673,8 @@ <h3 id="merge">bcftools merge [<em>OPTIONS</em>] <em>A.vcf.gz</em> <em>B.vcf.gz<
26412673
-m snps .. allow multiallelic SNP records
26422674
-m indels .. allow multiallelic indel records
26432675
-m both .. both SNP and indel records can be multiallelic
2676+
-m both,* .. same as above but remove &lt;*&gt; (or &lt;NON_REF&gt;) from variant sites
2677+
-m both,** .. same as above but remove &lt;*&gt; (or &lt;NON_REF&gt;) at all sites
26442678
-m all .. SNP records can be merged with indel records
26452679
-m snp-ins-del .. allow multiallelic SNVs, insertions, deletions, but don't mix them
26462680
-m id .. merge by ID</pre>
@@ -4506,6 +4540,11 @@ <h4 id="_output_options_2">Output options</h4>
45064540
<h4 id="_subset_options">Subset options:</h4>
45074541
<div class="dlist">
45084542
<dl>
4543+
<dt class="hdlist1"><strong>-A, --trim-unseen-allele</strong></dt>
4544+
<dd>
4545+
<p>remove the unseen allele <em>&lt;*&gt;</em> or <em>&lt;NON_REF&gt;</em> at variant sites when the option is given once (-A) or
4546+
at all sites when the option is given twice (<em>-AA</em>).</p>
4547+
</dd>
45094548
<dt class="hdlist1"><strong>-a, --trim-alt-alleles</strong></dt>
45104549
<dd>
45114550
<p>remove alleles not seen in the genotype fields from the ALT column. Note that if no alternate allele
@@ -5327,7 +5366,7 @@ <h2 id="_copying">COPYING</h2>
53275366
</div>
53285367
<div id="footer">
53295368
<div id="footer-text">
5330-
Last updated 2023-10-23 09:31:31 +0200
5369+
Last updated 2023-11-09 16:40:20 UTC
53315370
</div>
53325371
</div>
53335372
</body>

doc/bcftools.txt

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2013,13 +2013,17 @@ For "vertical" merge take a look at *<<concat,bcftools concat>>* or *<<norm,bcft
20132013
maximum number of alternate alleles that can be included in the PL tag. The default value
20142014
is 0 which disables the feature and outputs values for all alternate alleles.
20152015

2016-
*-m, --merge* 'snps'|'indels'|'both'|'snp-ins-del'|'all'|'none'|'id'::
2017-
The option controls what types of multiallelic records can be created:
2016+
*-m, --merge* 'snps'|'indels'|'both'|'snp-ins-del'|'all'|'none'|'id'[,'*']::
2017+
The option controls what types of multiallelic records can be created. If single asterisk
2018+
'\*' is appended, the unobserved allele '<*>' or '<NON_REF>' will be removed at variant sites;
2019+
if two asterisks '**' are appended, the unobserved allele will be removed at all sites.
20182020
----
20192021
-m none .. no new multiallelics, output multiple records instead
20202022
-m snps .. allow multiallelic SNP records
20212023
-m indels .. allow multiallelic indel records
20222024
-m both .. both SNP and indel records can be multiallelic
2025+
-m both,* .. same as above but remove <*> (or <NON_REF>) from variant sites
2026+
-m both,** .. same as above but remove <*> (or <NON_REF>) at all sites
20232027
-m all .. SNP records can be merged with indel records
20242028
-m snp-ins-del .. allow multiallelic SNVs, insertions, deletions, but don't mix them
20252029
-m id .. merge by ID
@@ -3397,6 +3401,10 @@ Convert between VCF and BCF. Former *bcftools subset*.
33973401

33983402

33993403
==== Subset options:
3404+
*-A, --trim-unseen-allele*::
3405+
remove the unseen allele '<*>' or '<NON_REF>' at variant sites when the option is given once (-A) or
3406+
at all sites when the option is given twice ('-AA').
3407+
34003408
*-a, --trim-alt-alleles*::
34013409
remove alleles not seen in the genotype fields from the ALT column. Note that if no alternate allele
34023410
remains after trimming, the record itself is not removed but ALT is set to ".".

0 commit comments

Comments
 (0)