Skip to content

Commit 6c1b01b

Browse files
authored
Merge pull request #1772 from jmarshall/index-stats-all
Add `bcftools index --stats --all` option to display all contigs
2 parents 0e2b777 + 487831e commit 6c1b01b

File tree

2 files changed: 17 additions and 6 deletions.

doc/bcftools.txt

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1731,14 +1731,19 @@ the CSI first and then the TBI.
17311731
see *<<common_options,Common Options>>*
17321732

17331733
==== Stats options:
1734+
*-a, --all*::
1735+
Used in conjunction with *-s, --stats*, print per contig stats
1736+
for all contigs, even those with zero records and those for which
1737+
no stats are recorded in the index file (shown as '.').
1738+
17341739
*-n, --nrecords*::
17351740
print the number of records based on the CSI or TBI index files
17361741

17371742
*-s, --stats*::
17381743
Print per contig stats based on the CSI or TBI index files.
17391744
Output format is three tab-delimited columns listing the contig
17401745
name, contig length ('.' if unknown) and number of records for
1741-
the contig. Contigs with zero records are not printed.
1746+
the contig. Contigs with zero records are not printed by default.
17421747

17431748
[[isec]]
17441749
=== bcftools isec ['OPTIONS'] 'A.vcf.gz' 'B.vcf.gz' [...]

vcfindex.c

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,8 @@ DEALINGS IN THE SOFTWARE. */
4040

4141
enum {
4242
per_contig = 1,
43-
total = 2
43+
all_contigs = 2,
44+
total = 4
4445
};
4546

4647
static void usage(void)
@@ -58,6 +59,7 @@ static void usage(void)
5859
fprintf(stderr, " --threads INT use multithreading with INT worker threads [0]\n");
5960
fprintf(stderr, "\n");
6061
fprintf(stderr, "Stats options:\n");
62+
fprintf(stderr, " -a, --all with --stats, print stats for all contigs even when zero\n");
6163
fprintf(stderr, " -n, --nrecords print number of records based on existing index file\n");
6264
fprintf(stderr, " -s, --stats print per contig stats based on existing index file\n");
6365
fprintf(stderr, "\n");
@@ -181,13 +183,15 @@ int vcf_index_stats(char *fname, int stats)
181183
for (tid=0; tid<nseq; tid++)
182184
{
183185
uint64_t records, v;
184-
hts_idx_get_stat(tbx ? tbx->idx : idx, tid, &records, &v);
186+
int ret = hts_idx_get_stat(tbx ? tbx->idx : idx, tid, &records, &v);
185187
sum += records;
186-
if ( (stats&total) || !records ) continue;
188+
if ( (stats&total) || (records == 0 && !(stats&all_contigs)) ) continue;
187189
const char *ctg_name = tbx ? seq[tid] : hdr ? bcf_hdr_id2name(hdr, tid) : "n/a";
188190
bcf_hrec_t *hrec = hdr ? bcf_hdr_get_hrec(hdr, BCF_HL_CTG, "ID", ctg_name, NULL) : NULL;
189191
int hkey = hrec ? bcf_hrec_find_key(hrec, "length") : -1;
190-
printf("%s\t%s\t%" PRIu64 "\n", ctg_name, hkey<0?".":hrec->vals[hkey], records);
192+
printf("%s\t%s\t", ctg_name, hkey<0?".":hrec->vals[hkey]);
193+
if (ret >= 0) printf("%" PRIu64 "\n", records);
194+
else printf(".\n");
191195
}
192196
if ( !sum )
193197
{
@@ -224,6 +228,7 @@ int main_vcfindex(int argc, char *argv[])
224228

225229
static struct option loptions[] =
226230
{
231+
{"all",no_argument,NULL,'a'},
227232
{"csi",no_argument,NULL,'c'},
228233
{"tbi",no_argument,NULL,'t'},
229234
{"force",no_argument,NULL,'f'},
@@ -237,7 +242,7 @@ int main_vcfindex(int argc, char *argv[])
237242
};
238243

239244
char *tmp;
240-
while ((c = getopt_long(argc, argv, "ctfm:sno:", loptions, NULL)) >= 0)
245+
while ((c = getopt_long(argc, argv, "ctfm:snao:", loptions, NULL)) >= 0)
241246
{
242247
switch (c)
243248
{
@@ -250,6 +255,7 @@ int main_vcfindex(int argc, char *argv[])
250255
break;
251256
case 's': stats |= per_contig; break;
252257
case 'n': stats |= total; break;
258+
case 'a': stats |= all_contigs; break;
253259
case 9:
254260
n_threads = strtol(optarg,&tmp,10);
255261
if ( *tmp ) error("Could not parse argument: --threads %s\n", optarg);

0 commit comments

Comments
 (0)