Skip to content

Commit d092f00

Browse files
committed
Add new option --force-single to support single-file edge case
This also - adds a check for cases where the --file-list is empty - adds a synonymous option --force-no-index to --no-index. Resolves #2100
1 parent 1a975b3 commit d092f00

File tree

5 files changed

+35
-4
lines changed

5 files changed

+35
-4
lines changed

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ Changes affecting specific commands:
1212

1313
- Add new option `-l, --file-list` to read the list of file names from a file
1414

15+
* bcftools merge
16+
17+
- Add new option `--force-single` to support single-file edge case (#2100)
18+
1519
* bcftools norm
1620

1721
- Change the order of atomization and multiallelic splitting (when both -a,-m are given)

doc/bcftools.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1970,13 +1970,18 @@ also given explicitly using the *--print-header* and *--use-header* options.
19701970
Note that only records from different files can be merged, never from the same file.
19711971
For "vertical" merge take a look at *<<concat,bcftools concat>>* or *<<norm,bcftools norm>> -m* instead.
19721972

1973+
*--force-no-index*::
1974+
synonymous with *--no-index*
19731975

19741976
*--force-samples*::
19751977
if the merged files contain duplicate sample names, proceed anyway.
19761978
Duplicate sample names will be resolved by prepending the index of the file
19771979
as it appeared on the command line to the conflicting sample name (see
19781980
'2:S3' in the above example).
19791981

1982+
*--force-single*::
1983+
run even if only one file is given on input
1984+
19801985
*--print-header*::
19811986
print only merged header and exit
19821987

test/merge.LPL.0.out

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
##fileformat=VCFv4.3
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
4+
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Genotype Likelihoods">
5+
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihoods">
6+
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic Depths">
7+
##FORMAT=<ID=DF,Number=R,Type=Float,Description="Dummy">
8+
##FORMAT=<ID=DD,Number=A,Type=Integer,Description="Dummy">
9+
##contig=<ID=1,assembly=b37,length=249250621>
10+
##reference=ref.fa
11+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
12+
1 30000 . C T . . . GT:PL:GL:AD:DF:DD 0/1:20,0,20:20,0,20:1,2:0.1,0.2:1
13+
1 30001 . C CA . . . GT:PL:GL:AD:DF:DD 0/1:10,0,10:10,0,10:1,2:0.1,0.2:1
14+
1 30002 . C CA,CAA . . . GT:PL:GL:AD:DF:DD 1/2:20,20,20,10,0,10:20,20,20,10,0,10:0,1,2:0,0.1,0.2:1,2
15+
1 30003 . C CA,CAA,CAAA . . . GT:PL:GL:AD:DF:DD 1/2:20,20,20,10,0,10,20,20,10,20:20,20,20,10,0,10,20,20,10,20:0,1,2,1:0,0.1,0.2,0.1:1,2,3

test/test.pl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
run_test(\&test_vcf_isec,$opts,in=>['isec-miss.2.1','isec-miss.2.2','isec-miss.2.3'],out=>'isec-miss.2.1.out',args=>'-n +1 -r 20:100,20:140,12:55,20:140,20:100');
6161
run_test(\&test_vcf_isec,$opts,in=>['isec-miss.2.1','isec-miss.2.2','isec-miss.2.3'],out=>'isec-miss.2.1.out',args=>'-R {PATH}/isec-miss.1.regs.txt -n +1');
6262
run_test(\&test_vcf_merge,$opts,in=>['merge.join.a','merge.join.b'],out=>'merge.join.1.out',args=>'-i AF:join');
63+
run_test(\&test_vcf_merge,$opts,in=>['merge.LPL.a'],out=>'merge.LPL.0.out',args=>'--force-single');
6364
run_test(\&test_vcf_merge,$opts,in=>['merge.LPL.a','merge.LPL.b','merge.LPL.c'],out=>'merge.LPL.1.out',args=>'--force-samples');
6465
run_test(\&test_vcf_merge,$opts,in=>['merge.LPL.a','merge.LPL.b','merge.LPL.c'],out=>'merge.LPL.2.out',args=>'--force-samples -L 1');
6566
run_test(\&test_vcf_merge,$opts,in=>['merge.LPL.a','merge.LPL.b','merge.LPL.c'],out=>'merge.LPL.3.out',args=>'--force-samples -L 2');

vcfmerge.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* vcfmerge.c -- Merge multiple VCF/BCF files to create one multi-sample file.
22
3-
Copyright (C) 2012-2023 Genome Research Ltd.
3+
Copyright (C) 2012-2024 Genome Research Ltd.
44
55
Author: Petr Danecek <[email protected]>
66
@@ -174,7 +174,7 @@ typedef struct
174174
maux_t *maux;
175175
regidx_t *regs; // apply regions only after the blocks are expanded
176176
regitr_t *regs_itr;
177-
int header_only, collapse, output_type, force_samples, merge_by_id, do_gvcf, filter_logic, missing_to_ref, no_index;
177+
int header_only, collapse, output_type, force_samples, force_single, merge_by_id, do_gvcf, filter_logic, missing_to_ref, no_index;
178178
char *header_fname, *output_fname, *regions_list, *info_rules, *file_list;
179179
faidx_t *gvcf_fai;
180180
info_rule_t *rules;
@@ -3465,7 +3465,9 @@ static void usage(void)
34653465
fprintf(stderr, "Usage: bcftools merge [options] <A.vcf.gz> <B.vcf.gz> [...]\n");
34663466
fprintf(stderr, "\n");
34673467
fprintf(stderr, "Options:\n");
3468+
fprintf(stderr, " --force-no-index Merge unindexed files, synonymous to --no-index\n");
34683469
fprintf(stderr, " --force-samples Resolve duplicate sample names\n");
3470+
fprintf(stderr, " --force-single Run even if there is only one file on input\n");
34693471
fprintf(stderr, " --print-header Print only the merged header and exit\n");
34703472
fprintf(stderr, " --use-header FILE Use the provided header\n");
34713473
fprintf(stderr, " -0 --missing-to-ref Assume genotypes at missing sites are 0/0\n");
@@ -3527,6 +3529,8 @@ int main_vcfmerge(int argc, char *argv[])
35273529
{"missing-rules",required_argument,NULL,'M'},
35283530
{"no-version",no_argument,NULL,8},
35293531
{"no-index",no_argument,NULL,10},
3532+
{"force-no-index",no_argument,NULL,10},
3533+
{"force-single",no_argument,NULL,12},
35303534
{"filter-logic",required_argument,NULL,'F'},
35313535
{"write-index",no_argument,NULL,11},
35323536
{NULL,0,NULL,0}
@@ -3608,14 +3612,13 @@ int main_vcfmerge(int argc, char *argv[])
36083612
case 8 : args->record_cmd_line = 0; break;
36093613
case 10 : args->no_index = 1; break;
36103614
case 11 : args->write_index = 1; break;
3615+
case 12 : args->force_single = 1; break;
36113616
case 'h':
36123617
case '?': usage(); break;
36133618
default: error("Unknown argument: %s\n", optarg);
36143619
}
36153620
}
36163621
if ( argc==optind && !args->file_list ) usage();
3617-
if ( argc-optind<2 && !args->file_list ) usage();
3618-
36193622
if ( args->no_index )
36203623
{
36213624
if ( args->regions_list ) error("Error: cannot combine --no-index with -r/-R\n");
@@ -3656,6 +3659,9 @@ int main_vcfmerge(int argc, char *argv[])
36563659
for (i=0; i<nfiles; i++) free(files[i]);
36573660
free(files);
36583661
}
3662+
if ( !args->files->nreaders ) usage();
3663+
if ( args->files->nreaders==1 && !args->force_single ) error("Expected two or more files to merge, got only one. Use --force-single to proceed anyway\n");
3664+
36593665
merge_vcf(args);
36603666
bcf_sr_destroy(args->files);
36613667
if ( args->regs ) regidx_destroy(args->regs);

0 commit comments

Comments
 (0)