@@ -487,8 +487,17 @@ def test_unindexed_bcf(self, tmp_path):
487487 with pytest .raises (ValueError , match = "No index" ):
488488 dinf .BagOfVcf (tmp_path .glob ("*.bcf" ))
489489
def test_missing_file(self):
    # Passing a path that does not exist is expected to raise an OSError
    # whose message mentions the offending filename.
    import re

    missing_file = "nonexistent.vcf.gz"
    # pytest.raises() interprets ``match`` as a regular expression, so the
    # dots in the filename must be escaped to be matched literally.
    with pytest.raises(OSError, match=re.escape(missing_file)):
        dinf.BagOfVcf([missing_file])
494+
def test_no_files(self):
    # An empty file list is expected to be rejected with a ValueError.
    no_files = []
    with pytest.raises(ValueError, match="No usable vcf/bcf files"):
        dinf.BagOfVcf(no_files)
498+
490499 @pytest .mark .usefixtures ("tmp_path" )
491- def test_file_list_duplicates (self , tmp_path ):
500+ def test_duplicate_files (self , tmp_path ):
492501 create_vcf_dataset (tmp_path , contig_lengths = [100_000 ])
493502 files = 2 * list (tmp_path .glob ("*.vcf.gz" ))
494503 with pytest .raises (ValueError , match = "File list contains duplicates" ):
@@ -504,10 +513,6 @@ def test_multiple_files_claim_a_contig(self, tmp_path):
504513 with pytest .raises (ValueError , match = "Both .* contain records for sequence" ):
505514 dinf .BagOfVcf (files )
506515
def test_no_files(self):
    # Constructing a bag from zero files is expected to raise a ValueError.
    expected_error = pytest.raises(ValueError, match="No usable vcf/bcf files")
    with expected_error:
        dinf.BagOfVcf([])
511516 @pytest .mark .usefixtures ("tmp_path" )
512517 def test_no_GT_field (self , tmp_path ):
513518 def remove_GT_header (filename ):
@@ -533,7 +538,7 @@ def test_individuals(self, tmp_path, num_individuals):
533538
534539 @pytest .mark .filterwarnings ("ignore:not all requested samples found:UserWarning" )
535540 @pytest .mark .usefixtures ("tmp_path" )
536- def test_bad_individuals (self , tmp_path ):
541+ def test_missing_individuals (self , tmp_path ):
537542 samples = create_vcf_dataset (tmp_path , contig_lengths = [100_000 ])
538543 individuals = ["nonexistent_1" ] + samples ["A" ] + ["nonexistent_2" ]
539544 with pytest .raises (ValueError , match = "individuals not found" ) as err :
@@ -543,6 +548,37 @@ def test_bad_individuals(self, tmp_path):
543548 for ind in samples ["A" ]:
544549 assert ind not in err .value .args [0 ]
545550
@pytest.mark.usefixtures("tmp_path")
def test_duplicate_individuals(self, tmp_path):
    # Listing the same individual twice is expected to raise a ValueError.
    samples = create_vcf_dataset(tmp_path, contig_lengths=[100_000])
    population_a = samples["A"]
    # Repeat the first individual at the end of the list.
    individuals = population_a + population_a[:1]
    with pytest.raises(
        ValueError, match="Individuals list contains duplicates"
    ):
        dinf.BagOfVcf(
            tmp_path.glob("*.vcf.gz"),
            individuals=individuals,
        )
557+
@pytest.mark.usefixtures("tmp_path")
def test_contigs(self, tmp_path):
    # Requesting a subset of contigs should yield a bag whose iteration
    # produces exactly the requested contig names.
    create_vcf_dataset(
        tmp_path, contig_lengths=[100_000, 200_000, 300_000]
    )
    requested = ["1", "3"]
    vcf_files = tmp_path.glob("*.vcf.gz")
    bag = dinf.BagOfVcf(vcf_files, contigs=requested)
    assert set(bag) == set(requested)
564+
@pytest.mark.usefixtures("tmp_path")
def test_missing_contigs(self, tmp_path):
    # Requesting contigs that are absent is expected to raise a ValueError
    # whose message names the absent contigs but not the ones requested
    # alongside them ("1" and "2").
    create_vcf_dataset(tmp_path, contig_lengths=[100_000, 200_000])
    absent = ("nonexistent_a", "nonexistent_b")
    present = ("1", "2")
    contigs = [absent[0], *present, absent[1]]
    with pytest.raises(ValueError, match="contigs not found") as err:
        dinf.BagOfVcf(tmp_path.glob("*.vcf.gz"), contigs=contigs)
    message = err.value.args[0]
    for name in absent:
        assert name in message
    for name in present:
        assert name not in message
575+
@pytest.mark.usefixtures("tmp_path")
def test_duplicate_contigs(self, tmp_path):
    # Naming the same contig twice is expected to raise a ValueError.
    create_vcf_dataset(tmp_path, contig_lengths=[100_000])
    repeated_contigs = ["1", "1"]
    with pytest.raises(
        ValueError, match="Contigs list contains duplicates"
    ):
        dinf.BagOfVcf(
            tmp_path.glob("*.vcf.gz"),
            contigs=repeated_contigs,
        )
581+
546582 @pytest .mark .usefixtures ("tmp_path" )
547583 def test_unused_contigs (self , tmp_path ):
548584 # Contigs in the header should be ignored if they have no variants.
0 commit comments