@@ -84,11 +84,14 @@ class Region:
84
84
A htslib style region, where coordinates are 1-based and inclusive.
85
85
"""
86
86
87
- contig : str
87
+ contig : Optional [ str ] = None
88
88
start : Optional [int ] = None
89
89
end : Optional [int ] = None
90
90
91
91
def __post_init__ (self ):
92
+ if self .contig is None :
93
+ return
94
+
92
95
if self .start is not None :
93
96
self .start = int (self .start )
94
97
assert self .start > 0
@@ -408,9 +411,8 @@ def __init__(self, vcf_path, index_path=None):
408
411
vcf_path .suffix + VcfIndexType .CSI .value
409
412
)
410
413
if not index_path .exists ():
411
- raise FileNotFoundError (
412
- f"Cannot find .tbi or .csi file for { vcf_path } "
413
- )
414
+ # Use this as a proxy for "no index"
415
+ index_path = vcf_path
414
416
else :
415
417
index_path = pathlib .Path (index_path )
416
418
@@ -424,14 +426,18 @@ def __init__(self, vcf_path, index_path=None):
424
426
elif index_path .suffix == VcfIndexType .TABIX .value :
425
427
self .index_type = VcfIndexType .TABIX
426
428
self .file_type = VcfFileType .VCF
427
- else :
428
- raise ValueError ("Only .tbi or .csi indexes are supported." )
429
+ # else:
430
+
431
+ # raise ValueError("Only .tbi or .csi indexes are supported.")
429
432
430
433
self .vcf = cyvcf2 .VCF (vcf_path )
431
- self .vcf .set_index (str (self .index_path ))
434
+ if self .index_type is not None :
435
+ self .vcf .set_index (str (self .index_path ))
436
+
432
437
logger .debug (f"Loaded { vcf_path } with index { self .index_path } " )
433
438
self .sequence_names = None
434
439
440
+ self .index = None
435
441
if self .index_type == VcfIndexType .CSI :
436
442
# Determine the file-type based on the "aux" field.
437
443
self .index = read_csi (self .index_path )
@@ -441,7 +447,7 @@ def __init__(self, vcf_path, index_path=None):
441
447
self .sequence_names = self .index .parse_vcf_aux ()
442
448
else :
443
449
self .sequence_names = self .vcf .seqnames
444
- else :
450
+ elif self . index_type == VcfIndexType . TABIX :
445
451
self .index = read_tabix (self .index_path )
446
452
self .sequence_names = self .index .sequence_names
447
453
@@ -452,6 +458,8 @@ def __exit__(self, exc_type, exc_val, exc_tb):
452
458
return False
453
459
454
460
def contig_record_counts (self ):
461
+ if self .index is None :
462
+ return {None : np .inf }
455
463
d = dict (zip (self .sequence_names , self .index .record_counts ))
456
464
if self .file_type == VcfFileType .BCF :
457
465
d = {k : v for k , v in d .items () if v > 0 }
@@ -461,11 +469,15 @@ def count_variants(self, region):
461
469
return sum (1 for _ in self .variants (region ))
462
470
463
471
def variants(self, region):
    """Yield the variant records selected by ``region``.

    When no index was loaded (``self.index is None``) the file cannot be
    queried by region, so the only supported request is the "whole file"
    region whose ``contig`` is ``None``; every record is then streamed in
    file order.

    When an index is available, the region is passed to the underlying
    reader as a string and records positioned before the region start are
    dropped.

    Raises:
        ValueError: if a specific contig is requested but the file has no
            index. Raised on first iteration, because this is a generator.
    """
    if self.index is None:
        # Explicit raise rather than ``assert``: assertions are removed
        # under ``python -O``, which would silently return the whole file
        # for a contig-restricted request.
        if region.contig is not None:
            raise ValueError(
                "Cannot query a specific region of a VCF that has no index"
            )
        yield from self.vcf
        return

    start = 1 if region.start is None else region.start
    for var in self.vcf(str(region)):
        # Need to filter because of indels overlapping the region
        if var.POS >= start:
            yield var
469
481
470
482
def _filter_empty_and_refine (self , regions ):
471
483
"""
@@ -486,6 +498,9 @@ def partition_into_regions(
486
498
num_parts : Optional [int ] = None ,
487
499
target_part_size : Union [None , int , str ] = None ,
488
500
):
501
+ if self .index is None :
502
+ return [Region ()]
503
+
489
504
if num_parts is None and target_part_size is None :
490
505
raise ValueError ("One of num_parts or target_part_size must be specified" )
491
506
0 commit comments