Skip to content

Commit 55a7623

Browse files
Rename IndexedVcf to VcfFile
1 parent 7796a04 commit 55a7623

File tree

4 files changed: 21 additions and 21 deletions.

bio2zarr/cli.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -624,8 +624,8 @@ def vcfpartition(vcfs, verbose, num_partitions, partition_size):
624624
num_parts_per_path = max(1, num_partitions // len(vcfs))
625625

626626
for vcf_path in vcfs:
627-
indexed_vcf = vcf_utils.IndexedVcf(vcf_path)
628-
regions = indexed_vcf.partition_into_regions(
627+
vcf_file = vcf_utils.VcfFile(vcf_path)
628+
regions = vcf_file.partition_into_regions(
629629
num_parts=num_parts_per_path, target_part_size=partition_size
630630
)
631631
for region in regions:

bio2zarr/vcf2zarr/icf.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -228,8 +228,8 @@ def make_field_def(name, vcf_type, vcf_number):
228228

229229

230230
def scan_vcf(path, target_num_partitions):
231-
with vcf_utils.IndexedVcf(path) as indexed_vcf:
232-
vcf = indexed_vcf.vcf
231+
with vcf_utils.VcfFile(path) as vcf_file:
232+
vcf = vcf_file.vcf
233233
filters = []
234234
pass_index = -1
235235
for h in vcf.header_iter():
@@ -270,10 +270,10 @@ def scan_vcf(path, target_num_partitions):
270270
filters=filters,
271271
fields=fields,
272272
partitions=[],
273-
num_records=sum(indexed_vcf.contig_record_counts().values()),
273+
num_records=sum(vcf_file.contig_record_counts().values()),
274274
)
275275

276-
regions = indexed_vcf.partition_into_regions(num_parts=target_num_partitions)
276+
regions = vcf_file.partition_into_regions(num_parts=target_num_partitions)
277277
for region in regions:
278278
metadata.partitions.append(
279279
VcfPartition(
@@ -1093,9 +1093,9 @@ def process_partition(self, partition_index):
10931093
self.path,
10941094
partition_index,
10951095
) as tcw:
1096-
with vcf_utils.IndexedVcf(partition.vcf_path) as ivcf:
1096+
with vcf_utils.VcfFile(partition.vcf_path) as vcf:
10971097
num_records = 0
1098-
for variant in ivcf.variants(partition.region):
1098+
for variant in vcf.variants(partition.region):
10991099
num_records += 1
11001100
last_position = variant.POS
11011101
tcw.append("CHROM", variant.CHROM)

bio2zarr/vcf_utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -396,7 +396,7 @@ class VcfIndexType(Enum):
396396
TABIX = ".tbi"
397397

398398

399-
class IndexedVcf(contextlib.AbstractContextManager):
399+
class VcfFile(contextlib.AbstractContextManager):
400400
def __init__(self, vcf_path, index_path=None):
401401
self.vcf = None
402402
self.file_type = None
@@ -471,7 +471,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
471471

472472
def contig_record_counts(self):
473473
if self.index is None:
474-
return {self.sequence_names[0]: np.inf}
474+
return {self.sequence_names[0]: RECORD_COUNT_UNKNOWN}
475475
d = dict(zip(self.sequence_names, self.index.record_counts))
476476
if self.file_type == VcfFileType.BCF:
477477
d = {k: v for k, v in d.items() if v > 0}

tests/test_vcf_utils.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -14,24 +14,24 @@ def assert_part_counts_non_zero(part_counts, index_file):
1414
assert np.all(part_counts > 0)
1515

1616

17-
class TestIndexedVcf:
17+
class TestVcfFile:
1818
def get_instance(self, index_file):
1919
vcf_path = data_path / (".".join(list(index_file.split("."))[:-1]))
20-
return vcf_utils.IndexedVcf(vcf_path, data_path / index_file)
20+
return vcf_utils.VcfFile(vcf_path, data_path / index_file)
2121

2222
def test_context_manager_success(self):
2323
# Nominal case
24-
with vcf_utils.IndexedVcf(data_path / "sample.bcf") as iv:
24+
with vcf_utils.VcfFile(data_path / "sample.bcf") as iv:
2525
assert iv.vcf is not None
2626
assert iv.vcf is None
2727

2828
def test_context_manager_error(self):
2929
with pytest.raises(FileNotFoundError, match="no-such-file"):
30-
with vcf_utils.IndexedVcf(data_path / "no-such-file.bcf"):
30+
with vcf_utils.VcfFile(data_path / "no-such-file.bcf"):
3131
pass
3232

3333
def test_indels_filtered(self):
34-
with vcf_utils.IndexedVcf(data_path / "chr_m_indels.vcf.gz") as vfile:
34+
with vcf_utils.VcfFile(data_path / "chr_m_indels.vcf.gz") as vfile:
3535
# Hand-picked example that results in filtering
3636
region = vcf_utils.Region("chrM", 300, 314)
3737
pos = [var.POS for var in vfile.variants(region)]
@@ -173,7 +173,7 @@ def test_partition_into_n_parts_unindexed(
173173
):
174174
copy_path = tmp_path / vcf_file
175175
shutil.copyfile(data_path / vcf_file, copy_path)
176-
indexed_vcf = vcf_utils.IndexedVcf(copy_path)
176+
indexed_vcf = vcf_utils.VcfFile(copy_path)
177177
regions = list(indexed_vcf.partition_into_regions(num_parts=num_parts))
178178
assert len(regions) == 1
179179
part_variant_counts = np.array(
@@ -238,12 +238,12 @@ def test_partition_invalid_arguments(self):
238238
@pytest.mark.parametrize("path", ["y", data_path / "xxx", "/x/y.csi"])
239239
def test_missing_index_file(self, path):
240240
with pytest.raises(FileNotFoundError, match="Specified index path"):
241-
vcf_utils.IndexedVcf(data_path / "sample.vcf.gz", path)
241+
vcf_utils.VcfFile(data_path / "sample.vcf.gz", path)
242242

243243
def test_bad_index_format(self):
244244
vcf_file = data_path / "sample.vcf.gz"
245245
with pytest.raises(ValueError, match="Only .tbi or .csi indexes"):
246-
vcf_utils.IndexedVcf(vcf_file, vcf_file)
246+
vcf_utils.VcfFile(vcf_file, vcf_file)
247247

248248
@pytest.mark.parametrize(
249249
"filename",
@@ -256,11 +256,11 @@ def test_bad_index_format(self):
256256
],
257257
)
258258
def test_unindexed_single_contig(self, tmp_path, filename):
259-
f1 = vcf_utils.IndexedVcf(data_path / filename)
259+
f1 = vcf_utils.VcfFile(data_path / filename)
260260
assert f1.index is not None
261261
copy_path = tmp_path / filename
262262
shutil.copyfile(data_path / filename, copy_path)
263-
f2 = vcf_utils.IndexedVcf(copy_path)
263+
f2 = vcf_utils.VcfFile(copy_path)
264264
assert f2.index is None
265265
crc1 = f1.contig_record_counts()
266266
assert len(crc1) == 1
@@ -280,7 +280,7 @@ def test_unindexed_single_contig(self, tmp_path, filename):
280280
def test_unindexed_multi_contig(self, tmp_path, filename):
281281
copy_path = tmp_path / filename
282282
shutil.copyfile(data_path / filename, copy_path)
283-
f = vcf_utils.IndexedVcf(copy_path)
283+
f = vcf_utils.VcfFile(copy_path)
284284
with pytest.raises(ValueError, match="Multi-contig VCFs must be indexed"):
285285
list(f.variants())
286286

0 commit comments

Comments
 (0)