Skip to content

Commit 8cfe9ad

Browse files
Bump up test coverage
1 parent 4e91127 commit 8cfe9ad

File tree

3 files changed

+41
-31
lines changed

3 files changed

+41
-31
lines changed

bio2zarr/vcf.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,14 +1093,17 @@ def explode(self, *, worker_processes=1, show_progress=False):
10931093
show_progress=show_progress,
10941094
)
10951095

1096-
def explode_partition(self, partition, *, show_progress=False):
1096+
def explode_partition(self, partition, *, show_progress=False, worker_processes=1):
10971097
self.load_metadata()
10981098
if partition < 0 or partition >= self.num_partitions:
10991099
raise ValueError(
11001100
"Partition index must be in the range 0 <= index < num_partitions"
11011101
)
11021102
return self.process_partition_slice(
1103-
partition, partition + 1, worker_processes=1, show_progress=show_progress
1103+
partition,
1104+
partition + 1,
1105+
worker_processes=worker_processes,
1106+
show_progress=show_progress,
11041107
)
11051108

11061109
def finalise(self):
@@ -1127,13 +1130,13 @@ def finalise(self):
11271130

11281131
def explode(
11291132
vcfs,
1130-
cif_path,
1133+
icf_path,
11311134
*,
11321135
column_chunk_size=16,
11331136
worker_processes=1,
11341137
show_progress=False,
11351138
):
1136-
writer = IntermediateColumnarFormatWriter(cif_path)
1139+
writer = IntermediateColumnarFormatWriter(icf_path)
11371140
num_partitions = writer.init(
11381141
vcfs,
11391142
# Heuristic to get reasonable worker utilisation with lumpy partition sizing
@@ -1144,19 +1147,19 @@ def explode(
11441147
)
11451148
writer.explode(worker_processes=worker_processes, show_progress=show_progress)
11461149
writer.finalise()
1147-
return IntermediateColumnarFormat(cif_path)
1150+
return IntermediateColumnarFormat(icf_path)
11481151

11491152

11501153
def explode_init(
1151-
cif_path,
1154+
icf_path,
11521155
vcfs,
11531156
*,
11541157
column_chunk_size=16,
11551158
target_num_partitions=1,
11561159
worker_processes=1,
11571160
show_progress=False,
11581161
):
1159-
writer = IntermediateColumnarFormatWriter(cif_path)
1162+
writer = IntermediateColumnarFormatWriter(icf_path)
11601163
return writer.init(
11611164
vcfs,
11621165
target_num_partitions=target_num_partitions,
@@ -1166,13 +1169,18 @@ def explode_init(
11661169
)
11671170

11681171

1169-
def explode_partition(cif_path, partition, *, show_progress=False):
1170-
writer = IntermediateColumnarFormatWriter(cif_path)
1171-
writer.explode_partition(partition, show_progress=show_progress)
1172+
# NOTE only including worker_processes here so we can use the 0 option to get the
1173+
# work done synchronously and so we can get test coverage on it. Should find a
1174+
# better way to do this.
1175+
def explode_partition(icf_path, partition, *, show_progress=False, worker_processes=1):
1176+
writer = IntermediateColumnarFormatWriter(icf_path)
1177+
writer.explode_partition(
1178+
partition, show_progress=show_progress, worker_processes=worker_processes
1179+
)
11721180

11731181

1174-
def explode_finalise(cif_path):
1175-
writer = IntermediateColumnarFormatWriter(cif_path)
1182+
def explode_finalise(icf_path):
1183+
writer = IntermediateColumnarFormatWriter(icf_path)
11761184
writer.finalise()
11771185

11781186

tests/test_icf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,10 @@ def test_double_explode_partition(self, tmp_path):
150150
vcf.explode_init(icf_path, [self.data_path])
151151
summary_file = icf_path / "wip" / f"p{partition}_summary.json"
152152
assert not summary_file.exists()
153-
vcf.explode_partition(icf_path, partition)
153+
vcf.explode_partition(icf_path, partition, worker_processes=0)
154154
with open(summary_file) as f:
155155
s1 = f.read()
156-
vcf.explode_partition(icf_path, partition)
156+
vcf.explode_partition(icf_path, partition, worker_processes=0)
157157
with open(summary_file) as f:
158158
s2 = f.read()
159159
assert s1 == s2

tests/test_vcf_examples.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -360,12 +360,14 @@ def test_inspect(self, tmp_path):
360360
for row in data:
361361
assert "name" in row
362362

363-
@pytest.mark.parametrize("path", [
364-
"tests/data/vcf/sample_missing_contig.vcf.gz",
365-
"tests/data/vcf/sample_missing_contig.bcf",
366-
"tests/data/vcf/sample_missing_contig_csi.vcf.gz"]
367-
)
368-
363+
@pytest.mark.parametrize(
364+
"path",
365+
[
366+
"tests/data/vcf/sample_missing_contig.vcf.gz",
367+
"tests/data/vcf/sample_missing_contig.bcf",
368+
"tests/data/vcf/sample_missing_contig_csi.vcf.gz",
369+
],
370+
)
369371
def test_missing_contig_vcf(self, ds, tmp_path, path):
370372
# 20 has been removed from the header. The dataset is the same,
371373
# but the ordering of contigs has been permuted. This seems to be the
@@ -772,17 +774,19 @@ def test_info_string2(self, ds):
772774

773775

774776
class TestSplitFileErrors:
775-
776777
def test_entirely_incompatible(self, tmp_path):
777778
path = "tests/data/vcf/"
778779
with pytest.raises(ValueError, match="Incompatible"):
779-
vcf.explode_init(tmp_path, [path + "sample.vcf.gz", path + "1kg_2020_chrM.bcf"])
780+
vcf.explode_init(
781+
tmp_path, [path + "sample.vcf.gz", path + "1kg_2020_chrM.bcf"]
782+
)
780783

781784
def test_duplicate_paths(self, tmp_path):
782785
path = "tests/data/vcf/"
783786
with pytest.raises(ValueError, match="Duplicate"):
784787
vcf.explode_init(tmp_path, [path + "sample.vcf.gz"] * 2)
785788

789+
786790
@pytest.mark.parametrize(
787791
"name",
788792
[
@@ -835,13 +839,13 @@ def test_split_explode(tmp_path):
835839
vcf.explode_partition(out, j)
836840
vcf.explode_finalise(out)
837841
pcvcf = vcf.IntermediateColumnarFormat(out)
838-
assert pcvcf.columns['POS'].vcf_field.summary.asdict() == {
839-
'num_chunks': 3,
840-
'compressed_size': 630,
841-
'uncompressed_size': 1008,
842-
'max_number': 1,
843-
'max_value': 1235237,
844-
'min_value': 10
842+
assert pcvcf.columns["POS"].vcf_field.summary.asdict() == {
843+
"num_chunks": 3,
844+
"compressed_size": 630,
845+
"uncompressed_size": 1008,
846+
"max_number": 1,
847+
"max_value": 1235237,
848+
"min_value": 10,
845849
}
846850
vcf.encode(out, tmp_path / "test.zarr")
847851
vcf.validate("tests/data/vcf/sample.vcf.gz", tmp_path / "test.zarr")
@@ -851,5 +855,3 @@ def test_missing_filter(tmp_path):
851855
path = "tests/data/vcf/sample_missing_filter.vcf.gz"
852856
with pytest.raises(ValueError, match="Filter 'q10' was not defined in the header"):
853857
vcf.convert([path], tmp_path)
854-
855-

0 commit comments

Comments
 (0)