Bump up test coverage

jeromekelleher · jeromekelleher · commit 8cfe9ad8f65e · 2024-03-27T15:42:57.000Z
diff --git a/bio2zarr/vcf.py b/bio2zarr/vcf.py
@@ -1093,14 +1093,17 @@ def explode(self, *, worker_processes=1, show_progress=False):
             show_progress=show_progress,
         )
 
-    def explode_partition(self, partition, *, show_progress=False):
+    def explode_partition(self, partition, *, show_progress=False, worker_processes=1):
         self.load_metadata()
         if partition < 0 or partition >= self.num_partitions:
             raise ValueError(
                 "Partition index must be in the range 0 <= index < num_partitions"
             )
         return self.process_partition_slice(
-            partition, partition + 1, worker_processes=1, show_progress=show_progress
+            partition,
+            partition + 1,
+            worker_processes=worker_processes,
+            show_progress=show_progress,
         )
 
     def finalise(self):
@@ -1127,13 +1130,13 @@ def finalise(self):
 
 def explode(
     vcfs,
-    cif_path,
+    icf_path,
     *,
     column_chunk_size=16,
     worker_processes=1,
     show_progress=False,
 ):
-    writer = IntermediateColumnarFormatWriter(cif_path)
+    writer = IntermediateColumnarFormatWriter(icf_path)
     num_partitions = writer.init(
         vcfs,
         # Heuristic to get reasonable worker utilisation with lumpy partition sizing
@@ -1144,19 +1147,19 @@ def explode(
     )
     writer.explode(worker_processes=worker_processes, show_progress=show_progress)
     writer.finalise()
-    return IntermediateColumnarFormat(cif_path)
+    return IntermediateColumnarFormat(icf_path)
 
 
 def explode_init(
-    cif_path,
+    icf_path,
     vcfs,
     *,
     column_chunk_size=16,
     target_num_partitions=1,
     worker_processes=1,
     show_progress=False,
 ):
-    writer = IntermediateColumnarFormatWriter(cif_path)
+    writer = IntermediateColumnarFormatWriter(icf_path)
     return writer.init(
         vcfs,
         target_num_partitions=target_num_partitions,
@@ -1166,13 +1169,18 @@ def explode_init(
     )
 
 
-def explode_partition(cif_path, partition, *, show_progress=False):
-    writer = IntermediateColumnarFormatWriter(cif_path)
-    writer.explode_partition(partition, show_progress=show_progress)
+# NOTE only including worker_processes here so we can use the 0 option to get the
+# work done synchronously and so we can get test coverage on it. Should find a
+# better way to do this.
+def explode_partition(icf_path, partition, *, show_progress=False, worker_processes=1):
+    writer = IntermediateColumnarFormatWriter(icf_path)
+    writer.explode_partition(
+        partition, show_progress=show_progress, worker_processes=worker_processes
+    )
 
 
-def explode_finalise(cif_path):
-    writer = IntermediateColumnarFormatWriter(cif_path)
+def explode_finalise(icf_path):
+    writer = IntermediateColumnarFormatWriter(icf_path)
     writer.finalise()
 
 
diff --git a/tests/test_icf.py b/tests/test_icf.py
@@ -150,10 +150,10 @@ def test_double_explode_partition(self, tmp_path):
         vcf.explode_init(icf_path, [self.data_path])
         summary_file = icf_path / "wip" / f"p{partition}_summary.json"
         assert not summary_file.exists()
-        vcf.explode_partition(icf_path, partition)
+        vcf.explode_partition(icf_path, partition, worker_processes=0)
         with open(summary_file) as f:
             s1 = f.read()
-        vcf.explode_partition(icf_path, partition)
+        vcf.explode_partition(icf_path, partition, worker_processes=0)
         with open(summary_file) as f:
             s2 = f.read()
         assert s1 == s2
diff --git a/tests/test_vcf_examples.py b/tests/test_vcf_examples.py
@@ -360,12 +360,14 @@ def test_inspect(self, tmp_path):
         for row in data:
             assert "name" in row
 
-    @pytest.mark.parametrize("path", [
-        "tests/data/vcf/sample_missing_contig.vcf.gz",
-        "tests/data/vcf/sample_missing_contig.bcf",
-        "tests/data/vcf/sample_missing_contig_csi.vcf.gz"]
-        )
-
+    @pytest.mark.parametrize(
+        "path",
+        [
+            "tests/data/vcf/sample_missing_contig.vcf.gz",
+            "tests/data/vcf/sample_missing_contig.bcf",
+            "tests/data/vcf/sample_missing_contig_csi.vcf.gz",
+        ],
+    )
     def test_missing_contig_vcf(self, ds, tmp_path, path):
         # 20 has been removed from the header. The datasets is the same,
         # but the ordering of contigs has been permuted. This seems to be the
@@ -772,17 +774,19 @@ def test_info_string2(self, ds):
 
 
 class TestSplitFileErrors:
-
     def test_entirely_incompatible(self, tmp_path):
         path = "tests/data/vcf/"
         with pytest.raises(ValueError, match="Incompatible"):
-            vcf.explode_init(tmp_path, [path + "sample.vcf.gz", path + "1kg_2020_chrM.bcf"])
+            vcf.explode_init(
+                tmp_path, [path + "sample.vcf.gz", path + "1kg_2020_chrM.bcf"]
+            )
 
     def test_duplicate_paths(self, tmp_path):
         path = "tests/data/vcf/"
         with pytest.raises(ValueError, match="Duplicate"):
             vcf.explode_init(tmp_path, [path + "sample.vcf.gz"] * 2)
 
+
 @pytest.mark.parametrize(
     "name",
     [
@@ -835,13 +839,13 @@ def test_split_explode(tmp_path):
         vcf.explode_partition(out, j)
     vcf.explode_finalise(out)
     pcvcf = vcf.IntermediateColumnarFormat(out)
-    assert pcvcf.columns['POS'].vcf_field.summary.asdict() == {
-        'num_chunks': 3,
-        'compressed_size': 630,
-        'uncompressed_size': 1008,
-        'max_number': 1,
-        'max_value': 1235237,
-        'min_value': 10
+    assert pcvcf.columns["POS"].vcf_field.summary.asdict() == {
+        "num_chunks": 3,
+        "compressed_size": 630,
+        "uncompressed_size": 1008,
+        "max_number": 1,
+        "max_value": 1235237,
+        "min_value": 10,
     }
     vcf.encode(out, tmp_path / "test.zarr")
     vcf.validate("tests/data/vcf/sample.vcf.gz", tmp_path / "test.zarr")
@@ -851,5 +855,3 @@ def test_missing_filter(tmp_path):
     path = "tests/data/vcf/sample_missing_filter.vcf.gz"
     with pytest.raises(ValueError, match="Filter 'q10' was not defined in the header"):
         vcf.convert([path], tmp_path)
-
-