sgkit-dev
diff --git a/‎tests/data/vcf/sample.vcf.gz.3.split/19:1-.vcf.gz‎
767 Bytes b/‎tests/data/vcf/sample.vcf.gz.3.split/19:1-.vcf.gz‎
767 Bytes
diff --git a/‎tests/data/vcf/sample.vcf.gz.3.split/19:1-.vcf.gz.csi‎
113 Bytes b/‎tests/data/vcf/sample.vcf.gz.3.split/19:1-.vcf.gz.csi‎
113 Bytes
diff --git a/‎tests/data/vcf/sample.vcf.gz.3.split/20.vcf.gz‎
1013 Bytes b/‎tests/data/vcf/sample.vcf.gz.3.split/20.vcf.gz‎
1013 Bytes
diff --git a/‎tests/data/vcf/sample.vcf.gz.3.split/20.vcf.gz.csi‎
149 Bytes b/‎tests/data/vcf/sample.vcf.gz.3.split/20.vcf.gz.csi‎
149 Bytes
diff --git a/‎tests/data/vcf/sample.vcf.gz.3.split/X.vcf.gz‎
753 Bytes b/‎tests/data/vcf/sample.vcf.gz.3.split/X.vcf.gz‎
753 Bytes
diff --git a/‎tests/data/vcf/sample.vcf.gz.3.split/X.vcf.gz.csi‎
112 Bytes b/‎tests/data/vcf/sample.vcf.gz.3.split/X.vcf.gz.csi‎
112 Bytes
diff --git a/‎tests/test_vcf_examples.py‎
Lines changed: 20 additions & 4 deletions b/‎tests/test_vcf_examples.py‎
Lines changed: 20 additions & 4 deletions
@@ -1,3 +1,6 @@
+import pathlib
+import collections
+
 import numpy as np
 import numpy.testing as nt
 import xarray.testing as xt
@@ -284,11 +287,15 @@ def test_chunk_size(
         assert ds2.sample_id.chunks == (x_chunks,)
 
     @pytest.mark.parametrize("worker_processes", [0, 1, 2])
-    def test_worker_processes(self, ds, tmp_path, worker_processes):
+    @pytest.mark.parametrize("rotate", [0, 1, 2])
+    def test_split(self, ds, tmp_path, worker_processes, rotate):
+        """Convert the split VCF parts in every rotated input order."""
         out = tmp_path / "example.vcf.zarr"
-        vcf.convert(
-            [self.data_path], out, chunk_length=3, worker_processes=worker_processes
-        )
+        split_path = pathlib.Path(self.data_path + ".3.split")
+        files = collections.deque(sorted(split_path.glob("*.vcf.gz")))
+        assert len(files) == 3
+        files.rotate(rotate)
+        vcf.convert(files, out, worker_processes=worker_processes)
         ds2 = sg.load_dataset(out)
         xt.assert_equal(ds, ds2)
 
@@ -308,6 +315,15 @@ def test_full_pipeline(self, ds, tmp_path, worker_processes):
         ds2 = sg.load_dataset(out)
         xt.assert_equal(ds, ds2)
 
+    @pytest.mark.parametrize("worker_processes", [0, 1, 2])
+    def test_worker_processes(self, ds, tmp_path, worker_processes):
+        """Convert with each worker count and compare against the ``ds`` fixture."""
+        zarr_path = tmp_path / "example.vcf.zarr"
+        convert_kwargs = {"chunk_length": 3, "worker_processes": worker_processes}
+        vcf.convert([self.data_path], zarr_path, **convert_kwargs)
+        reloaded = sg.load_dataset(zarr_path)
+        xt.assert_equal(ds, reloaded)
+
 
 class Test1000G2020Example:
     data_path = "tests/data/vcf/1kg_2020_chrM.vcf.gz"