1
+ import pathlib
2
+ import collections
3
+
1
4
import numpy as np
2
5
import numpy .testing as nt
3
6
import xarray .testing as xt
@@ -284,11 +287,15 @@ def test_chunk_size(
284
287
assert ds2 .sample_id .chunks == (x_chunks ,)
285
288
286
289
@pytest.mark.parametrize("worker_processes", [0, 1, 2])
@pytest.mark.parametrize("rotate", [0, 1, 2])
def test_split(self, ds, tmp_path, worker_processes, rotate):
    """Convert the pre-split VCF files and compare the result with ``ds``.

    The input files are the ``*.vcf.gz`` entries found in the directory
    named ``self.data_path + ".3.split"``. They are sorted and then rotated
    by ``rotate`` positions, so the conversion is exercised with several
    different input orderings, for each ``worker_processes`` setting.
    """
    out = tmp_path / "example.vcf.zarr"
    split_path = pathlib.Path(self.data_path + ".3.split")
    # sorted() consumes any iterable, so the glob generator is passed directly.
    files = collections.deque(sorted(split_path.glob("*.vcf.gz")))
    # Rotate the list to check we are OK with different orderings
    files.rotate(rotate)
    # Fail with a pointer to the fixture directory if the split files are
    # missing or incomplete, rather than with an opaque conversion error.
    assert len(files) == 3, f"expected 3 split VCF files in {split_path}"
    vcf.convert(files, out, worker_processes=worker_processes)
    ds2 = sg.load_dataset(out)
    xt.assert_equal(ds, ds2)
294
301
@@ -308,6 +315,15 @@ def test_full_pipeline(self, ds, tmp_path, worker_processes):
308
315
ds2 = sg .load_dataset (out )
309
316
xt .assert_equal (ds , ds2 )
310
317
318
@pytest.mark.parametrize("worker_processes", [0, 1, 2])
def test_worker_processes(self, ds, tmp_path, worker_processes):
    """Convert the single VCF at ``self.data_path`` and compare with ``ds``.

    The conversion uses ``chunk_length=3`` and is repeated for each
    parametrized ``worker_processes`` value; the dataset loaded back from
    the output path must equal ``ds``.
    """
    zarr_path = tmp_path / "example.vcf.zarr"
    vcf.convert(
        [self.data_path],
        zarr_path,
        chunk_length=3,
        worker_processes=worker_processes,
    )
    xt.assert_equal(ds, sg.load_dataset(zarr_path))
326
+
311
327
312
328
class Test1000G2020Example :
313
329
data_path = "tests/data/vcf/1kg_2020_chrM.vcf.gz"
0 commit comments