@@ -74,7 +74,12 @@ def test_not_enough_memory_for_two(
74
74
other_zarr_path = tmp_path / "zarr"
75
75
with caplog .at_level ("WARNING" ):
76
76
vcf2zarr .encode (
77
- icf_path , other_zarr_path , max_memory = max_memory , worker_processes = 2
77
+ icf_path ,
78
+ other_zarr_path ,
79
+ max_memory = max_memory ,
80
+ worker_processes = 2 ,
81
+ samples_chunk_size = 1000 ,
82
+ variants_chunk_size = 10_000 ,
78
83
)
79
84
assert "Limiting number of workers to 1 to keep within" in caplog .text
80
85
ds1 = sg .load_dataset (zarr_path )
@@ -164,6 +169,12 @@ def test_chunk_sizes(self, icf_path, samples_chunk_size, variants_chunk_size):
164
169
found += 1
165
170
assert found > 0
166
171
172
+ def test_default_chunk_size (self , icf_path ):
173
+ icf = vcf2zarr .IntermediateColumnarFormat (icf_path )
174
+ schema = vcf2zarr .VcfZarrSchema .generate (icf )
175
+ assert schema .samples_chunk_size == 10_000
176
+ assert schema .variants_chunk_size == 1000
177
+
167
178
168
179
class TestSchemaJsonRoundTrip :
169
180
def assert_json_round_trip (self , schema ):
@@ -297,8 +308,8 @@ def test_format_version(self, schema):
297
308
assert schema .format_version == vcz_mod .ZARR_SCHEMA_FORMAT_VERSION
298
309
299
310
def test_chunk_size (self , schema ):
300
- assert schema .samples_chunk_size == 1000
301
- assert schema .variants_chunk_size == 10000
311
+ assert schema .samples_chunk_size == 10000
312
+ assert schema .variants_chunk_size == 1000
302
313
303
314
def test_samples (self , schema ):
304
315
assert schema .asdict ()["samples" ] == [
@@ -322,7 +333,7 @@ def test_variant_contig(self, schema):
322
333
"name" : "variant_contig" ,
323
334
"dtype" : "i1" ,
324
335
"shape" : (9 ,),
325
- "chunks" : (10000 ,),
336
+ "chunks" : (1000 ,),
326
337
"dimensions" : ("variants" ,),
327
338
"description" : "An identifier from the reference genome or an "
328
339
"angle-bracketed ID string pointing to a contig in the assembly file" ,
@@ -342,7 +353,7 @@ def test_call_genotype(self, schema):
342
353
"name" : "call_genotype" ,
343
354
"dtype" : "i1" ,
344
355
"shape" : (9 , 3 , 2 ),
345
- "chunks" : (10000 , 1000 , 2 ),
356
+ "chunks" : (1000 , 10000 , 2 ),
346
357
"dimensions" : ("variants" , "samples" , "ploidy" ),
347
358
"description" : "" ,
348
359
"vcf_field" : None ,
@@ -361,7 +372,7 @@ def test_call_genotype_mask(self, schema):
361
372
"name" : "call_genotype_mask" ,
362
373
"dtype" : "bool" ,
363
374
"shape" : (9 , 3 , 2 ),
364
- "chunks" : (10000 , 1000 , 2 ),
375
+ "chunks" : (1000 , 10000 , 2 ),
365
376
"dimensions" : ("variants" , "samples" , "ploidy" ),
366
377
"description" : "" ,
367
378
"vcf_field" : None ,
@@ -380,7 +391,7 @@ def test_call_genotype_phased(self, schema):
380
391
"name" : "call_genotype_mask" ,
381
392
"dtype" : "bool" ,
382
393
"shape" : (9 , 3 , 2 ),
383
- "chunks" : (10000 , 1000 , 2 ),
394
+ "chunks" : (1000 , 10000 , 2 ),
384
395
"dimensions" : ("variants" , "samples" , "ploidy" ),
385
396
"description" : "" ,
386
397
"vcf_field" : None ,
@@ -399,7 +410,7 @@ def test_call_GQ(self, schema):
399
410
"name" : "call_GQ" ,
400
411
"dtype" : "i1" ,
401
412
"shape" : (9 , 3 ),
402
- "chunks" : (10000 , 1000 ),
413
+ "chunks" : (1000 , 10000 ),
403
414
"dimensions" : ("variants" , "samples" ),
404
415
"description" : "Genotype Quality" ,
405
416
"vcf_field" : "FORMAT/GQ" ,
0 commit comments