@@ -32,7 +32,7 @@ def schema_path(icf_path, tmp_path_factory):
32
32
@pytest.fixture(scope="module")
def schema(schema_path):
    """Module-scoped fixture yielding the parsed schema object.

    Reads the text of the file at ``schema_path`` and passes it to
    ``vcf.VcfZarrSchema.fromjson``; the result is shared by every test in
    the module that requests ``schema``.
    """
    with open(schema_path) as schema_file:
        json_text = schema_file.read()
    return vcf.VcfZarrSchema.fromjson(json_text)
36
36
37
37
38
38
@pytest .fixture (scope = "module" )
@@ -83,7 +83,7 @@ def test_not_enough_memory_for_two(
83
83
class TestJsonVersions :
84
84
@pytest .mark .parametrize ("version" , ["0.1" , "1.0" , "xxxxx" , 0.2 ])
85
85
def test_zarr_schema_mismatch (self , schema , version ):
86
- d = dict ( schema )
86
+ d = schema . asdict ( )
87
87
d ["format_version" ] = version
88
88
with pytest .raises (ValueError , match = "Zarr schema format version mismatch" ):
89
89
vcf .VcfZarrSchema .fromdict (d )
@@ -156,13 +156,13 @@ def test_generated_no_samples(self, icf_path):
156
156
def test_generated_change_dtype(self, icf_path):
    """Override the dtype of ``variant_position`` and check the JSON round trip."""
    columnar = vcf.IntermediateColumnarFormat(icf_path)
    generated = vcf.VcfZarrSchema.generate(columnar)
    position_field = generated.field_map()["variant_position"]
    position_field.dtype = "i8"
    self.assert_json_round_trip(generated)
161
161
162
162
def test_generated_change_compressor(self, icf_path):
    """Override the compressor of ``variant_position`` and check the JSON round trip."""
    columnar = vcf.IntermediateColumnarFormat(icf_path)
    generated = vcf.VcfZarrSchema.generate(columnar)
    position_field = generated.field_map()["variant_position"]
    position_field.compressor = {"cname": "FAKE"}
    self.assert_json_round_trip(generated)
167
167
168
168
@@ -174,7 +174,7 @@ def test_codec(self, tmp_path, icf_path, cname, clevel, shuffle):
174
174
zarr_path = tmp_path / "zarr"
175
175
icf = vcf .IntermediateColumnarFormat (icf_path )
176
176
schema = vcf .VcfZarrSchema .generate (icf )
177
- for var in schema .fields . values () :
177
+ for var in schema .fields :
178
178
var .compressor ["cname" ] = cname
179
179
var .compressor ["clevel" ] = clevel
180
180
var .compressor ["shuffle" ] = shuffle
@@ -183,7 +183,7 @@ def test_codec(self, tmp_path, icf_path, cname, clevel, shuffle):
183
183
f .write (schema .asjson ())
184
184
vcf .encode (icf_path , zarr_path , schema_path = schema_path )
185
185
root = zarr .open (zarr_path )
186
- for var in schema .fields . values () :
186
+ for var in schema .fields :
187
187
a = root [var .name ]
188
188
assert a .compressor .cname == cname
189
189
assert a .compressor .clevel == clevel
@@ -194,7 +194,7 @@ def test_genotype_dtype(self, tmp_path, icf_path, dtype):
194
194
zarr_path = tmp_path / "zarr"
195
195
icf = vcf .IntermediateColumnarFormat (icf_path )
196
196
schema = vcf .VcfZarrSchema .generate (icf )
197
- schema .fields ["call_genotype" ].dtype = dtype
197
+ schema .field_map () ["call_genotype" ].dtype = dtype
198
198
schema_path = tmp_path / "schema"
199
199
with open (schema_path , "w" ) as f :
200
200
f .write (schema .asjson ())
@@ -203,16 +203,23 @@ def test_genotype_dtype(self, tmp_path, icf_path, dtype):
203
203
assert root ["call_genotype" ].dtype == dtype
204
204
205
205
206
def get_field_dict(a_schema, name):
    """Return the dict form of the field called ``name`` in ``a_schema``.

    Args:
        a_schema: Object exposing ``asdict()``; the returned mapping must
            have a ``"fields"`` entry that is an iterable of dicts, each
            carrying a ``"name"`` key.
        name: Name of the field to look up.

    Returns:
        The first entry of ``a_schema.asdict()["fields"]`` whose ``"name"``
        equals ``name``.

    Raises:
        KeyError: If no field has that name. Failing loudly here keeps a
            misspelled field name from surfacing later as an opaque
            ``None == {...}`` assertion failure.
    """
    fields = a_schema.asdict()["fields"]
    for field in fields:
        if field["name"] == name:
            return field
    available = [field["name"] for field in fields]
    raise KeyError(f"No field named {name!r} in schema; available: {available}")
211
+
212
+
206
213
class TestDefaultSchema :
207
214
def test_format_version(self, schema):
    """The schema's ``format_version`` equals ``vcf.ZARR_SCHEMA_FORMAT_VERSION``."""
    expected_version = vcf.ZARR_SCHEMA_FORMAT_VERSION
    assert schema.format_version == expected_version
209
216
210
217
def test_chunk_size(self, schema):
    """The schema reports chunk sizes of 1000 (samples) and 10000 (variants)."""
    samples_chunk = schema.samples_chunk_size
    assert samples_chunk == 1000
    variants_chunk = schema.variants_chunk_size
    assert variants_chunk == 10000
213
220
214
221
def test_dimensions (self , schema ):
215
- assert schema [ " dimensions" ] == [
222
+ assert schema . dimensions == [
216
223
"variants" ,
217
224
"samples" ,
218
225
"ploidy" ,
@@ -221,29 +228,29 @@ def test_dimensions(self, schema):
221
228
]
222
229
223
230
def test_samples(self, schema):
    """The dict form of the schema lists the three expected sample IDs in order."""
    expected = [
        {"id": sample_id} for sample_id in ("NA00001", "NA00002", "NA00003")
    ]
    assert schema.asdict()["samples"] == expected
227
234
228
235
def test_contigs(self, schema):
    """The dict form of the schema lists contigs 19, 20 and X with no length."""
    expected = [
        {"id": contig_id, "length": None} for contig_id in ("19", "20", "X")
    ]
    assert schema.asdict()["contigs"] == expected
232
239
233
240
def test_filters(self, schema):
    """The dict form of the schema lists the PASS, s50 and q10 filters in order."""
    id_and_description = (
        ("PASS", "All filters passed"),
        ("s50", "Less than 50% of samples have data"),
        ("q10", "Quality below 10"),
    )
    expected = [
        {"id": filter_id, "description": text}
        for filter_id, text in id_and_description
    ]
    assert schema.asdict()["filters"] == expected
239
246
240
247
def test_variant_contig (self , schema ):
241
- assert schema [ "fields" ][ " variant_contig"] == {
248
+ assert get_field_dict ( schema , " variant_contig") == {
242
249
"name" : "variant_contig" ,
243
250
"dtype" : "i1" ,
244
- "shape" : [ 9 ] ,
245
- "chunks" : [ 10000 ] ,
246
- "dimensions" : [ "variants" ] ,
251
+ "shape" : ( 9 ,) ,
252
+ "chunks" : ( 10000 ,) ,
253
+ "dimensions" : ( "variants" ,) ,
247
254
"description" : "" ,
248
255
"vcf_field" : None ,
249
256
"compressor" : {
@@ -253,16 +260,16 @@ def test_variant_contig(self, schema):
253
260
"shuffle" : 0 ,
254
261
"blocksize" : 0 ,
255
262
},
256
- "filters" : [] ,
263
+ "filters" : tuple () ,
257
264
}
258
265
259
266
def test_call_genotype (self , schema ):
260
- assert schema [ "fields" ][ " call_genotype"] == {
267
+ assert get_field_dict ( schema , " call_genotype") == {
261
268
"name" : "call_genotype" ,
262
269
"dtype" : "i1" ,
263
- "shape" : [ 9 , 3 , 2 ] ,
264
- "chunks" : [ 10000 , 1000 ] ,
265
- "dimensions" : [ "variants" , "samples" , "ploidy" ] ,
270
+ "shape" : ( 9 , 3 , 2 ) ,
271
+ "chunks" : ( 10000 , 1000 ) ,
272
+ "dimensions" : ( "variants" , "samples" , "ploidy" ) ,
266
273
"description" : "" ,
267
274
"vcf_field" : None ,
268
275
"compressor" : {
@@ -272,16 +279,16 @@ def test_call_genotype(self, schema):
272
279
"shuffle" : 2 ,
273
280
"blocksize" : 0 ,
274
281
},
275
- "filters" : [] ,
282
+ "filters" : tuple () ,
276
283
}
277
284
278
285
def test_call_genotype_mask (self , schema ):
279
- assert schema [ "fields" ][ " call_genotype_mask"] == {
286
+ assert get_field_dict ( schema , " call_genotype_mask") == {
280
287
"name" : "call_genotype_mask" ,
281
288
"dtype" : "bool" ,
282
- "shape" : [ 9 , 3 , 2 ] ,
283
- "chunks" : [ 10000 , 1000 ] ,
284
- "dimensions" : [ "variants" , "samples" , "ploidy" ] ,
289
+ "shape" : ( 9 , 3 , 2 ) ,
290
+ "chunks" : ( 10000 , 1000 ) ,
291
+ "dimensions" : ( "variants" , "samples" , "ploidy" ) ,
285
292
"description" : "" ,
286
293
"vcf_field" : None ,
287
294
"compressor" : {
@@ -291,16 +298,16 @@ def test_call_genotype_mask(self, schema):
291
298
"shuffle" : 2 ,
292
299
"blocksize" : 0 ,
293
300
},
294
- "filters" : [] ,
301
+ "filters" : tuple () ,
295
302
}
296
303
297
304
def test_call_genotype_phased (self , schema ):
298
- assert schema [ "fields" ][ " call_genotype_mask"] == {
305
+ assert get_field_dict ( schema , " call_genotype_mask") == {
299
306
"name" : "call_genotype_mask" ,
300
307
"dtype" : "bool" ,
301
- "shape" : [ 9 , 3 , 2 ] ,
302
- "chunks" : [ 10000 , 1000 ] ,
303
- "dimensions" : [ "variants" , "samples" , "ploidy" ] ,
308
+ "shape" : ( 9 , 3 , 2 ) ,
309
+ "chunks" : ( 10000 , 1000 ) ,
310
+ "dimensions" : ( "variants" , "samples" , "ploidy" ) ,
304
311
"description" : "" ,
305
312
"vcf_field" : None ,
306
313
"compressor" : {
@@ -310,16 +317,16 @@ def test_call_genotype_phased(self, schema):
310
317
"shuffle" : 2 ,
311
318
"blocksize" : 0 ,
312
319
},
313
- "filters" : [] ,
320
+ "filters" : tuple () ,
314
321
}
315
322
316
323
def test_call_GQ (self , schema ):
317
- assert schema [ "fields" ][ " call_GQ"] == {
324
+ assert get_field_dict ( schema , " call_GQ") == {
318
325
"name" : "call_GQ" ,
319
326
"dtype" : "i1" ,
320
- "shape" : [ 9 , 3 ] ,
321
- "chunks" : [ 10000 , 1000 ] ,
322
- "dimensions" : [ "variants" , "samples" ] ,
327
+ "shape" : ( 9 , 3 ) ,
328
+ "chunks" : ( 10000 , 1000 ) ,
329
+ "dimensions" : ( "variants" , "samples" ) ,
323
330
"description" : "Genotype Quality" ,
324
331
"vcf_field" : "FORMAT/GQ" ,
325
332
"compressor" : {
@@ -329,7 +336,7 @@ def test_call_GQ(self, schema):
329
336
"shuffle" : 0 ,
330
337
"blocksize" : 0 ,
331
338
},
332
- "filters" : [] ,
339
+ "filters" : tuple () ,
333
340
}
334
341
335
342
@@ -379,7 +386,7 @@ class TestVcfDescriptions:
379
386
],
380
387
)
381
388
def test_fields (self , schema , field , description ):
382
- assert schema [ "fields" ][ field ][ " description" ] == description
389
+ assert schema . field_map ()[ field ]. description == description
383
390
384
391
# This information is not in the schema yet,
385
392
# https://github.com/sgkit-dev/bio2zarr/issues/123
0 commit comments