3
3
import pytest
4
4
import xarray .testing as xt
5
5
import sgkit as sg
6
+ import zarr
6
7
7
8
from bio2zarr import vcf
8
9
@@ -100,7 +101,6 @@ def test_exploded_metadata_mismatch(self, tmpdir, icf_path, version):
100
101
101
102
102
103
class TestEncodeDimensionSeparator :
103
-
104
104
@pytest .mark .parametrize ("dimension_separator" , [None , "/" ])
105
105
def test_directories (self , tmp_path , icf_path , dimension_separator ):
106
106
zarr_path = tmp_path / "zarr"
@@ -122,6 +122,77 @@ def test_bad_value(self, tmp_path, icf_path, dimension_separator):
122
122
vcf .encode (icf_path , zarr_path , dimension_separator = dimension_separator )
123
123
124
124
125
class TestSchemaJsonRoundTrip:
    """Check that schemas compare equal after an asjson()/fromjson() cycle."""

    def assert_json_round_trip(self, schema):
        """Serialise ``schema`` to JSON, parse it back and require equality."""
        serialised = schema.asjson()
        restored = vcf.VcfZarrSchema.fromjson(serialised)
        assert schema == restored

    def test_generated_no_changes(self, icf_path):
        """Round-trip the schema exactly as generated from the ICF."""
        source = vcf.IntermediateColumnarFormat(icf_path)
        generated = vcf.VcfZarrSchema.generate(source)
        self.assert_json_round_trip(generated)

    def test_generated_no_columns(self, icf_path):
        """Round-trip a generated schema after emptying its columns."""
        generated = vcf.VcfZarrSchema.generate(
            vcf.IntermediateColumnarFormat(icf_path)
        )
        generated.columns.clear()
        self.assert_json_round_trip(generated)

    def test_generated_no_samples(self, icf_path):
        """Round-trip a generated schema after emptying its sample IDs."""
        generated = vcf.VcfZarrSchema.generate(
            vcf.IntermediateColumnarFormat(icf_path)
        )
        generated.sample_id.clear()
        self.assert_json_round_trip(generated)

    def test_generated_change_dtype(self, icf_path):
        """Round-trip a generated schema with variant_position set to "i8"."""
        generated = vcf.VcfZarrSchema.generate(
            vcf.IntermediateColumnarFormat(icf_path)
        )
        position = generated.columns["variant_position"]
        position.dtype = "i8"
        self.assert_json_round_trip(generated)

    def test_generated_change_compressor(self, icf_path):
        """Round-trip a generated schema with a replaced compressor config."""
        generated = vcf.VcfZarrSchema.generate(
            vcf.IntermediateColumnarFormat(icf_path)
        )
        position = generated.columns["variant_position"]
        position.compressor = {"cname": "FAKE"}
        self.assert_json_round_trip(generated)
157
+
158
+
159
class TestSchemaEncode:
    """Encode using an edited schema file and inspect the resulting arrays."""

    @pytest.mark.parametrize(
        ["cname", "clevel", "shuffle"], [("lz4", 1, 0), ("zlib", 7, 1), ("zstd", 4, 2)]
    )
    def test_codec(self, tmp_path, icf_path, cname, clevel, shuffle):
        """Every column's array reports the codec settings put in the schema."""
        out_path = tmp_path / "zarr"
        generated = vcf.VcfZarrSchema.generate(
            vcf.IntermediateColumnarFormat(icf_path)
        )
        # Same three keys, assigned in the same order, on every column.
        overrides = {"cname": cname, "clevel": clevel, "shuffle": shuffle}
        for column in generated.columns.values():
            for key, value in overrides.items():
                column.compressor[key] = value
        schema_file = tmp_path / "schema"
        schema_file.write_text(generated.asjson())
        vcf.encode(icf_path, out_path, schema_path=schema_file)
        store = zarr.open(out_path)
        for column in generated.columns.values():
            codec = store[column.name].compressor
            assert codec.cname == cname
            assert codec.clevel == clevel
            assert codec.shuffle == shuffle

    @pytest.mark.parametrize("dtype", ["i4", "i8"])
    def test_genotype_dtype(self, tmp_path, icf_path, dtype):
        """The call_genotype array takes the dtype requested in the schema."""
        out_path = tmp_path / "zarr"
        generated = vcf.VcfZarrSchema.generate(
            vcf.IntermediateColumnarFormat(icf_path)
        )
        genotype = generated.columns["call_genotype"]
        genotype.dtype = dtype
        schema_file = tmp_path / "schema"
        schema_file.write_text(generated.asjson())
        vcf.encode(icf_path, out_path, schema_path=schema_file)
        store = zarr.open(out_path)
        assert store["call_genotype"].dtype == dtype
194
+
195
+
125
196
class TestDefaultSchema :
126
197
def test_format_version (self , schema ):
127
198
assert schema ["format_version" ] == vcf .ZARR_SCHEMA_FORMAT_VERSION
0 commit comments