diff --git a/bio2zarr/vcz.py b/bio2zarr/vcz.py index 12dbd2ff..18a79ee7 100644 --- a/bio2zarr/vcz.py +++ b/bio2zarr/vcz.py @@ -520,7 +520,7 @@ def init( # Doing this synchronously - this is fine surely self.encode_samples(root) if self.source.filters is not None: - self.encode_filter_id(root) + self.encode_filters(root) if self.source.contigs is not None: self.encode_contigs(root) @@ -581,9 +581,7 @@ def encode_contigs(self, root): ) array.attrs["_ARRAY_DIMENSIONS"] = ["contigs"] - def encode_filter_id(self, root): - # TODO need a way to store description also - # https://github.com/sgkit-dev/vcf-zarr-spec/issues/19 + def encode_filters(self, root): filters = self.source.filters array = root.array( "filter_id", @@ -593,6 +591,14 @@ def encode_filter_id(self, root): compressor=DEFAULT_ZARR_COMPRESSOR, ) array.attrs["_ARRAY_DIMENSIONS"] = ["filters"] + array = root.array( + "filter_description", + data=[filt.description for filt in filters], + shape=len(filters), + dtype="str", + compressor=DEFAULT_ZARR_COMPRESSOR, + ) + array.attrs["_ARRAY_DIMENSIONS"] = ["filters"] def init_array(self, root, array_spec, variants_dim_size): kwargs = dict(zarr_utils.ZARR_FORMAT_KWARGS) diff --git a/tests/test_vcf_examples.py b/tests/test_vcf_examples.py index 52919722..fa333f36 100644 --- a/tests/test_vcf_examples.py +++ b/tests/test_vcf_examples.py @@ -30,6 +30,14 @@ def ds(self, tmp_path_factory): def test_filters(self, ds): nt.assert_array_equal(ds["filter_id"], ["PASS", "s50", "q10"]) + nt.assert_array_equal( + ds["filter_description"], + [ + "All filters passed", + "Less than 50% of samples have data", + "Quality below 10", + ], + ) nt.assert_array_equal( ds["variant_filter"], [ @@ -957,6 +965,7 @@ def test_info_fields(self, ds): "contig_id", "contig_length", "filter_id", + "filter_description", "region_index", "sample_id", ] diff --git a/tests/test_vcz.py b/tests/test_vcz.py index 49f863a7..1bd73162 100644 --- a/tests/test_vcz.py +++ b/tests/test_vcz.py @@ -740,6 +740,7 @@ def test_vcz(self, zarr_path): "/sample_id", "/variant_id_mask", "/filter_id", + "/filter_description", "/contig_id", ] nt.assert_array_equal(sorted(df["name"]), sorted(fields))