Skip to content

Commit 7c78a14

Browse files
tomwhite authored and jeromekelleher committed
Add variant_length field
1 parent 5105ffe commit 7c78a14

File tree

4 files changed

+12
-3
lines changed

4 files changed

+12
-3
lines changed

bio2zarr/vcf2zarr/icf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ def make_field_def(name, vcf_type, vcf_number):
212212
make_field_def("FILTERS", "String", "."),
213213
make_field_def("REF", "String", "1"),
214214
make_field_def("ALT", "String", "."),
215+
make_field_def("rlen", "Integer", "1"), # computed field
215216
]
216217
return fields
217218

@@ -1276,6 +1277,7 @@ def process_partition(self, partition_index):
12761277
tcw.append("FILTERS", variant.FILTERS)
12771278
tcw.append("REF", variant.REF)
12781279
tcw.append("ALT", variant.ALT)
1280+
tcw.append("rlen", variant.end - variant.start)
12791281
for field in info_fields:
12801282
tcw.append(field.full_name, variant.INFO.get(field.name, None))
12811283
if has_gt:

bio2zarr/vcf2zarr/vcz.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def inspect(path):
3737
"variant_contig": "An identifier from the reference genome or an angle-bracketed ID"
3838
" string pointing to a contig in the assembly file",
3939
"variant_position": "The reference position",
40+
"variant_length": "The length of the variant measured in bases",
4041
"variant_id": "List of unique identifiers where applicable",
4142
"variant_allele": "List of the reference and alternate alleles",
4243
"variant_quality": "Phred-scaled quality score",
@@ -302,11 +303,12 @@ def fixed_field_spec(
302303
]
303304
name_map = {field.full_name: field for field in icf.metadata.fields}
304305

305-
# Only two of the fixed fields have a direct one-to-one mapping.
306+
# Only three of the fixed fields have a direct one-to-one mapping.
306307
array_specs.extend(
307308
[
308309
spec_from_field(name_map["QUAL"], array_name="variant_quality"),
309310
spec_from_field(name_map["POS"], array_name="variant_position"),
311+
spec_from_field(name_map["rlen"], array_name="variant_length"),
310312
]
311313
)
312314
array_specs.extend(

tests/test_icf.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class TestSmallExample:
1818
'ALT', 'CHROM', 'FILTERS', 'FORMAT/DP', 'FORMAT/GQ',
1919
'FORMAT/GT', 'FORMAT/HQ', 'ID', 'INFO/AA', 'INFO/AC',
2020
'INFO/AF', 'INFO/AN', 'INFO/DB', 'INFO/DP', 'INFO/H2',
21-
'INFO/NS', 'POS', 'QUAL', 'REF'
21+
'INFO/NS', 'POS', 'QUAL', 'REF', 'rlen'
2222
)
2323
# fmt: on
2424

@@ -117,6 +117,7 @@ class TestLocalAllelesExample:
117117
"POS",
118118
"QUAL",
119119
"REF",
120+
"rlen",
120121
)
121122

122123
@pytest.fixture(scope="class")
@@ -451,7 +452,7 @@ def icf(self, tmp_path_factory):
451452

452453
def test_repr(self, icf):
453454
assert repr(icf).startswith(
454-
"IntermediateColumnarFormat(fields=7, partitions=5, records=4665, path="
455+
"IntermediateColumnarFormat(fields=8, partitions=5, records=4665, path="
455456
)
456457

457458
def test_pos_repr(self, icf):

tests/test_vcf_examples.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ def test_position(self, ds):
5656
[111, 112, 14370, 17330, 1110696, 1230237, 1234567, 1235237, 10],
5757
)
5858

59+
def test_length(self, ds):
60+
nt.assert_array_equal(ds["variant_length"], [1, 1, 1, 1, 1, 1, 1, 1, 2])
61+
5962
def test_int_info_fields(self, ds):
6063
nt.assert_array_equal(
6164
ds["variant_NS"],
@@ -938,6 +941,7 @@ def test_info_fields(self, ds):
938941
"variant_filter",
939942
"variant_contig",
940943
"variant_position",
944+
"variant_length",
941945
"variant_allele",
942946
"variant_id",
943947
"variant_id_mask",

0 commit comments

Comments
 (0)