Skip to content

Commit 2dc4f7b

Browse files
Will-Tyler authored and jeromekelleher committed
Only add LAA if PL is present
1 parent 9af381d commit 2dc4f7b

File tree

3 files changed

+50
-30
lines changed

3 files changed

+50
-30
lines changed

bio2zarr/vcf2zarr/icf.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ def scan_vcf(path, target_num_partitions, *, local_alleles):
239239
# Indicates whether vcf2zarr can introduce local alleles
240240
can_localize = False
241241
should_add_laa_field = True
242+
has_PL = False
242243
fields = fixed_vcf_field_definitions()
243244
for h in vcf.header_iter():
244245
if h["HeaderType"] in ["INFO", "FORMAT"]:
@@ -247,12 +248,15 @@ def scan_vcf(path, target_num_partitions, *, local_alleles):
247248
field.vcf_type = "Integer"
248249
field.vcf_number = "."
249250
fields.append(field)
250-
if field.category == "FORMAT" and field.name in {"GT", "AD"}:
251-
can_localize = True
252-
if (field.category, field.name) == ("FORMAT", "LAA"):
253-
should_add_laa_field = False
254-
255-
if local_alleles and can_localize and should_add_laa_field:
251+
if field.category == "FORMAT":
252+
if field.name in {"GT", "AD", "PL"}:
253+
can_localize = True
254+
if field.name == "PL":
255+
has_PL = True
256+
if field.name == "LAA":
257+
should_add_laa_field = False
258+
259+
if local_alleles and can_localize and should_add_laa_field and has_PL:
256260
laa_field = VcfField(
257261
category="FORMAT",
258262
name="LAA",

tests/test_icf.py

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,6 @@ def icf(self, tmp_path_factory):
2727
out = tmp_path_factory.mktemp("data") / "example.exploded"
2828
return vcf2zarr.explode(out, [self.data_path], local_alleles=False)
2929

30-
@pytest.fixture(scope="class")
31-
def icf_local_alleles(self, tmp_path_factory):
32-
out = tmp_path_factory.mktemp("data") / "example.exploded"
33-
return vcf2zarr.explode(out, [self.data_path])
34-
3530
def test_format_version(self, icf):
3631
assert icf.metadata.format_version == icf_mod.ICF_METADATA_FORMAT_VERSION
3732

@@ -54,13 +49,6 @@ def test_summary_table(self, icf):
5449
fields = [d["name"] for d in data]
5550
assert tuple(sorted(fields)) == self.fields
5651

57-
def test_summary_table_local_allleles(self, icf_local_alleles):
58-
data = icf_local_alleles.summary_table()
59-
fields = [d["name"] for d in data]
60-
fields.sort()
61-
expected = tuple(sorted((*self.fields, "FORMAT/LAA")))
62-
assert tuple(fields) == expected
63-
6452
def test_inspect(self, icf):
6553
assert icf.summary_table() == vcf2zarr.inspect(icf.path)
6654

@@ -103,6 +91,44 @@ def test_INFO_NS(self, icf):
10391
assert icf["INFO/NS"].values == [None, None, 3, 3, 2, 3, 3, None, None]
10492

10593

94+
class TestLocalAllelesExample:
95+
data_path = "tests/data/vcf/local_alleles.vcf.gz"
96+
97+
fields = (
98+
"ALT",
99+
"CHROM",
100+
"FILTERS",
101+
"FORMAT/AD",
102+
"FORMAT/DP",
103+
"FORMAT/GQ",
104+
"FORMAT/GT",
105+
"FORMAT/LAA",
106+
"FORMAT/PL",
107+
"ID",
108+
"INFO/AA",
109+
"INFO/AC",
110+
"INFO/AF",
111+
"INFO/AN",
112+
"INFO/DB",
113+
"INFO/DP",
114+
"INFO/H2",
115+
"INFO/NS",
116+
"POS",
117+
"QUAL",
118+
"REF",
119+
)
120+
121+
@pytest.fixture(scope="class")
122+
def icf(self, tmp_path_factory):
123+
out = tmp_path_factory.mktemp("data") / "example.exploded"
124+
return vcf2zarr.explode(out, [self.data_path])
125+
126+
def test_summary_table(self, icf):
127+
data = icf.summary_table()
128+
fields = [d["name"] for d in data]
129+
assert tuple(sorted(fields)) == self.fields
130+
131+
106132
class TestIcfWriterExample:
107133
data_path = "tests/data/vcf/sample.vcf.gz"
108134

tests/test_vcf_examples.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -233,18 +233,8 @@ def test_call_HQ(self, ds):
233233
nt.assert_array_equal(ds["call_HQ"], call_HQ)
234234

235235
def test_call_LAA(self, ds):
236-
call_LAA = [
237-
[[-2, -2], [-2, -2], [1, -2]],
238-
[[-2, -2], [-2, -2], [1, -2]],
239-
[[-2, -2], [1, -2], [1, -2]],
240-
[[-2, -2], [1, -2], [-2, -2]],
241-
[[1, 2], [1, 2], [2, -2]],
242-
[[-2, -2], [-2, -2], [-2, -2]],
243-
[[1, -2], [2, -2], [-2, -2]],
244-
[[-2, -2], [-2, -2], [-2, -2]],
245-
[[-2, -2], [1, -2], [2, -2]],
246-
]
247-
nt.assert_array_equal(ds["call_LAA"], call_LAA)
236+
# The small example VCF does not have a PL field
237+
assert "call_LAA" not in ds
248238

249239
def test_no_genotypes(self, ds, tmp_path):
250240
path = "tests/data/vcf/sample_no_genotypes.vcf.gz"

0 commit comments

Comments
 (0)