Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 0.1.4 2025-03-XX

- Fix bug in handling all-missing genotypes (#328)

# 0.1.3 2025-03-04

- Fix missing dependency issue for packaging
Expand Down
5 changes: 2 additions & 3 deletions bio2zarr/vcf2zarr/icf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1095,9 +1095,8 @@ def process_partition(self, partition_index):
for field in info_fields:
tcw.append(field.full_name, variant.INFO.get(field.name, None))
if has_gt:
if variant.genotype is None:
val = None
else:
val = None
if "GT" in variant.FORMAT and variant.genotype is not None:
val = variant.genotype.array()
tcw.append("FORMAT/GT", val)
for field in format_fields:
Expand Down
Binary file added tests/data/vcf/sample_all_missing_gts.vcf.gz
Binary file not shown.
Binary file added tests/data/vcf/sample_all_missing_gts.vcf.gz.csi
Binary file not shown.
4 changes: 2 additions & 2 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,8 @@ def test_examples(self, chunk_size, size, start, stop):
# It works in CI on Linux, but it'll probably break at some point.
# These numbers must also be updated whenever a new data file is
# added under tests/data.
("tests/data", 4981734),
("tests/data/vcf", 4969597),
("tests/data", 4983044),
("tests/data/vcf", 4970907),
("tests/data/vcf/sample.vcf.gz", 1089),
],
)
Expand Down
23 changes: 23 additions & 0 deletions tests/test_vcf_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,29 @@ def test_region_index(self, ds):
)
nt.assert_array_equal(ds["region_index"], region_index)

def test_small_example_all_missing_gts(self, ds, tmp_path_factory):
    """Check conversion of the all-missing-genotypes example VCF.

    Converts ``sample_all_missing_gts.vcf.gz`` and compares the result
    with the reference dataset ``ds``. Every variable except the three
    ``call_genotype*`` arrays must match ``ds`` exactly. For those three,
    the expected value is the reference array with row 1 overwritten by
    -1 (genotype), True (mask) and True (phased).
    """
    data_path = "tests/data/vcf/sample_all_missing_gts.vcf.gz"
    out = tmp_path_factory.mktemp("data") / "example.vcf.zarr"
    vcf2zarr.convert([data_path], out, worker_processes=0)
    ds2 = sg.load_dataset(out)

    assert_dataset_equal(
        ds,
        ds2,
        drop_vars=["call_genotype", "call_genotype_mask", "call_genotype_phased"],
    )

    # Build each expected array from a copy: `.values` may hand back the
    # array backing `ds`, and writing into it would alter the reference
    # dataset for any other test that receives the same `ds` object.
    expected_gt = ds["call_genotype"].values.copy()
    expected_gt[1] = -1
    nt.assert_array_equal(expected_gt, ds2["call_genotype"].values)

    expected_mask = ds["call_genotype_mask"].values.copy()
    expected_mask[1] = True
    nt.assert_array_equal(expected_mask, ds2["call_genotype_mask"].values)

    expected_phased = ds["call_genotype_phased"].values.copy()
    # NOTE: Not sure this is the correct behaviour, but testing here anyway
    # to keep a record that this is what we're doing
    expected_phased[1] = True
    nt.assert_array_equal(expected_phased, ds2["call_genotype_phased"].values)


class TestSmallExampleLocalAlleles:
data_path = "tests/data/vcf/sample.vcf.gz"
Expand Down