Skip to content

Commit 1f647ab

Browse files
committed
Check dimension sizes for named VCF Number fields
1 parent d30940e commit 1f647ab

File tree

2 files changed

+24
-14
lines changed

2 files changed

+24
-14
lines changed

bio2zarr/icf.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1042,6 +1042,7 @@ def generate_schema(
10421042
if local_alleles is None:
10431043
local_alleles = False
10441044

1045+
max_alleles = max(self.fields["ALT"].vcf_field.summary.max_number + 1, 2)
10451046
dimensions = {
10461047
"variants": vcz.VcfZarrDimension(
10471048
size=m, chunk_size=variants_chunk_size or vcz.DEFAULT_VARIANT_CHUNK_SIZE
@@ -1050,9 +1051,8 @@ def generate_schema(
10501051
size=n, chunk_size=samples_chunk_size or vcz.DEFAULT_SAMPLE_CHUNK_SIZE
10511052
),
10521053
# ploidy added conditionally below
1053-
"alleles": vcz.VcfZarrDimension(
1054-
size=max(self.fields["ALT"].vcf_field.summary.max_number + 1, 2)
1055-
),
1054+
"alleles": vcz.VcfZarrDimension(size=max_alleles),
1055+
"alt_alleles": vcz.VcfZarrDimension(size=max_alleles - 1),
10561056
"filters": vcz.VcfZarrDimension(size=self.metadata.num_filters),
10571057
}
10581058

bio2zarr/vcz.py

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -174,19 +174,29 @@ def from_field(
174174
array_name = prefix + vcf_field.name
175175

176176
max_number = vcf_field.max_number
177-
if (max_number > 0 and vcf_field.vcf_number in ("R", "A", "G")) or (
178-
max_number > 1 or vcf_field.full_name == "FORMAT/LAA"
179-
):
180-
# TODO we should really be checking this to see if the named dimensions
181-
# are actually correct.
182-
if vcf_field.vcf_number == "R":
177+
if vcf_field.vcf_number == "R":
178+
max_alleles = schema.dimensions["alleles"].size
179+
if max_number > max_alleles:
180+
raise ValueError(
181+
f"Max number of values {max_number} exceeds max alleles "
182+
f"{max_alleles} for {vcf_field.full_name}"
183+
)
184+
if max_alleles > 0:
183185
dimensions.append("alleles")
184-
elif vcf_field.vcf_number == "A":
186+
elif vcf_field.vcf_number == "A":
187+
max_alt_alleles = schema.dimensions["alt_alleles"].size
188+
if max_number > max_alt_alleles:
189+
raise ValueError(
190+
f"Max number of values {max_number} exceeds max alt alleles "
191+
f"{max_alt_alleles} for {vcf_field.full_name}"
192+
)
193+
if max_alt_alleles > 0:
185194
dimensions.append("alt_alleles")
186-
elif vcf_field.vcf_number == "G":
187-
dimensions.append("genotypes")
188-
else:
189-
dimensions.append(f"{vcf_field.category}_{vcf_field.name}_dim")
195+
elif max_number > 0 and vcf_field.vcf_number == "G":
196+
# TODO: need max_genotypes
197+
dimensions.append("genotypes")
198+
elif max_number > 1 or vcf_field.full_name == "FORMAT/LAA":
199+
dimensions.append(f"{vcf_field.category}_{vcf_field.name}_dim")
190200
if dimensions[-1] not in schema.dimensions:
191201
schema.dimensions[dimensions[-1]] = VcfZarrDimension(
192202
size=vcf_field.max_number

0 commit comments

Comments
 (0)