Skip to content

Commit fa3199d

Browse files
committed
Check dimension sizes for named VCF Number fields
1 parent: ac46a92 · commit: fa3199d

File tree

2 files changed: +34 −17 lines changed

bio2zarr/icf.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,11 +1054,15 @@ def generate_schema(
10541054
f"{schema_instance.variants_chunk_size, schema_instance.samples_chunk_size}"
10551055
)
10561056

1057+
alt_field = self.fields["ALT"]
1058+
max_alleles = alt_field.vcf_field.summary.max_number + 1
1059+
10571060
def spec_from_field(field, array_name=None):
10581061
return vcz.ZarrArraySpec.from_field(
10591062
field,
10601063
num_samples=n,
10611064
num_variants=m,
1065+
max_alleles=max_alleles,
10621066
samples_chunk_size=schema_instance.samples_chunk_size,
10631067
variants_chunk_size=schema_instance.variants_chunk_size,
10641068
array_name=array_name,
@@ -1088,9 +1092,6 @@ def fixed_field_spec(
10881092
compressor=compressor,
10891093
)
10901094

1091-
alt_field = self.fields["ALT"]
1092-
max_alleles = alt_field.vcf_field.summary.max_number + 1
1093-
10941095
array_specs = [
10951096
fixed_field_spec(
10961097
name="variant_contig",

bio2zarr/vcz.py

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ def from_field(
119119
*,
120120
num_variants,
121121
num_samples,
122+
max_alleles,
122123
variants_chunk_size,
123124
samples_chunk_size,
124125
array_name=None,
@@ -137,23 +138,38 @@ def from_field(
137138
if array_name is None:
138139
array_name = prefix + vcf_field.name
139140

140-
# TODO make an option to add in the empty extra dimension
141141
max_number = vcf_field.max_number
142-
if (max_number > 0 and vcf_field.vcf_number in ("R", "A", "G")) or (
143-
max_number > 1 or vcf_field.full_name == "FORMAT/LAA"
144-
):
145-
shape.append(max_number)
146-
chunks.append(max_number)
147-
# TODO we should really be checking this to see if the named dimensions
148-
# are actually correct.
149-
if vcf_field.vcf_number == "R":
142+
if vcf_field.vcf_number == "R":
143+
if max_number > max_alleles:
144+
raise ValueError(
145+
f"Max number of values {max_number} exceeds max alleles "
146+
f"{max_alleles} for {vcf_field.full_name}"
147+
)
148+
if max_alleles > 0:
149+
shape.append(max_alleles)
150+
chunks.append(max_alleles)
150151
dimensions.append("alleles")
151-
elif vcf_field.vcf_number == "A":
152+
elif vcf_field.vcf_number == "A":
153+
max_alt_alleles = max_alleles - 1
154+
if max_number > max_alt_alleles:
155+
raise ValueError(
156+
f"Max number of values {max_number} exceeds max alt alleles "
157+
f"{max_alt_alleles} for {vcf_field.full_name}"
158+
)
159+
if max_alt_alleles > 0:
160+
shape.append(max_alt_alleles)
161+
chunks.append(max_alt_alleles)
152162
dimensions.append("alt_alleles")
153-
elif vcf_field.vcf_number == "G":
154-
dimensions.append("genotypes")
155-
else:
156-
dimensions.append(f"{vcf_field.category}_{vcf_field.name}_dim")
163+
elif max_number > 0 and vcf_field.vcf_number == "G":
164+
# TODO: need max_genotypes
165+
shape.append(max_number)
166+
chunks.append(max_number)
167+
dimensions.append("genotypes")
168+
elif max_number > 1 or vcf_field.full_name == "FORMAT/LAA":
169+
# TODO make an option to add in the empty extra dimension
170+
shape.append(max_number)
171+
chunks.append(max_number)
172+
dimensions.append(f"{vcf_field.category}_{vcf_field.name}_dim")
157173
return ZarrArraySpec(
158174
source=vcf_field.full_name,
159175
name=array_name,

0 commit comments

Comments
 (0)