@@ -1057,40 +1057,41 @@ def iter_alleles_and_genotypes(self, start, stop, shape, num_alleles):
10571057 def generate_schema (
10581058 self , variants_chunk_size = None , samples_chunk_size = None , local_alleles = None
10591059 ):
1060- m = self .num_records
1061- n = self .num_samples
10621060 if local_alleles is None :
10631061 local_alleles = False
10641062
10651063 max_alleles = max (self .fields ["ALT" ].vcf_field .summary .max_number + 1 , 2 )
1066- dimensions = {
1067- "variants" : vcz .VcfZarrDimension (
1068- size = m , chunk_size = variants_chunk_size or vcz .DEFAULT_VARIANT_CHUNK_SIZE
1069- ),
1070- "samples" : vcz .VcfZarrDimension (
1071- size = n , chunk_size = samples_chunk_size or vcz .DEFAULT_SAMPLE_CHUNK_SIZE
1072- ),
1073- # ploidy and genotypes added conditionally below
1074- "alleles" : vcz .VcfZarrDimension (size = max_alleles ),
1075- "alt_alleles" : vcz .VcfZarrDimension (size = max_alleles - 1 ),
1076- "filters" : vcz .VcfZarrDimension (size = self .metadata .num_filters ),
1077- }
10781064
10791065 # Add ploidy and genotypes dimensions only when needed
10801066 max_genotypes = 0
10811067 for field in self .metadata .format_fields :
10821068 if field .vcf_number == "G" :
10831069 max_genotypes = max (max_genotypes , field .summary .max_number )
1070+
1071+ ploidy = None
1072+ genotypes_size = None
10841073 if self .gt_field is not None :
10851074 ploidy = max (self .gt_field .summary .max_number - 1 , 1 )
1086- dimensions ["ploidy" ] = vcz .VcfZarrDimension (size = ploidy )
1087- max_genotypes = math .comb (max_alleles + ploidy - 1 , ploidy )
1088- dimensions ["genotypes" ] = vcz .VcfZarrDimension (size = max_genotypes )
1075+ # NOTE: it is not clear why this is computed from the ploidy here, given that
1076+ # the genotypes dimension is only required when there is at least one Number=G field?
1077+ genotypes_size = math .comb (max_alleles + ploidy - 1 , ploidy )
1078+ # assert max_genotypes == genotypes_size
10891079 else :
10901080 if max_genotypes > 0 :
10911081 # there is no GT field, but there is at least one Number=G field,
10921082 # so need to define genotypes dimension
1093- dimensions ["genotypes" ] = vcz .VcfZarrDimension (size = max_genotypes )
1083+ genotypes_size = max_genotypes
1084+
1085+ dimensions = vcz .standard_dimensions (
1086+ variants_size = self .num_records ,
1087+ variants_chunk_size = variants_chunk_size ,
1088+ samples_size = self .num_samples ,
1089+ samples_chunk_size = samples_chunk_size ,
1090+ alleles_size = max_alleles ,
1091+ filters_size = self .metadata .num_filters ,
1092+ ploidy_size = ploidy ,
1093+ genotypes_size = genotypes_size ,
1094+ )
10941095
10951096 schema_instance = vcz .VcfZarrSchema (
10961097 format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
@@ -1173,7 +1174,7 @@ def fixed_field_spec(name, dtype, source=None, dimensions=("variants",)):
11731174 continue
11741175 array_specs .append (spec_from_field (field ))
11751176
1176- if self .gt_field is not None and n > 0 :
1177+ if self .gt_field is not None and self . num_samples > 0 :
11771178 array_specs .append (
11781179 vcz .ZarrArraySpec (
11791180 name = "call_genotype_phased" ,
0 commit comments