@@ -1064,12 +1064,28 @@ def generate_schema(
10641064 "samples" : vcz .VcfZarrDimension (
10651065 size = n , chunk_size = samples_chunk_size or vcz .DEFAULT_SAMPLE_CHUNK_SIZE
10661066 ),
1067- # ploidy added conditionally below
1067+ # ploidy and genotypes added conditionally below
10681068 "alleles" : vcz .VcfZarrDimension (size = max_alleles ),
10691069 "alt_alleles" : vcz .VcfZarrDimension (size = max_alleles - 1 ),
10701070 "filters" : vcz .VcfZarrDimension (size = self .metadata .num_filters ),
10711071 }
10721072
1073+ # Add ploidy and genotypes dimensions only when needed
1074+ max_genotypes = 0
1075+ for field in self .metadata .format_fields :
1076+ if field .vcf_number == "G" :
1077+ max_genotypes = max (max_genotypes , field .summary .max_number )
1078+ if self .gt_field is not None :
1079+ ploidy = max (self .gt_field .summary .max_number - 1 , 1 )
1080+ dimensions ["ploidy" ] = vcz .VcfZarrDimension (size = ploidy )
1081+ max_genotypes = math .comb (max_alleles + ploidy - 1 , ploidy )
1082+ dimensions ["genotypes" ] = vcz .VcfZarrDimension (size = max_genotypes )
1083+ else :
1084+ if max_genotypes > 0 :
1085+ # there is no GT field, but there is at least one Number=G field,
1086+ # so need to define genotypes dimension
1087+ dimensions ["genotypes" ] = vcz .VcfZarrDimension (size = max_genotypes )
1088+
10731089 schema_instance = vcz .VcfZarrSchema (
10741090 format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
10751091 dimensions = dimensions ,
@@ -1148,10 +1164,6 @@ def fixed_field_spec(name, dtype, source=None, dimensions=("variants",)):
11481164 array_specs .append (spec_from_field (field ))
11491165
11501166 if self .gt_field is not None and n > 0 :
1151- ploidy = max (self .gt_field .summary .max_number - 1 , 1 )
1152- # Add ploidy dimension only when needed
1153- schema_instance .dimensions ["ploidy" ] = vcz .VcfZarrDimension (size = ploidy )
1154-
11551167 array_specs .append (
11561168 vcz .ZarrArraySpec (
11571169 name = "call_genotype_phased" ,
0 commit comments