@@ -161,7 +161,7 @@ def for_field(
161161 ) -> "VcfFieldHandler" :
162162 if field == "FORMAT/GT" :
163163 return GenotypeFieldHandler (
164- vcf , chunk_length , ploidy , mixed_ploidy , truncate_calls
164+ vcf , chunk_length , ploidy , mixed_ploidy , truncate_calls , max_alt_alleles
165165 )
166166 category = field .split ("/" )[0 ]
167167 vcf_field_defs = _get_vcf_field_defs (vcf , category )
@@ -286,13 +286,15 @@ def __init__(
286286 ploidy : int ,
287287 mixed_ploidy : bool ,
288288 truncate_calls : bool ,
289+ max_alt_alleles : int ,
289290 ) -> None :
290291 n_sample = len (vcf .samples )
291292 self .call_genotype = np .empty ((chunk_length , n_sample , ploidy ), dtype = "i1" )
292293 self .call_genotype_phased = np .empty ((chunk_length , n_sample ), dtype = bool )
293294 self .ploidy = ploidy
294295 self .mixed_ploidy = mixed_ploidy
295296 self .truncate_calls = truncate_calls
297+ self .max_alt_alleles = max_alt_alleles
296298
297299 def add_variant (self , i : int , variant : Any ) -> None :
298300 fill = - 2 if self .mixed_ploidy else - 1
@@ -305,6 +307,10 @@ def add_variant(self, i: int, variant: Any) -> None:
305307 self .call_genotype [i , ..., 0 :n ] = gt [..., 0 :n ]
306308 self .call_genotype [i , ..., n :] = fill
307309 self .call_genotype_phased [i ] = gt [..., - 1 ]
310+
311+ # set any calls that exceed maximum number of alt alleles as missing
312+ self .call_genotype [i ][self .call_genotype [i ] > self .max_alt_alleles ] = - 1
313+
308314 else :
309315 self .call_genotype [i ] = fill
310316 self .call_genotype_phased [i ] = 0
@@ -616,7 +622,8 @@ def vcf_to_zarrs(
616622 max_alt_alleles
617623 The (maximum) number of alternate alleles in the VCF file. Any records with more than
618624 this number of alternate alleles will have the extra alleles dropped (the `variant_allele`
619- variable will be truncated). Call genotype fields will however be unaffected.
625+ variable will be truncated). Any genotype calls that refer to a dropped allele will
626+ be set to the missing-allele sentinel value of -1.
620627 fields
621628 Extra fields to extract data for. A list of strings, with ``INFO`` or ``FORMAT`` prefixes.
622629 Wildcards are permitted too, for example: ``["INFO/*", "FORMAT/DP"]``.
@@ -784,7 +791,8 @@ def vcf_to_zarr(
784791 max_alt_alleles
785792 The (maximum) number of alternate alleles in the VCF file. Any records with more than
786793 this number of alternate alleles will have the extra alleles dropped (the `variant_allele`
787- variable will be truncated). Call genotype fields will however be unaffected.
794+ variable will be truncated). Any genotype calls that refer to a dropped allele will
795+ be set to the missing-allele sentinel value of -1.
788796 fields
789797 Extra fields to extract data for. A list of strings, with ``INFO`` or ``FORMAT`` prefixes.
790798 Wildcards are permitted too, for example: ``["INFO/*", "FORMAT/DP"]``.
0 commit comments