@@ -822,6 +822,62 @@ def __exit__(self, exc_type, exc_val, exc_tb):
822
822
return False
823
823
824
824
825
def convert_local_allele_field_types(fields):
    """
    Add the call_LA field to the given field specs and turn any supported
    localisable fields into their L* counterparts.

    The call_AD and call_PL specs, when present, are modified in place: they
    are renamed to call_LAD and call_LPL, detached from their VCF field, and
    resized so that the last dimension has length 2 and 3 respectively.

    Note that we currently support only two ALT alleles per sample, and so the
    dimensions of these fields are fixed by that requirement. Later versions may
    use summary data stored in the ICF to make different choices, if information
    about subsequent alleles (not in the actual genotype calls) should also be
    stored.

    Returns a new list holding every entry of ``fields`` followed by the
    call_LA spec.

    Raises KeyError if there is no field named "call_genotype", and ValueError
    if the last dimension of its shape is not 2.
    """
    # Later entries win if two specs share a name.
    by_name = {}
    for spec in fields:
        by_name[spec.name] = spec

    genotype = by_name["call_genotype"]
    if genotype.shape[-1] != 2:
        raise ValueError("Local alleles only supported on diploid data")

    # TODO check if LA is already in here

    # Everything except the trailing axis of call_genotype is shared by the
    # local-allele fields.
    lead_shape = genotype.shape[:-1]
    lead_chunks = genotype.chunks[:-1]
    lead_dims = genotype.dimensions[:-1]

    def localise(spec, new_name, width, last_dimension):
        # Rewrite ``spec`` in place as a local-allele field whose final axis
        # has length ``width`` and is named ``last_dimension``.
        spec.name = new_name
        spec.vcf_field = None
        spec.shape = (*lead_shape, width)
        spec.chunks = (*lead_chunks, width)
        spec.description += " (local-alleles)"
        spec.dimensions = (*lead_dims, last_dimension)

    # call_LA reuses the genotype shape and chunking unchanged; only the name
    # of the final dimension differs.
    local_alleles_spec = schema.ZarrArraySpec.new(
        vcf_field=None,
        name="call_LA",
        dtype="i1",
        shape=genotype.shape,
        chunks=genotype.chunks,
        dimensions=(*lead_dims, "local_alleles"),
        description=(
            "0-based indices into REF+ALT, indicating which alleles"
            " are relevant (local) for the current sample"
        ),
    )

    allele_depth = by_name.get("call_AD")
    if allele_depth is not None:
        # TODO check if call_LAD is in the list already
        localise(allele_depth, "call_LAD", 2, "local_alleles")

    likelihoods = by_name.get("call_PL")
    if likelihoods is not None:
        # TODO check if call_LPL is in the list already
        # The new final dimension name is derived from the existing one.
        localise(
            likelihoods, "call_LPL", 3, "local_" + likelihoods.dimensions[-1]
        )

    return list(fields) + [local_alleles_spec]
879
+
880
+
825
881
class IntermediateColumnarFormat (collections .abc .Mapping ):
826
882
def __init__ (self , path ):
827
883
self .path = pathlib .Path (path )
@@ -1110,8 +1166,6 @@ def fixed_field_spec(
1110
1166
)
1111
1167
1112
1168
if local_alleles :
1113
- from bio2zarr .vcf2zarr .vcz import convert_local_allele_field_types
1114
-
1115
1169
array_specs = convert_local_allele_field_types (array_specs )
1116
1170
1117
1171
return schema .VcfZarrSchema (
0 commit comments