@@ -149,7 +149,8 @@ def hardy_weinberg_test(
149149 Name of variable containing precomputed genotype counts for each variant
150150 as described in :data:`sgkit.variables.variant_genotype_count_spec`.
151151 If the variable is not present in ``ds``, it will be computed
152- using :func:`count_variant_genotypes`.
152+ using :func:`count_variant_genotypes` which automatically assigns
153+ coordinates to the ``genotypes`` dimension.
153154 ploidy
154155 Genotype ploidy, defaults to ``ploidy`` dimension of provided dataset.
155156 If the `ploidy` dimension is not present, then this value must be set explicitly.
@@ -168,10 +169,11 @@ def hardy_weinberg_test(
168169
169170 Warnings
170171 --------
171- This function is only applicable to diploid, biallelic datasets and the
172- ``genotype_count`` array should have three columns which respectively
173- contain counts for homozygous reference, heterozygous, and homozygous
174- alternate genotypes.
172+ This function is only applicable to diploid, biallelic datasets.
173+ The ``genotype_count`` array should have three columns corresponding
174+ to the ``genotypes`` dimension. These columns should have coordinates
175+ ``'0/0'``, ``'0/1'``, and ``'1/1'`` which respectively contain counts for
176+ homozygous reference, heterozygous, and homozygous alternate genotypes.
175177
176178 Returns
177179 -------
@@ -180,18 +182,24 @@ def hardy_weinberg_test(
180182 variant_hwe_p_value : [array-like, shape: (N, O)]
181183 P values from HWE test for each variant as float in [0, 1].
182184
185+ Raises
186+ ------
187+ NotImplementedError
188+ If the dataset is not limited to biallelic, diploid genotypes.
189+ ValueError
190+ If the ploidy or number of alleles are not specified and not
191+ present as dimensions in the dataset.
192+ ValueError
193+ If no coordinates are assigned to the ``genotypes`` dimension.
194+ KeyError
195+ If the genotypes ``'0/0'``, ``'0/1'`` or ``'1/1'`` are not specified
196+ as coordinates of the ``genotypes`` dimension.
197+
183198 References
184199 ----------
185200 - [1] Wigginton, Janis E., David J. Cutler, and Goncalo R. Abecasis. 2005.
186201 “A Note on Exact Tests of Hardy-Weinberg Equilibrium.” American Journal of
187202 Human Genetics 76 (5): 887–93.
188-
189- Raises
190- ------
191- NotImplementedError
192- If ploidy of provided dataset != 2
193- NotImplementedError
194- If maximum number of alleles in provided dataset != 2
195203 """
196204 ploidy = ploidy or ds .dims .get ("ploidy" )
197205 if not ploidy :
@@ -214,8 +222,18 @@ def hardy_weinberg_test(
214222 )
215223 variables .validate (ds , {genotype_count : variables .variant_genotype_count_spec })
216224
217- # extract counts by type
218- obs_hom1 , obs_hets , obs_hom2 = list (da .asarray (ds [genotype_count ].data ).T )
225+ # extract counts by coords
226+ if "genotypes" not in ds .coords :
227+ raise ValueError ("No coordinates for dimension 'genotypes'" )
228+ try :
229+ key = "0/0"
230+ obs_hom1 = da .asarray (ds [genotype_count ].loc [:, key ].data )
231+ key = "0/1"
232+ obs_hets = da .asarray (ds [genotype_count ].loc [:, key ].data )
233+ key = "1/1"
234+ obs_hom2 = da .asarray (ds [genotype_count ].loc [:, key ].data )
235+ except KeyError as e :
236+ raise KeyError ("No counts for genotype '{}'" .format (key )) from e
219237
220238 # note obs_het is expected first
221239 p = da .map_blocks (hardy_weinberg_p_value_vec_jit , obs_hets , obs_hom1 , obs_hom2 )
0 commit comments