@@ -118,6 +118,80 @@ def test_genomic_relationship__VanRaden_AGHmatrix_tetraploid(chunks):
118118 np .testing .assert_array_almost_equal (actual , expect )
119119
120120
def test_genomic_relationship__VanRaden_skipna():
    # Test that the skipna option skips missing values in call_dosage
    # such that the relationship between each pair of individuals
    # is calculated using only the variants where neither sample
    # has missing data.
    # This should be equivalent to calculating the GRM using
    # multiple subsets of the variants and using pairwise
    # values from the largest subset of variants that doesn't
    # result in a nan value.
    nan = np.nan
    # rows are variants, columns are samples; only samples 4 and 7
    # contain missing dosages
    dosage = np.array(
        [
            [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0],
            [1.0, 1.0, 1.0, 2.0, nan, 1.0, 1.0, 0.0, 1.0, 2.0],
            [2.0, 2.0, 0.0, 0.0, nan, 1.0, 1.0, 1.0, 0.0, 1.0],
            [1.0, 0.0, 0.0, 0.0, nan, 1.0, 1.0, 1.0, 1.0, 0.0],
            [1.0, 0.0, 1.0, 1.0, nan, 2.0, 0.0, 1.0, 0.0, 2.0],
            [2.0, 1.0, 1.0, 1.0, nan, 1.0, 2.0, nan, 0.0, 1.0],
            [2.0, 0.0, 1.0, 1.0, nan, 2.0, 1.0, nan, 1.0, 1.0],
            [1.0, 1.0, 1.0, 2.0, nan, 1.0, 2.0, nan, 1.0, 0.0],
            [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, nan, 1.0, 1.0],
            [2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, nan, 2.0, 1.0],
            [1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, nan, 1.0, 2.0],
            [0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, nan, 1.0, 2.0],
            [1.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, nan, 1.0, 0.0],
            [0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0],
            [1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0],
            [2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0],
            [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0],
            [2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0],
            [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0],
            [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]
    )
    ds = xr.Dataset()
    ds["call_dosage"] = ["variants", "samples"], dosage
    ds["ancestral_frequency"] = "variants", np.ones(len(dosage)) / 2

    def grm(dataset, skipna):
        # single call site so that every estimate below differs only
        # in the variants supplied and in the skipna flag
        return sg.genomic_relationship(
            dataset,
            call_dosage="call_dosage",
            ancestral_frequency="ancestral_frequency",
            estimator="VanRaden",
            ploidy=2,
            skipna=skipna,
        ).stat_genomic_relationship.values

    # calculating without skipna will result in nans in the GRM
    expect = grm(ds, skipna=False)
    assert np.isnan(expect).sum() > 0
    # fill nan values using maximum subsets without missing data:
    # variants observed in sample 4, variants observed in sample 7,
    # and variants observed in both (needed for the 4-7 pair itself)
    idx_0 = ~np.isnan(dosage[:, 4])
    idx_1 = ~np.isnan(dosage[:, 7])
    idx_2 = np.logical_and(idx_0, idx_1)
    for idx in [idx_0, idx_1, idx_2]:
        sub_expect = grm(ds.sel(dict(variants=idx)), skipna=False)
        # only overwrite entries that are still missing, so values from
        # larger subsets take precedence over values from smaller ones
        expect = np.where(np.isnan(expect), sub_expect, expect)
    # the numpy array assertions treat nans in matching positions as
    # equal, so make sure the reference has been completely filled
    assert not np.isnan(expect).any()
    # calculate actual value using skipna=True
    actual = grm(ds, skipna=True)
    # the two results come from different computation paths, so compare
    # with a tolerance rather than requiring bit-identical floats
    np.testing.assert_array_almost_equal(actual, expect)
193+
194+
121195@pytest .mark .parametrize ("ploidy" , [2 , 4 ])
122196def test_genomic_relationship__detect_ploidy (ploidy ):
123197 ds = xr .Dataset ()
0 commit comments