1
1
from typing import Optional
2
2
3
- import allel
4
- import dask .array as da
5
3
import numpy as np
6
4
import numpy .testing as npt
7
5
import pytest
8
6
from dask .dataframe import DataFrame
9
- from hypothesis import Phase , example , given , settings
10
7
from hypothesis import strategies as st
11
8
from hypothesis .extra .numpy import arrays
12
9
@@ -27,40 +24,27 @@ def test_rogers_huff_r_between():
27
24
gnb = np .array ([[0 , 1 , 2 ]])
28
25
npt .assert_allclose (rogers_huff_r_between (gna [0 ], gnb [0 ]), 1.0 , rtol = 1e-06 )
29
26
npt .assert_allclose (rogers_huff_r2_between (gna [0 ], gnb [0 ]), 1.0 , rtol = 1e-06 )
30
- npt .assert_allclose (
31
- allel .rogers_huff_r_between (gna , gnb ),
32
- rogers_huff_r_between (gna [0 ], gnb [0 ]),
33
- rtol = 1e-06 ,
34
- )
35
27
36
28
gna = np .array ([[0 , 1 , 2 ]])
37
29
gnb = np .array ([[2 , 1 , 0 ]])
38
30
npt .assert_allclose (rogers_huff_r_between (gna [0 ], gnb [0 ]), - 1.0 , rtol = 1e-06 )
39
31
npt .assert_allclose (rogers_huff_r2_between (gna [0 ], gnb [0 ]), 1.0 , rtol = 1e-06 )
40
- npt .assert_allclose (
41
- allel .rogers_huff_r_between (gna , gnb ),
42
- rogers_huff_r_between (gna [0 ], gnb [0 ]),
43
- rtol = 1e-06 ,
44
- )
45
32
46
33
gna = np .array ([[0 , 0 , 0 ]])
47
34
gnb = np .array ([[1 , 1 , 1 ]])
48
35
assert np .isnan (rogers_huff_r_between (gna [0 ], gnb [0 ]))
49
36
assert np .isnan (rogers_huff_r2_between (gna [0 ], gnb [0 ]))
50
- assert np .isnan (allel .rogers_huff_r_between (gna , gnb ))
51
37
52
38
gna = np .array ([[1 , 1 , 1 ]])
53
39
gnb = np .array ([[1 , 1 , 1 ]])
54
40
assert np .isnan (rogers_huff_r_between (gna [0 ], gnb [0 ]))
55
41
assert np .isnan (rogers_huff_r2_between (gna [0 ], gnb [0 ]))
56
- assert np .isnan (allel .rogers_huff_r_between (gna , gnb ))
57
42
58
43
# a case which fails if fastmath=True is enabled for rogers_huff_r_between
59
44
gna = np .full ((1 , 49 ), 2 )
60
45
gnb = np .full ((1 , 49 ), 2 )
61
46
assert np .isnan (rogers_huff_r_between (gna [0 ], gnb [0 ]))
62
47
assert np .isnan (rogers_huff_r2_between (gna [0 ], gnb [0 ]))
63
- assert np .isnan (allel .rogers_huff_r_between (gna , gnb ))
64
48
65
49
66
50
def ldm_df (
@@ -115,7 +99,16 @@ def test_threshold():
115
99
116
100
@pytest .mark .parametrize (
117
101
"dtype" ,
118
- [dtype for k , v in np .sctypes .items () for dtype in v if k in ["int" , "uint" ]], # type: ignore
102
+ [
103
+ np .int8 ,
104
+ np .int16 ,
105
+ np .int32 ,
106
+ np .int64 ,
107
+ np .uint8 ,
108
+ np .uint16 ,
109
+ np .uint32 ,
110
+ np .uint64 ,
111
+ ],
119
112
)
120
113
def test_dtypes (dtype ):
121
114
# Input matrices should work regardless of integer type
@@ -148,37 +141,6 @@ def ld_prune_args(draw):
148
141
return x , window , step , threshold , chunks
149
142
150
143
151
- # Phases setting without shrinking for complex, conditional draws in
152
- # which shrinking wastes time and adds little information
153
- # (see https://hypothesis.readthedocs.io/en/latest/settings.html#hypothesis.settings.phases)
154
- PHASES_NO_SHRINK = (Phase .explicit , Phase .reuse , Phase .generate , Phase .target )
155
-
156
-
157
- @given (args = ld_prune_args ()) # pylint: disable=no-value-for-parameter
158
- @settings (max_examples = 50 , deadline = None , phases = PHASES_NO_SHRINK )
159
- @example (args = (np .array ([[1 , 1 ], [1 , 1 ]], dtype = "uint8" ), 1 , 1 , 0.0 , - 1 ))
160
- @pytest .mark .skip (
161
- reason = "Hypothesis generates failures that need investigation: https://github.com/sgkit-dev/sgkit/issues/864"
162
- )
163
- def test_vs_skallel (args ):
164
- x , size , step , threshold , chunks = args
165
-
166
- ds = simulate_genotype_call_dataset (n_variant = x .shape [0 ], n_sample = x .shape [1 ])
167
- ds ["call_dosage" ] = (["variants" , "samples" ], da .asarray (x ).rechunk ({0 : chunks }))
168
- ds = window_by_variant (ds , size = size , step = step )
169
-
170
- ldm = ld_matrix (ds , threshold = threshold )
171
- has_duplicates = ldm .compute ().duplicated (subset = ["i" , "j" ]).any ()
172
- assert not has_duplicates
173
- idx_drop_ds = maximal_independent_set (ldm )
174
-
175
- idx_drop = np .sort (idx_drop_ds .ld_prune_index_to_drop .data )
176
- m = allel .locate_unlinked (x , size = size , step = step , threshold = threshold )
177
- idx_drop_ska = np .sort (np .argwhere (~ m ).squeeze (axis = 1 ))
178
-
179
- npt .assert_equal (idx_drop_ska , idx_drop )
180
-
181
-
182
144
def test_scores ():
183
145
# Create zero row vectors except for 1st and 11th
184
146
# (make them have non-zero variance)
0 commit comments