7
7
from xarray import Dataset
8
8
9
9
from sgkit import variables
10
- from sgkit .accelerate import numba_guvectorize , numba_jit
11
10
from sgkit .typing import ArrayLike
12
11
from sgkit .utils import (
13
12
conditional_merge_datasets ,
14
13
create_dataset ,
15
14
define_variable_if_absent ,
16
15
)
17
16
18
- from .pedigree import (
19
- _compress_hamilton_kerr_parameters ,
20
- parent_indices ,
21
- topological_argsort ,
22
- )
17
+ from .pedigree import parent_indices
23
18
24
19
EST_DIPLOID = "diploid"
25
20
EST_HAMILTON_KERR = "Hamilton-Kerr"
26
21
27
22
28
- @numba_guvectorize ( # type: ignore
29
- [
30
- "void(int8[:,:], int64[:,:], uint32, int8[:,:])" ,
31
- "void(int16[:,:], int64[:,:], uint32, int16[:,:])" ,
32
- "void(int32[:,:], int64[:,:], uint32, int32[:,:])" ,
33
- "void(int64[:,:], int64[:,:], uint32, int64[:,:])" ,
34
- ],
35
- "(n,k),(n,p),()->(n,k)" ,
36
- )
37
- def genedrop_diploid (
38
- genotypes : ArrayLike ,
39
- parent : ArrayLike ,
40
- seed : ArrayLike ,
41
- out : ArrayLike ,
42
- ) -> None : # pragma: no cover
43
- n_sample , n_parent = parent .shape
44
- _ , ploidy = genotypes .shape
45
- if n_parent != 2 :
46
- raise ValueError ("The parents dimension must be length 2" )
47
- if ploidy != 2 :
48
- raise ValueError ("Genotypes are not diploid" )
49
- order = topological_argsort (parent )
50
- np .random .seed (seed )
51
- for i in range (n_sample ):
52
- t = order [i ]
53
- unknown_parent = 0
54
- for j in range (n_parent ):
55
- p = parent [t , j ]
56
- if p < 0 :
57
- # founder
58
- unknown_parent += 1
59
- else :
60
- idx = np .random .randint (2 )
61
- out [t , j ] = out [p , idx ]
62
- if unknown_parent == 1 :
63
- raise ValueError ("Pedigree contains half-founders" )
64
- elif unknown_parent == 2 :
65
- # copy founder
66
- out [t , 0 ] = genotypes [t , 0 ]
67
- out [t , 1 ] = genotypes [t , 1 ]
68
-
69
-
70
- @numba_jit (nogil = True )
71
- def _random_gamete_Hamilton_Kerr (
72
- genotype : ArrayLike , ploidy : int , tau : int , lambda_ : float
73
- ) -> ArrayLike :
74
- if ploidy < len (genotype ):
75
- # remove fill values
76
- genotype = genotype [genotype > - 2 ]
77
- if ploidy != len (genotype ):
78
- raise ValueError ("Genotype ploidy does not match number of alleles" )
79
- if tau > ploidy :
80
- # TODO: this can be an option for encoding somatic genome duplication (with suitable lambda)
81
- raise NotImplementedError ("Gamete tau cannot exceed parental ploidy" )
82
- gamete = np .random .choice (genotype , tau , replace = False )
83
- if lambda_ > 0.0 :
84
- if tau == 2 :
85
- if np .random .rand () <= lambda_ :
86
- gamete [1 ] = gamete [0 ]
87
- elif tau != 2 :
88
- raise NotImplementedError ("Non-zero lambda is only implemented for tau = 2" )
89
- return gamete
90
-
91
-
92
- @numba_guvectorize ( # type: ignore
93
- [
94
- "void(int8[:,:], int64[:,:], uint64[:,:], float64[:,:], uint32, int8[:,:])" ,
95
- "void(int16[:,:], int64[:,:], uint64[:,:], float64[:,:], uint32, int16[:,:])" ,
96
- "void(int32[:,:], int64[:,:], uint64[:,:], float64[:,:], uint32, int32[:,:])" ,
97
- "void(int64[:,:], int64[:,:], uint64[:,:], float64[:,:], uint32, int64[:,:])" ,
98
- ],
99
- "(n,k),(n,p),(n,p),(n,p),()->(n,k)" ,
100
- )
101
- def genedrop_Hamilton_Kerr (
102
- genotypes : ArrayLike ,
103
- parent : ArrayLike ,
104
- tau : ArrayLike ,
105
- lambda_ : ArrayLike ,
106
- seed : int ,
107
- out : ArrayLike ,
108
- ) -> None : # pragma: no cover
109
- if parent .shape [1 ] != 2 :
110
- parent , tau , lambda_ = _compress_hamilton_kerr_parameters (parent , tau , lambda_ )
111
- n_sample , n_parent = parent .shape
112
- _ , max_ploidy = genotypes .shape
113
- order = topological_argsort (parent )
114
- np .random .seed (seed )
115
- for i in range (n_sample ):
116
- t = order [i ]
117
- alleles = 0
118
- unknown_alleles = 0
119
- for j in range (n_parent ):
120
- p = parent [t , j ]
121
- tau_p = tau [t , j ]
122
- lambda_p = lambda_ [t , j ]
123
- if p < 0 :
124
- unknown_alleles += tau_p
125
- elif tau_p > 0 :
126
- ploidy_p = tau [p , 0 ] + tau [p , 1 ]
127
- gamete = _random_gamete_Hamilton_Kerr (out [p ], ploidy_p , tau_p , lambda_p )
128
- out [t , alleles : alleles + tau_p ] = gamete
129
- alleles += tau_p
130
- if unknown_alleles == 0 :
131
- if alleles < max_ploidy :
132
- # pad with fill value
133
- out [t , alleles :] = - 2
134
- elif unknown_alleles == alleles :
135
- # founder
136
- out [t ] = genotypes [t ]
137
- else :
138
- # partial founder
139
- raise ValueError ("Pedigree contains half-founders" )
140
-
141
-
142
23
def simulate_genedrop (
143
24
ds : Dataset ,
144
25
* ,
@@ -165,7 +46,7 @@ def simulate_genedrop(
165
46
which is only suitable for pedigrees in which all samples are
166
47
diploids resulting from sexual reproduction.
167
48
The "Hamilton-Kerr" method is suitable for autopolyploid and
168
- mixed-ploidy datasets following Kerr et al. 2012 and [2]
49
+ mixed-ploidy datasets following Kerr et al. 2012 [2] and
169
50
Hamilton and Kerr 2017 [3].
170
51
call_genotype
171
52
Input variable name holding call_genotype as defined by
@@ -217,8 +98,8 @@ def simulate_genedrop(
217
98
If the Hamilton-Kerr method is used and a sample has more than
218
99
two contributing parents.
219
100
ValueError
220
- If the Hamilton-Kerr method is used and the number of alleles
221
- in a genotype does not match the sum of tau values (i.e., ploidy).
101
+ If the Hamilton-Kerr method is used and the number of alleles in a
102
+ founder genotype does not match the sum of its tau values (i.e., ploidy).
222
103
NotImplementedError
223
104
If the Hamilton-Kerr method is used and a tau value exceeds the
224
105
parental ploidy.
@@ -295,6 +176,8 @@ def simulate_genedrop(
295
176
"Computation of the inverse additive relationship matrix for autopolyploid
296
177
and multiple-ploidy populations." Theoretical and Applied Genetics 131: 851-860.
297
178
"""
179
+ from .genedrop_numba_fns import genedrop_diploid , genedrop_Hamilton_Kerr
180
+
298
181
ds = define_variable_if_absent (ds , variables .parent , parent , parent_indices )
299
182
variables .validate (
300
183
ds , {parent : variables .parent_spec , call_genotype : variables .call_genotype_spec }
0 commit comments