@@ -177,28 +177,29 @@ def f(t):
177177 return np .array (ret ) * 2 * model .N0 # return in generations
178178
179179
def watterson_estimator(contigs):
    """Return a Watterson-style estimate of theta pooled over ``contigs``.

    Each contig is reduced by ``_watterson_helper`` to ``(S, sample_sizes,
    spans)``.  The numerator is the sum of ``S`` over all contigs.  The
    denominator sums, over every row whose sample size is positive,
    ``span * (ln(n) + 1 / (2 n) + gamma)``, i.e. the span weighted by the
    asymptotic expansion of a harmonic number.

    Parameters
    ----------
    contigs : iterable
        Objects accepted by ``_watterson_helper``.

    Returns
    -------
    float
        ``sum(S) / sum(span * harmonic_approx(sample_size))``.

    Raises
    ------
    ZeroDivisionError
        If there are no contigs, or no row has a positive sample size, so
        the estimator is undefined.
    """
    # NOTE(review): a pool is opened here but the helper is dispatched with
    # the builtin ``map``, so the work runs serially in this process.  The
    # serial behaviour is kept as-is; confirm whether a pool map was intended.
    with mp_pool():
        num = denom = 0
        for S, sample_sizes, spans in map(_watterson_helper, contigs):
            num += S
            # Rows with a zero sample size carry no information and would
            # make log() / the reciprocal blow up, so they are dropped.
            non_missing = sample_sizes > 0
            ss = sample_sizes[non_missing]
            sp = spans[non_missing]
            # NOTE(review): ln(n) + 1/(2n) + gamma approximates H_n, whereas
            # Watterson's classical denominator is H_{n-1}; confirm which
            # one is intended for these sample sizes.
            denom += (sp * (np.log(ss) + 0.5 / ss + np.euler_gamma)).sum()
        if denom == 0:
            raise ZeroDivisionError(
                "Watterson estimator is undefined: no rows with a positive "
                "sample size were found")
        return num / denom
189190
190191
def _watterson_helper(contig):
    """Summarise one contig for ``watterson_estimator``.

    Only ``contig.data`` is read.  It is treated as a 2-D integer array
    whose column 0 holds the span of each row and whose remaining columns
    come in groups of three (presumably one ``(a, b, n)`` triple per
    population -- TODO confirm against the data loader).

    Parameters
    ----------
    contig : object
        Any object exposing a ``data`` array laid out as described above.

    Returns
    -------
    tuple
        ``(S, sample_sizes, spans)`` where ``S`` is the summed span of the
        rows counted as segregating, ``sample_sizes`` is the per-row sample
        size and ``spans`` is column 0 of ``contig.data``.
    """
    data = contig.data
    spans = data[:, 0]
    first_cols = data[:, 1::3]
    # A row counts as segregating when any first-of-triple entry is >= 1 or
    # any second-of-triple entry is positive.
    segregating = (np.any(first_cols >= 1, axis=1) |
                   np.any(data[:, 2::3] > 0, axis=1))
    S = spans[segregating].sum()
    # Negative first-of-triple entries are clipped to zero before being
    # added (presumably they encode missing data -- TODO confirm).
    sample_sizes = (data[:, 3::3].sum(axis=1) +
                    np.maximum(0, first_cols).sum(axis=1))
    return (S, sample_sizes, spans)
202203
203204def _load_data_helper (fn ):
204205 try :
0 commit comments