Skip to content

Commit 84d87d9

Browse files
committed
[core] fixes #25 (again)
1 parent ed9c8f9 commit 84d87d9

File tree

2 files changed

+10
-16
lines changed

2 files changed

+10
-16
lines changed

smcpp/analysis.py

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,6 @@ def _load_data(self, files):
9090
logger.info("%d population%s", self.npop, "" if self.npop == 1 else "s")
9191
self._esfs = estimation_tools.empirical_sfs(self._contigs)
9292
logger.debug("Empirical CSFS:\n%s", self._esfs)
93-
sfs = util.undistinguished_sfs(self._esfs)
94-
try:
95-
logger.debug("Normalized empirical CSFS:\n%s", self._esfs.astype('float') / self._esfs.sum())
96-
logger.debug("Normalized empirical SFS:\n%s", sfs.astype('float') / sfs.sum())
97-
except:
98-
pass
9993

10094
def _validate_data(self):
10195
for c in self._contigs:
@@ -145,12 +139,6 @@ def _normalize_data(self, length_cutoff, filter):
145139
w, het = np.array([a[2:] for k in attrs for a in attrs[k]]).T
146140
self._het = avg = np.average(het, weights=w)
147141
logger.debug("Heterozygosity: %f", self._het)
148-
try:
149-
logger.debug("1. - esfs[0]: %f", 1. - (self._esfs.flat[0] / self._esfs.sum()))
150-
except:
151-
# self._esfs.sum() == 0
152-
logger.warn("sum(esfs) = 0?")
153-
pass
154142
if self._het == 0:
155143
logger.error("Data contain *no* mutations. Inference is impossible.")
156144
sys.exit(1)
@@ -183,7 +171,9 @@ def _normalize_data(self, length_cutoff, filter):
183171

184172
def _calculate_t1_tK(self, args):
185173
n = 2 + max(self._ns)
186-
p = (self._esfs.sum().astype('float') - self._esfs[0, 0]) / self._esfs.sum()
174+
e_sum = np.sum([e.sum() for e in self._esfs.values()])
175+
e_0 = np.sum([e[0, 0] for e in self._esfs.values()])
176+
p = (e_sum.astype('float') - e_0) / e_sum
187177
Ne = p / self._theta / (2. * (1. / np.arange(1, n)).sum())
188178
logger.debug("Ne: %f", Ne)
189179
Ne *= 2 * self._N0

smcpp/estimation_tools.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import json
1010
import contextlib
1111
import pandas as pd
12+
import itertools
1213

1314
from . import _smcpp, util
1415
from .contig import Contig
@@ -175,13 +176,16 @@ def f(t):
175176
ret.append(np.inf)
176177
return np.array(ret) * 2 * model.N0 # return in generations
177178

179+
178180
def empirical_sfs(contigs):
179181
with mp_pool() as p:
180-
esfss = list(p.map(_esfs_helper, contigs))
182+
esfss = list(map(_esfs_helper, contigs))
181183
# Some contigs might be of a smaller sample size. Restrict to those
182184
# that are "full dimensional"
183-
shp = np.max([e.shape for e in esfss], axis=0)
184-
return np.sum([e for e in esfss if np.all(e.shape == shp)], axis=0, dtype=np.float32)
185+
d = {}
186+
for e in esfss:
187+
d.setdefault(e.shape, []).append(e)
188+
return {k: np.sum(d[k], axis=0) for k in d}
185189

186190

187191
def _esfs_helper(contig):

0 commit comments

Comments
 (0)