Skip to content

Commit ed2f5ba

Browse files
Merge pull request #105 from theislab/singular_fix
Singular fix
2 parents 9a7d92b + 2fcbab6 commit ed2f5ba

File tree

5 files changed

+78
-12
lines changed

5 files changed

+78
-12
lines changed

batchglm/models/base_glm/simulator.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,16 @@ def constraints_loc(self):
171171
@property
172172
def constraints_scale(self):
173173
return np.identity(n=self.b_var.shape[0])
174+
175+
def np_clip_param(
176+
self,
177+
param,
178+
name
179+
):
180+
# TODO: inherit this from somewhere?
181+
bounds_min, bounds_max = self.param_bounds(param.dtype)
182+
return np.clip(
183+
param,
184+
bounds_min[name],
185+
bounds_max[name]
186+
)

batchglm/models/glm_nb/external.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,6 @@
55
from batchglm.models.base_glm import closedform_glm_mean, closedform_glm_scale
66

77
import batchglm.data as data_utils
8-
from batchglm.utils.linalg import groupwise_solve_lm
8+
from batchglm.utils.linalg import groupwise_solve_lm
9+
10+
from batchglm import pkg_constants

batchglm/models/glm_nb/simulator.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from .model import Model
44
from .external import _SimulatorGLM, InputDataGLM
5+
from .external import pkg_constants
56

67

78
class Simulator(_SimulatorGLM, Model):
@@ -58,3 +59,35 @@ def generate_data(self):
5859
design_scale_names=None
5960
)
6061

62+
def param_bounds(
63+
self,
64+
dtype
65+
):
66+
# TODO: inherit this from somewhere?
67+
dtype = np.dtype(dtype)
68+
dmin = np.finfo(dtype).min
69+
dmax = np.finfo(dtype).max
70+
dtype = dtype.type
71+
72+
sf = dtype(pkg_constants.ACCURACY_MARGIN_RELATIVE_TO_LIMIT)
73+
bounds_min = {
74+
"a_var": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf,
75+
"b_var": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf,
76+
"eta_loc": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf,
77+
"eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf,
78+
"loc": np.nextafter(0, np.inf, dtype=dtype),
79+
"scale": np.nextafter(0, np.inf, dtype=dtype),
80+
"likelihood": dtype(0),
81+
"ll": np.log(np.nextafter(0, np.inf, dtype=dtype)),
82+
}
83+
bounds_max = {
84+
"a_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf,
85+
"b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf,
86+
"eta_loc": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf,
87+
"eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf,
88+
"loc": np.nextafter(dmax, -np.inf, dtype=dtype) / sf,
89+
"scale": np.nextafter(dmax, -np.inf, dtype=dtype) / sf,
90+
"likelihood": dtype(1),
91+
"ll": dtype(0),
92+
}
93+
return bounds_min, bounds_max

batchglm/train/numpy/base_glm/estimator.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import logging
44
import multiprocessing
55
import numpy as np
6-
import pprint
76
import scipy
87
import scipy.sparse
98
import scipy.optimize
@@ -270,16 +269,28 @@ def iwls_step(
270269
# Have to use a workaround to solve problems in parallel in dask here. This workaround does
271270
# not work if there is only a single problem, ie. if the first dimension of a and b has length 1.
272271
if a.shape[0] != 1:
273-
delta_theta[:, idx_update] = dask.array.map_blocks(
274-
np.linalg.solve, a, b[:, :, None], chunks=b[:, :, None].shape
272+
get_cond_number = lambda x: np.expand_dims(np.expand_dims(np.linalg.cond(x, p=None), axis=-1), axis=-1)
273+
invertible = np.where(dask.array.map_blocks(
274+
get_cond_number, a, chunks=a.shape
275+
).squeeze().compute() < 1 / sys.float_info.epsilon)[0]
276+
delta_theta[:, idx_update[invertible]] = dask.array.map_blocks(
277+
np.linalg.solve, a[invertible], b[invertible, :, None],
278+
chunks=b[invertible, :, None].shape
275279
).squeeze().T.compute()
276280
else:
277-
delta_theta[:, idx_update] = np.expand_dims(
278-
np.linalg.solve(a[0], b[0]).compute(),
279-
axis=-1
280-
)
281+
if np.linalg.cond(a.compute(), p=None) < 1 / sys.float_info.epsilon:
282+
delta_theta[:, idx_update] = np.expand_dims(
283+
np.linalg.solve(a[0], b[0]).compute(),
284+
axis=-1
285+
)
286+
invertible = np.array([0])
287+
else:
288+
invertible = np.array([])
281289
else:
282-
delta_theta[:, idx_update] = np.linalg.solve(a, b).T
290+
invertible = np.where(np.linalg.cond(a, p=None) < 1 / sys.float_info.epsilon)[0]
291+
delta_theta[:, idx_update[invertible]] = np.linalg.solve(a[invertible], b[invertible]).T
292+
if invertible.shape[0] < len(idx_update):
293+
print("caught %i linalg singular matrix errors" % (len(idx_update) - invertible.shape[0]))
283294
# Via np.linalg.lsts:
284295
#delta_theta[:, idx_update] = np.concatenate([
285296
# np.expand_dims(np.linalg.lstsq(a[i, :, :], b[i, :])[0], axis=-1)
@@ -512,7 +523,10 @@ def finalize(self):
512523
"""
513524
# Read from numpy-IRLS estimator specific model:
514525
self._hessian = - self.model.fim.compute()
515-
self._fisher_inv = np.linalg.inv(- self._hessian)
526+
fisher_inv = np.zeros_like(self._hessian)
527+
invertible = np.where(np.linalg.cond(self._hessian, p=None) < 1 / sys.float_info.epsilon)[0]
528+
fisher_inv[invertible] = np.linalg.inv(- self._hessian[invertible])
529+
self._fisher_inv = fisher_inv
516530
self._jacobian = np.sum(np.abs(self.model.jac.compute() / self.model.x.shape[0]), axis=1)
517531
self._log_likelihood = self.model.ll_byfeature.compute()
518532
self._loss = np.sum(self._log_likelihood)

batchglm/unit_test/test_acc_glm_all_numpy.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ def __init__(
3636
design_scale_names=simulator.input_data.design_scale_names,
3737
constraints_loc=simulator.input_data.constraints_loc,
3838
constraints_scale=simulator.input_data.constraints_scale,
39-
size_factors=simulator.input_data.size_factors
39+
size_factors=simulator.input_data.size_factors,
40+
chunk_size_cells=int(1e9),
41+
chunk_size_genes=2
4042
)
4143
else:
4244
input_data = InputDataGLM(
@@ -47,7 +49,9 @@ def __init__(
4749
design_scale_names=simulator.input_data.design_scale_names,
4850
constraints_loc=simulator.input_data.constraints_loc,
4951
constraints_scale=simulator.input_data.constraints_scale,
50-
size_factors=simulator.input_data.size_factors
52+
size_factors=simulator.input_data.size_factors,
53+
chunk_size_cells=int(1e9),
54+
chunk_size_genes=2
5155
)
5256

5357
self.estimator = Estimator(

0 commit comments

Comments
 (0)