Commit dc408d1

closed form jacobians are working
1 parent: f4c6d48

3 files changed: +62 −51 lines


batchglm/train/tf/nb_glm/estimator.py

Lines changed: 10 additions & 7 deletions
@@ -92,6 +92,7 @@ def map_model(idx, data) -> BasicModelGraph:
             constraints_scale=constraints_scale,
             model_vars=model_vars,
             mode=pkg_constants.HESSIAN_MODE,
+            iterator=True,
             dtype=dtype
         )

@@ -104,6 +105,7 @@ def map_model(idx, data) -> BasicModelGraph:
             constraints_scale=constraints_scale,
             model_vars=model_vars,
             mode=pkg_constants.JACOBIAN_MODE,
+            iterator=True,
             dtype=dtype
         )
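Editorial note (not part of the commit): judging from the new "iterator == True and batch_model is None" branch in jacobians.py below, iterator=True appears to tell the Hessians and Jacobians graphs to treat batched_data as a dataset iterator to be map-reduced over, rather than as a single materialized batch.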

@@ -252,7 +254,7 @@ def __init__(
 
         # Define the jacobian on the batched model for Newton-Raphson:
         batch_jac = Jacobians(
-            batched_data=batch_X,
+            batched_data=batch_data,
             sample_indices=batch_sample_index,
             batch_model=batch_model,
             constraints_loc=constraints_loc,
@@ -394,8 +396,9 @@ def __init__(
             name="full_data_trainers_b_only"
         )
         with tf.name_scope("full_gradient"):
-            full_gradient = full_data_trainers.gradient[0][0]
-            full_gradient = tf.reduce_sum(tf.abs(full_gradient), axis=0)
+            # full_gradient = full_data_trainers.gradient[0][0]
+            # full_gradient = tf.reduce_sum(tf.abs(full_gradient), axis=0)
+            full_gradient = full_data_model.neg_jac
         # full_gradient = tf.add_n(
         #     [tf.reduce_sum(tf.abs(grad), axis=0) for (grad, var) in full_data_trainers.gradient])

@@ -404,12 +407,12 @@ def __init__(
             # Full data model:
             param_grad_vec = full_data_model.neg_jac
             # param_grad_vec = tf.gradients(- full_data_model.log_likelihood, model_vars.params)[0]
-            param_grad_vec_t = tf.transpose(param_grad_vec)
+            # param_grad_vec_t = tf.transpose(param_grad_vec)
 
             delta_t = tf.squeeze(tf.matrix_solve_ls(
                 full_data_model.neg_hessian,
                 # (full_data_model.hessians + tf.transpose(full_data_model.hessians, perm=[0, 2, 1])) / 2,  # don't need this with closed forms
-                tf.expand_dims(param_grad_vec_t, axis=-1),
+                tf.expand_dims(param_grad_vec, axis=-1),
                 fast=False
             ), axis=-1)
             delta = tf.transpose(delta_t)
@@ -424,11 +427,11 @@ def __init__(
             param_grad_vec_batched = batch_jac.neg_jac
             # param_grad_vec_batched = tf.gradients(- batch_model.log_likelihood,
             #                                       model_vars.params)[0]
-            param_grad_vec_batched_t = tf.transpose(param_grad_vec_batched)
+            # param_grad_vec_batched_t = tf.transpose(param_grad_vec_batched)
 
             delta_batched_t = tf.squeeze(tf.matrix_solve_ls(
                 batch_hessians.neg_hessian,
-                tf.expand_dims(param_grad_vec_batched_t, axis=-1),
+                tf.expand_dims(param_grad_vec_batched, axis=-1),
                 fast=False
             ), axis=-1)
             delta_batched = tf.transpose(delta_batched_t)
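Editorial note (not part of the commit): the two hunks above drop the explicit transposes because the closed-form negative Jacobian is now already laid out as [features, parameters] (see the tf.transpose added to _jac in jacobians.py below). A minimal NumPy sketch, with made-up shapes, of the per-feature Newton-Raphson step that the batched tf.matrix_solve_ls performs here:

    import numpy as np

    rng = np.random.default_rng(0)
    n_features, n_par = 5, 3  # hypothetical sizes

    # One small symmetric positive-definite system per feature, standing in
    # for full_data_model.neg_hessian: [features, parameters, parameters].
    A = rng.normal(size=(n_features, n_par, n_par))
    neg_hessian = A @ A.transpose(0, 2, 1) + n_par * np.eye(n_par)

    # Closed-form negative Jacobian, already [features, parameters],
    # so no transpose is needed before the solve.
    neg_jac = rng.normal(size=(n_features, n_par))

    # Solve H_f @ delta_f = g_f for every feature f, as the batched
    # tf.matrix_solve_ls call does; drop the trailing unit dimension.
    delta_t = np.linalg.solve(neg_hessian, neg_jac[..., None])[..., 0]

    # Parameters are stored [parameters, features], hence the final transpose.
    delta = delta_t.T  # Newton-Raphson update direction for model_vars.params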

batchglm/train/tf/nb_glm/jacobians.py

Lines changed: 32 additions & 32 deletions
@@ -28,9 +28,9 @@ def _coef_invariant_a(
     of i and j.
 
     .. math::
 
-        &J^{m}_{i} = X^m_i*Y-X^m_i*(Y+r)*\frac{mu}{mu+r} \\
-        &const = (Y+r)*\frac{mu}{mu+r} \\
-        &J^{m}_{i} = X^m_i*Y-X^m_i*const \\
+        &J^{m}_{i} = X^m_i*\bigg(Y-(Y+r)*\frac{mu}{mu+r}\bigg) \\
+        &const = Y-(Y+r)*\frac{mu}{mu+r} \\
+        &J^{m}_{i} = X^m_i*const \\
 
     :param X: tf.tensor observations x features
         Observation by observation and feature.
@@ -43,13 +43,14 @@ def _coef_invariant_a(
         Coefficient invariant terms of hessian of
         given observations and features.
     """
-    const = tf.multiply(
-        tf.add(X, r),  # [observations, features]
+    const = tf.multiply(  # [observations, features]
+        tf.add(X, r),
         tf.divide(
-            mu,  # [observations, features]
+            mu,
             tf.add(mu, r)
         )
     )
+    const = tf.subtract(X, const)
     return const
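Editorial check of the corrected term (not part of the diff): for one observation with count Y, dispersion r and mean \mu = \exp(x^\top a) under the log link, the negative binomial log-likelihood has the score

    \frac{\partial \ell}{\partial a_i}
        = x_i \, \mu \left( \frac{Y}{\mu} - \frac{Y + r}{\mu + r} \right)
        = x_i \left( Y - (Y + r) \, \frac{\mu}{\mu + r} \right),

so the coefficient-invariant factor is const = Y - (Y + r) \mu / (\mu + r). The added tf.subtract(X, const) folds the leading Y term into the invariant, which is what lets _a_byobs below collapse to a single matmul.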

@@ -68,11 +69,11 @@ def _coef_invariant_b(
     of i and j.
 
     .. math::
 
-        GJ{r}_{i} &= X^r_i \\
-            &*r*\bigg(psi_0(r+Y)+psi_0(r) \\
+        J{r}_{i} &= X^r_i \\
+            &*r*\bigg(psi_0(r+Y)-psi_0(r) \\
             &-\frac{r+Y}{r+mu} \\
             &+log(r)+1-log(r+mu) \bigg) \\
-        const = r*\bigg(psi_0(r+Y)+psi_0(r) \\ const1
+        const = r*\bigg(psi_0(r+Y)-psi_0(r) \\ const1
             &-\frac{r+Y}{r+mu} \\ const2
             &+log(r)+1-log(r+mu) \bigg) \\ const3
         J^{r}_{i} &= X^r_i * const \\
@@ -88,22 +89,19 @@ def _coef_invariant_b(
         Coefficient invariant terms of hessian of
         given observations and features.
     """
-    scalar_one = tf.constant(1, shape=(), dtype=X.dtype)
+    scalar_one = tf.constant(1, shape=[1, 1], dtype=X.dtype)
     # Pre-define sub-graphs that are used multiple times:
-    r_plus_mu = r + mu
-    r_plus_x = r + X
+    r_plus_mu = tf.add(r, mu)
+    r_plus_x = tf.add(r, X)
     # Define graphs for individual terms of constant term of hessian:
-    const1 = tf.add(  # [observations, features]
+    const1 = tf.subtract(
         tf.math.digamma(x=r_plus_x),
         tf.math.digamma(x=r)
     )
-    const2 = tf.negative(tf.divide(
-        r_plus_x,
-        r_plus_mu
-    ))
-    const3 = tf.add(  # [observations, features]
+    const2 = tf.negative(tf.divide(r_plus_x, r_plus_mu))
+    const3 = tf.add(
         tf.log(r),
-        scalar_two - tf.log(r_plus_mu)
+        tf.subtract(scalar_one, tf.log(r_plus_mu))
     )
     const = tf.add_n([const1, const2, const3])  # [observations, features]
     const = tf.multiply(r, const)
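Editorial check (not part of the diff): with r = \exp(x^\top b) for the dispersion model, differentiating the negative binomial log-likelihood

    \ell = \log\Gamma(Y + r) - \log\Gamma(r) - \log\Gamma(Y + 1)
         + r \log\frac{r}{r + \mu} + Y \log\frac{\mu}{r + \mu}

with respect to b_i via the chain rule \partial r / \partial b_i = x_i r gives

    \frac{\partial \ell}{\partial b_i}
        = x_i \, r \left( \psi_0(r + Y) - \psi_0(r) - \frac{r + Y}{r + \mu} + \log r + 1 - \log(r + \mu) \right),

which matches const above: the digamma terms enter with opposite signs (the const1 sign fix), and the additive constant is one, which also explains replacing the previously undefined scalar_two with scalar_one.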
@@ -178,12 +176,10 @@ def __init__(
             )
             self.neg_jac = tf.negative(self.jac)
         elif mode == "tf":
-            if batch_model is None:
-                raise ValueError("mode tf only possible if batch_model is given to Jacobians.")
             # tensorflow computes the jacobian based on the objective,
             # which is the negative log-likelihood. Accordingly, the jacobian
             # is the negative jacobian computed here.
-            self.neg_jac = self.tf(
+            self.jac = self.tf(
                 batched_data=batched_data,
                 sample_indices=sample_indices,
                 batch_model=batch_model,
@@ -193,7 +189,7 @@ def __init__(
                 iterator=iterator,
                 dtype=dtype
             )
-            self.jac = tf.negative(self.neg_jac)
+            self.neg_jac = tf.negative(self.jac)
         else:
             raise ValueError("mode not recognized in Jacobian: " + mode)

@@ -225,19 +221,16 @@ def _a_byobs(X, design_loc, design_scale, mu, r):
         :return Jblock: tf.tensor features x coefficients
             Block of jacobian.
         """
-        const = _coef_invariant_a(X=X, mu=mu, r=r)  # [observations x features]
-        Jblock = tf.subtract(  # [features x coefficients]
-            tf.matmul(tf.transpose(X), design_loc, axes=1),
-            tf.matmul(tf.transpose(const), design_loc, axes=1)
-        )
+        const = _coef_invariant_a(X=X, mu=mu, r=r)  # [observations, features]
+        Jblock = tf.matmul(tf.transpose(const), design_loc)  # [features, coefficients]
         return Jblock
 
     def _b_byobs(X, design_loc, design_scale, mu, r):
         """
         Compute the dispersion model block of the jacobian.
         """
-        const = _coef_invariant_b(X=X, mu=mu, r=r)  # [observations x features]
-        Jblock = tf.matmul(tf.transpose(const), design_loc, axes=1)  # [features x coefficients]
+        const = _coef_invariant_b(X=X, mu=mu, r=r)  # [observations, features]
+        Jblock = tf.matmul(tf.transpose(const), design_scale)  # [features, coefficients]
         return Jblock
 
     def _assemble_bybatch(idx, data):
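Editorial sketch (not part of the commit) of what the simplified blocks compute, in NumPy with made-up shapes; note the fix that _b_byobs now multiplies with design_scale rather than design_loc:

    import numpy as np

    rng = np.random.default_rng(1)
    n_obs, n_feat, n_loc, n_scale = 8, 4, 3, 2  # hypothetical sizes

    const_a = rng.normal(size=(n_obs, n_feat))   # stand-in for _coef_invariant_a output
    const_b = rng.normal(size=(n_obs, n_feat))   # stand-in for _coef_invariant_b output
    design_loc = rng.normal(size=(n_obs, n_loc))
    design_scale = rng.normal(size=(n_obs, n_scale))

    J_a = const_a.T @ design_loc    # mean-model block, [features, loc coefficients]
    J_b = const_b.T @ design_scale  # dispersion block, [features, scale coefficients]
    J = np.concatenate([J_a, J_b], axis=1)  # [features, coefficients]

How batchglm actually joins the two blocks is not shown in this hunk, so the final concatenation is illustrative only.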
@@ -310,6 +303,7 @@ def _red(prev, cur):
             idx=sample_indices,
             data=batched_data
         )
+
         return J
 
     def tf(
@@ -328,7 +322,9 @@ def tf(
         """
 
         def _jac(batch_model, model_vars):
-            return tf.gradients(batch_model.log_likelihood, model_vars.params)[0]
+            J = tf.gradients(batch_model.log_likelihood, model_vars.params)[0]
+            J = tf.transpose(J)
+            return J
 
         def _assemble_bybatch(idx, data):
             """
@@ -364,7 +360,7 @@ def _assemble_bybatch(idx, data):
                 size_factors=size_factors
             )
 
-            J = _jac(batch_model=batch_model, model_vars=model_vars)
+            J = _jac(batch_model=model, model_vars=model_vars)
             return J
 
         def _red(prev, cur):
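Editorial note (not part of the commit): tf.gradients returns a tensor shaped like model_vars.params, i.e. [coefficients, features], so the tf.transpose added in _jac brings the tf-mode Jacobian to the same [features, coefficients] layout as the analytic mode; correspondingly, __init__ now assigns the positive Jacobian first and negates it (the swapped jac/neg_jac lines above). The _jac(batch_model=model, ...) change presumably passes the BasicModelGraph built inside _assemble_bybatch, which is named model there.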
@@ -378,6 +374,10 @@ def _red(prev, cur):
             """
             return tf.add(prev, cur)
 
+        params = model_vars.params
+        p_shape_a = model_vars.a.shape[0]
+        p_shape_b = model_vars.b.shape[0]
+
         if iterator==True and batch_model is None:
             J = op_utils.map_reduce(
                 last_elem=tf.gather(sample_indices, tf.size(sample_indices) - 1),
batchglm/unit_test/test_nb_glm_jacobians.py

Lines changed: 20 additions & 12 deletions
@@ -20,7 +20,7 @@
 def estimate(input_data: InputData):
     estimator = Estimator(input_data)
     estimator.initialize()
-    estimator.train_sequence(training_strategy="QUICK")
+    # Do not train; evaluate at initialization!
     return estimator
@@ -52,28 +52,36 @@ def test_compute_hessians(self):
 
         input_data = InputData.new(sim.X, design_loc=design_loc, design_scale=design_scale)
 
-        pkg_constants.JACOBIAN_MODE = "tf"
-        self.estimator_ow = estimate(input_data)
-        t0_tf = time.time()
-        self.J_tf = self.estimator_ow.hessians
-        t1_tf = time.time()
-        self.estimator_ow.close_session()
-        self.t_tf = t1_tf - t0_tf
-
         pkg_constants.JACOBIAN_MODE = "analytic"
         self.estimator_analytic = estimate(input_data)
         t0_analytic = time.time()
-        self.J_analytic = self.estimator_analytic.jac
+        self.J_analytic = self.estimator_analytic['full_gradient']
+        self.a_analytic = self.estimator_analytic.a.values
+        self.b_analytic = self.estimator_analytic.b.values
         t1_analytic = time.time()
         self.estimator_analytic.close_session()
         self.t_analytic = t1_analytic - t0_analytic
 
+        pkg_constants.JACOBIAN_MODE = "tf"
+        self.estimator_tf = estimate(input_data)
+        t0_tf = time.time()
+        self.J_tf = self.estimator_tf['full_gradient']
+        self.a_tf = self.estimator_tf.a.values
+        self.b_tf = self.estimator_tf.b.values
+        t1_tf = time.time()
+        self.estimator_tf.close_session()
+        self.t_tf = t1_tf - t0_tf
+
         i = 1
         print("\n")
         print("run time tensorflow solution: ", str(self.t_tf))
         print("run time observation batch-wise analytic solution: ", str(self.t_analytic))
-        print("ratio of analytic jacobian to analytic observation-wise jacobian:")
-        print(self.J_tf.values[i, :] / self.J_analytic.values[i, :])
+        print("relative difference of mean estimates for analytic jacobian to observation-wise jacobian:")
+        print((self.a_analytic - self.a_tf) / self.a_tf)
+        print("relative difference of dispersion estimates for analytic jacobian to observation-wise jacobian:")
+        print((self.b_analytic - self.b_tf) / self.b_tf)
+        print("relative difference of analytic jacobian to analytic observation-wise jacobian:")
+        print((self.J_tf - self.J_analytic)/self.J_tf)
 
 
 if __name__ == '__main__':
