@@ -28,9 +28,9 @@ def _coef_invariant_a(
2828 of i and j.
2929 .. math::
3030
31- &J^{m}_{i} = X^m_i*Y-X^m_i* (Y+r)*\frac{mu}{mu+r} \\
32- &const = (Y+r)*\frac{mu}{mu+r} \\
33- &J^{m}_{i} = X^m_i*Y-X^m_i* const \\
31+ &J^{m}_{i} = X^m_i*\bigg(Y- (Y+r)*\frac{mu}{mu+r}\bigg) \\
32+ &const = Y- (Y+r)*\frac{mu}{mu+r} \\
33+ &J^{m}_{i} = X^m_i*const \\
3434
3535 :param X: tf.tensor observations x features
3636 Observation by observation and feature.
@@ -43,13 +43,14 @@ def _coef_invariant_a(
4343 Coefficient invariant terms of hessian of
4444 given observations and features.
4545 """
46- const = tf .multiply (
47- tf .add (X , r ), # [observations, features]
46+ const = tf .multiply ( # [observations, features]
47+ tf .add (X , r ),
4848 tf .divide (
49- mu , # [observations, features]
49+ mu ,
5050 tf .add (mu , r )
5151 )
5252 )
53+ const = tf .subtract (X , const )
5354 return const
5455
5556
@@ -68,11 +69,11 @@ def _coef_invariant_b(
6869 of i and j.
6970 .. math::
7071
71- GJ^{r}_{i} &= X^r_i \\
72- &*r*\bigg(psi_0(r+Y)+ psi_0(r) \\
72+ J^{r}_{i} &= X^r_i \\
73+ &*r*\bigg(psi_0(r+Y)- psi_0(r) \\
7374 &-\frac{r+Y}{r+mu} \\
7475 &+log(r)+1-log(r+mu) \bigg) \\
75- const = r*\bigg(psi_0(r+Y)+ psi_0(r) \\ const1
76+ const = r*\bigg(psi_0(r+Y)- psi_0(r) \\ const1
7677 &-\frac{r+Y}{r+mu} \\ const2
7778 &+log(r)+1-log(r+mu) \bigg) \\ const3
7879 J^{r}_{i} &= X^r_i * const \\
@@ -88,22 +89,19 @@ def _coef_invariant_b(
8889 Coefficient invariant terms of hessian of
8990 given observations and features.
9091 """
91- scalar_one = tf .constant (1 , shape = () , dtype = X .dtype )
92+ scalar_one = tf .constant (1 , shape = [ 1 , 1 ] , dtype = X .dtype )
9293 # Pre-define sub-graphs that are used multiple times:
93- r_plus_mu = r + mu
94- r_plus_x = r + X
94+ r_plus_mu = tf . add ( r , mu )
95+ r_plus_x = tf . add ( r , X )
9596 # Define graphs for individual terms of constant term of hessian:
96- const1 = tf .add ( # [observations, features]
97+ const1 = tf .subtract (
9798 tf .math .digamma (x = r_plus_x ),
9899 tf .math .digamma (x = r )
99100 )
100- const2 = tf .negative (tf .divide (
101- r_plus_x ,
102- r_plus_mu
103- ))
104- const3 = tf .add ( # [observations, features]
101+ const2 = tf .negative (tf .divide (r_plus_x , r_plus_mu ))
102+ const3 = tf .add (
105103 tf .log (r ),
106- scalar_two - tf .log (r_plus_mu )
104+ tf . subtract ( scalar_one , tf .log (r_plus_mu ) )
107105 )
108106 const = tf .add_n ([const1 , const2 , const3 ]) # [observations, features]
109107 const = tf .multiply (r , const )
@@ -178,12 +176,10 @@ def __init__(
178176 )
179177 self .neg_jac = tf .negative (self .jac )
180178 elif mode == "tf" :
181- if batch_model is None :
182- raise ValueError ("mode tf only possible if batch_model is given to Jacobians." )
183179 # tensorflow computes the jacobian based on the objective,
184180 # which is the negative log-likelihood. Accordingly, the jacobian
185181 # is the negative jacobian computed here.
186- self .neg_jac = self .tf (
182+ self .jac = self .tf (
187183 batched_data = batched_data ,
188184 sample_indices = sample_indices ,
189185 batch_model = batch_model ,
@@ -193,7 +189,7 @@ def __init__(
193189 iterator = iterator ,
194190 dtype = dtype
195191 )
196- self .jac = tf .negative (self .neg_jac )
192+ self .neg_jac = tf .negative (self .jac )
197193 else :
198194 raise ValueError ("mode not recognized in Jacobian: " + mode )
199195
@@ -225,19 +221,16 @@ def _a_byobs(X, design_loc, design_scale, mu, r):
225221 :return Jblock: tf.tensor features x coefficients
226222 Block of jacobian.
227223 """
228- const = _coef_invariant_a (X = X , mu = mu , r = r ) # [observations x features]
229- Jblock = tf .subtract ( # [features x coefficients]
230- tf .matmul (tf .transpose (X ), design_loc , axes = 1 ),
231- tf .matmul (tf .transpose (const ), design_loc , axes = 1 )
232- )
224+ const = _coef_invariant_a (X = X , mu = mu , r = r ) # [observations, features]
225+ Jblock = tf .matmul (tf .transpose (const ), design_loc ) # [features, coefficients]
233226 return Jblock
234227
235228 def _b_byobs (X , design_loc , design_scale , mu , r ):
236229 """
237230 Compute the dispersion model block of the jacobian.
238231 """
239- const = _coef_invariant_b (X = X , mu = mu , r = r ) # [observations x features]
240- Jblock = tf .matmul (tf .transpose (const ), design_loc , axes = 1 ) # [features x coefficients]
232+ const = _coef_invariant_b (X = X , mu = mu , r = r ) # [observations, features]
233+ Jblock = tf .matmul (tf .transpose (const ), design_scale ) # [features, coefficients]
241234 return Jblock
242235
243236 def _assemble_bybatch (idx , data ):
@@ -310,6 +303,7 @@ def _red(prev, cur):
310303 idx = sample_indices ,
311304 data = batched_data
312305 )
306+
313307 return J
314308
315309 def tf (
@@ -328,7 +322,9 @@ def tf(
328322 """
329323
330324 def _jac (batch_model , model_vars ):
331- return tf .gradients (batch_model .log_likelihood , model_vars .params )[0 ]
325+ J = tf .gradients (batch_model .log_likelihood , model_vars .params )[0 ]
326+ J = tf .transpose (J )
327+ return J
332328
333329 def _assemble_bybatch (idx , data ):
334330 """
@@ -364,7 +360,7 @@ def _assemble_bybatch(idx, data):
364360 size_factors = size_factors
365361 )
366362
367- J = _jac (batch_model = batch_model , model_vars = model_vars )
363+ J = _jac (batch_model = model , model_vars = model_vars )
368364 return J
369365
370366 def _red (prev , cur ):
@@ -378,6 +374,10 @@ def _red(prev, cur):
378374 """
379375 return tf .add (prev , cur )
380376
377+ params = model_vars .params
378+ p_shape_a = model_vars .a .shape [0 ]
379+ p_shape_b = model_vars .b .shape [0 ]
380+
381381 if iterator == True and batch_model is None :
382382 J = op_utils .map_reduce (
383383 last_elem = tf .gather (sample_indices , tf .size (sample_indices ) - 1 ),
0 commit comments