Skip to content

Commit c36d7e0

Browse files
authored
Merge pull request #117 from ailurophilia/main
avoid generating large matrix in macro_fisher_null.R
2 parents 3cbe567 + 03f9709 commit c36d7e0

File tree

7 files changed

+139
-49
lines changed

7 files changed

+139
-49
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: radEmu
22
Title: Using Relative Abundance Data to Estimate of Multiplicative Differences in Mean Absolute Abundance
3-
Version: 1.2.0.0
3+
Version: 1.3.0.0
44
Authors@R: as.person(c(
55
"David Clausen <dsc24@uw.edu> [aut, cre]",
66
"Amy Willis [aut]",

R/fit_null.R

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,25 @@
1212
#' @param rho_init where to start quadratic penalty parameter
1313
#' @param tau how much to increment rho by each iteration
1414
#' @param kappa cutoff above which to increment rho. If distance to feasibility doesn't shrink by at least this factor in an iteration, increment rho by tau.
15-
#' @param B_tol tolerance for convergence in max_{k,j} |B^t_{kj} - B^{(t - 1)}_{kj}|
15+
#' @param B_tol tolerance for convergence in $max_{k,j} |B^t_{kj} - B^{(t - 1)}_{kj}|$
1616
#' @param inner_tol tolerance for inner loop
17-
#' @param constraint_tol tolerance for |B_kj - g(B_k)|
17+
#' @param constraint_tol tolerance for $|B_kj - g(B_k)|$
1818
#' @param max_step maximum step size
1919
#' @param c1 constant for armijo rule
2020
#' @param maxit maximum iterations
2121
#' @param inner_maxit max iterations per inner loop
2222
#' @param verbose shout at you?
2323
#' @param trackB track value of beta across iterations and return?
2424
#'
25-
#' @returns A list containing elements 'B', 'k_constr', 'j_constr', 'niter'
26-
#' 'gap', 'u', 'rho', and 'Bs'. 'B' is a matrix containing parameter estimates
25+
#' @return A list containing elements `B`, `k_constr`, `j_constr`, `niter`
26+
#' `gap`, `u`, `rho`, and `Bs`. `B` is a matrix containing parameter estimates
2727
#' under the null (obtained by maximum likelihood on augmented observations Y),
28-
#' 'k_constr', and 'j_constr' give row and column indexes of the parameter
29-
#' fixed to be equal to the constraint function g() under the null. 'niter' is a
28+
#' `k_constr`, and `j_constr` give row and column indexes of the parameter
29+
#' fixed to be equal to the constraint function $g()$ under the null. `niter` is a
3030
#' scalar giving total number of outer iterations used to fit the null model,
31-
#' 'gap' gives the final value of g(B_{k_constr}) - B_{k_constr j_constr},
32-
#' 'u' and 'rho' are final values of augmented Lagrangian parameters, and
33-
#' 'Bs' is a data frame containing values of B by iteration if trackB was set
31+
#' `gap` gives the final value of $g(B_{k constr}) - B_{k constr, j constr}$,
32+
#' `u` and `rho` are final values of augmented Lagrangian parameters, and
33+
#' `Bs` is a data frame containing values of B by iteration if `trackB` was set
3434
#' equal to TRUE (otherwise it contains a NULL value).
3535
#'
3636
fit_null <- function(B,

R/macro_fisher_null.R

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,8 @@ macro_fisher_null <- function(X,
6969
sm_denom <- as.numeric(as.matrix(1 + Matrix::crossprod(v,info_inverse)%*%v))
7070
sm_half_num <- info_inverse%*%v
7171

72-
#strictly speaking, this isn't the inverse info -- its the inverse of
72+
#strictly speaking, info_inverse isn't the inverse info -- its the inverse of
7373
# (an approximation to) the hessian of the augmented lagrangian
74-
info_inverse <- info_inverse - Matrix::tcrossprod(sm_half_num)/sm_denom
75-
# the tcrossprod in the last line is the final step for the sherman-morrison update
7674

7775
#compute derivative of augmented lagrangian
7876
lag_deriv <- lapply(1:J,
@@ -86,8 +84,8 @@ macro_fisher_null <- function(X,
8684

8785

8886
#direction for (approximate) Newton step
89-
update_dir <- -info_inverse%*%lag_deriv
90-
87+
update_dir <- -info_inverse %*%lag_deriv +
88+
sm_half_num%*% Matrix::crossprod(sm_half_num,lag_deriv)/sm_denom
9189
#shorten step if any of its elements are larger than allowed by max_step
9290
if(max(abs(update_dir))>max_step){
9391
update_dir <- update_dir/max(abs(update_dir))
@@ -102,6 +100,12 @@ macro_fisher_null <- function(X,
102100
update_dir[,(1:(J - 1))>=j_ref,drop = FALSE]))
103101

104102

103+
#in debug mode, compute full inverse info for backward compatibility
104+
if(debug){
105+
info_inverse <- info_inverse - Matrix::tcrossprod(sm_half_num)/sm_denom
106+
}
107+
108+
105109

106110

107111

R/score_test.R

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#' @param k_constr row index of element of B to be tested for equality to row identifiability constraint
1111
#' @param j_constr column index of element of B to be tested for equality to row identifiability constraint
1212
#' @param constraint_fn constraint function g to be imposed on each row of B; the null that
13-
#' is tested is B_{k_constr,j_constr} = g(B_k_constr)
13+
#' is tested is $B_{k_constr,j_constr} = g(B_k_constr)$
1414
#' @param constraint_grad_fn derivative of constraint_fn with respect to its
1515
#' arguments (i.e., elements of a row of B)
1616
#' @param j_ref column dropped from B in convenience parametrization used to compute
@@ -70,19 +70,19 @@
7070
#' be used in computing GEE test statistics. Default is NULL, in which case rows of
7171
#' Y are treated as independent.
7272
#'
73-
#' @return A list containing elements 'score_stat', 'pval', 'log_pval','niter',
74-
#' 'convergence', 'gap', 'u', 'rho', 'tau', 'inner_maxit', 'null_B', and 'Bs'. 'score_stat' gives the
75-
#' value of the robust score statistic for H_0: B_{k_constr,j_constr} = g(B_{k_constr}).
76-
#' 'pval' and 'log_pval' are the p-value (on natural and log scales) corresponding to
73+
#' @return A list containing elements `score_stat`, `pval`, `log_pval`,'niter`,
74+
#' `convergence`, `gap`, `u`, `rho`, `tau`, `inner_maxit`, `null_B`, and `Bs`. `score_stat` gives the
75+
#' value of the robust score statistic for $H_0: B_{k_constr,j_constr} = g(B_{k_constr})$.
76+
#' `pval` and `log_pval` are the p-value (on natural and log scales) corresponding to
7777
#' the score statistic (log_pval may be useful when the p-value is very close to zero).
78-
#' 'gap' is the final value of g(B_{k_constr}) - B_{k_constr, j_constr} obtained in
79-
#' optimization under the null. 'u' and 'rho' are final values of augmented
80-
#' Lagrangian parameters returned by null fitting algorithm. 'tau' is the final value of 'tau' that
81-
#' is used to update the 'rho' values and 'inner_maxit' is the final maximum number of iterations for
78+
#' `gap` is the final value of $g(B_{k_constr}) - B_{k_constr, j_constr}$ obtained in
79+
#' optimization under the null. `u` and `rho` are final values of augmented
80+
#' Lagrangian parameters returned by null fitting algorithm. `tau` is the final value of `tau` that
81+
#' is used to update the `rho` values and `inner_maxit` is the final maximum number of iterations for
8282
#' the inner optimization loop in optimization under the null, in which B and z parameter values are
83-
#' maximized for specific 'u' and 'rho' parameters. 'null_B' is the value of
84-
#' B returned but the null fitting algorithm. 'Bs' is by default NULL; if trackB = TRUE,
85-
#' 'Bs is a data frame containing values of B by outcome category, covariate, and
83+
#' maximized for specific `u` and `rho` parameters. `null_B` is the value of
84+
#' B returned but the null fitting algorithm. `Bs` is by default `NULL`; if `trackB = TRUE`,
85+
#' `Bs` is a data frame containing values of B by outcome category, covariate, and
8686
#' iteration.
8787
#'
8888
#'

man/fit_null.Rd

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/score_test.Rd

Lines changed: 1 addition & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-macro_fisher_null.R

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,3 +352,101 @@ test_that("We take similar step as we'd take using numerical derivatives in a mo
352352
expect_equal(update$update,n_update,tolerance = 1e-1)
353353

354354
})
355+
356+
test_that("We take same step as we'd take using numerical derivatives when gap, rho, u are zero", {
357+
set.seed(59542234)
358+
n <- 10
359+
p <- 2
360+
X <- cbind(1,rep(c(0,1),each = n/2))
361+
J <- 10000
362+
z <- rnorm(n) +8
363+
b0 <- rnorm(J)
364+
b1 <- seq(1,10,length.out = J)
365+
b1 <- b1 - mean(b1)
366+
b0 <- b0 - mean(b0)
367+
b <- rbind(b0,b1)
368+
Y <- matrix(NA,ncol = J, nrow = n)
369+
370+
k_constr <- 2
371+
j_constr <- 1
372+
p <- 2
373+
374+
constraint_fn <- function(x){ pseudohuber_center(x,0.1)}
375+
# constraint_fn <- function(x){mean(x)}
376+
377+
##### Arguments to fix:
378+
379+
constraint_grad_fn <- function(x){dpseudohuber_center_dx(x,0.1)}
380+
# constraint_grad_fn <- function(x){ rep(1/length(x), length(x))}
381+
rho_init = 1
382+
tau = 1.2
383+
kappa = 0.8
384+
obj_tol = 100
385+
score_tol <- 1e-3
386+
constraint_tol = 1e-5
387+
init_tol = 1e6
388+
c1 = 1e-4
389+
maxit = 1000
390+
inner_maxit = 25
391+
392+
Y[] <- 0
393+
for(i in 1:n){
394+
while(sum(Y[i,])==0){
395+
for(j in 1:J){
396+
temp_mean <- exp(X[i,,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
397+
Y[i,j] <- rpois(1, lambda = temp_mean)
398+
# Y[i,j] <- rnbinom(1,mu = temp_mean, size = 3)*rbinom(1,1,0.6)
399+
}
400+
}
401+
}
402+
403+
j_ref <- 5
404+
405+
full_fit <- #suppressMessages(
406+
emuFit_micro_penalized(X = X,
407+
Y = Y,
408+
B = NULL,
409+
constraint_fn = mean,
410+
tolerance = 10,
411+
verbose = FALSE)#)
412+
413+
B <- full_fit$B
414+
B[1,] <- B[1,] - B[1,j_ref]
415+
B[2,] <- B[2,] - B[2,j_ref]
416+
417+
Y_aug <- full_fit$Y_augmented
418+
419+
B[k_constr,j_constr] <- constraint_fn(B[k_constr,-j_constr])
420+
421+
u <- 0
422+
rho <- 0
423+
424+
z <- update_z(Y,X,B)
425+
426+
macro_time <- try(system.time(macro_fisher_null(X = X,
427+
Y = Y_aug,
428+
B = B,
429+
z = z,
430+
J = J,
431+
p = p,
432+
k_constr = k_constr,
433+
j_constr = j_constr,
434+
j_ref = j_ref,
435+
rho = rho,
436+
u = u,
437+
constraint_fn = constraint_fn,
438+
constraint_grad_fn = constraint_grad_fn,
439+
stepsize = 0.5,
440+
c1 = 1e-4,
441+
regularization = 0,
442+
debug= FALSE)))
443+
444+
if(inherits(macro_time, "try-error")){
445+
expect_true(FALSE)
446+
}
447+
expect_true(macro_time[3]<10)
448+
#failing this test indicates that macro_fisher_null is probably directly
449+
#computing the full inverse of the (approximate) hessian matrix of the log likelihood
450+
#this includes an outer product that does not need to be computed
451+
})
452+

0 commit comments

Comments
 (0)