Skip to content

Commit 1b0b02b

Browse files
authored
Merge pull request #118 from svteichman/require-test_kj
Make argument `test_kj` required in `emuFit()` when `run_score_tests = TRUE`
2 parents c36d7e0 + c368ae0 commit 1b0b02b

14 files changed

+122
-65
lines changed

DESCRIPTION

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
Package: radEmu
22
Title: Using Relative Abundance Data to Estimate Multiplicative Differences in Mean Absolute Abundance
3-
Version: 1.3.0.0
4-
Authors@R: as.person(c(
5-
"David Clausen <dsc24@uw.edu> [aut, cre]",
6-
"Amy Willis [aut]",
7-
"Sarah Teichman [aut]"
8-
))
3+
Version: 2.0.0.0
4+
Authors@R: c(person("David", "Clausen", role = c("aut")),
5+
person("Amy D", "Willis", email = "adwillis@uw.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2802-4317")),
6+
person("Sarah", "Teichman", role = "aut"))
97
Description: A differential abundance method for the analysis of microbiome data. radEmu estimates fold-differences in the abundance of taxa across samples relative to "typical" fold-differences. Notably, it does not require pseudocounts, nor choosing a denominator taxon.
108
URL: https://github.com/statdivlab/radEmu, https://statdivlab.github.io/radEmu/
119
License: MIT + file LICENSE

NEWS.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# radEmu 2.0.0.0
2+
3+
This is a major release that speeds up score tests and requires the user to explicitly specify which score tests they wish to perform. It makes the default behaviour faster, but is not backwards compatible.
4+
5+
## Breaking changes
6+
7+
* The argument `test_kj` is now required for `emuFit()` when `run_score_tests = TRUE` (the default). Previous default behavior was to run score tests for every parameter, which can be very time-consuming (and can easily be parallelized). This change forces the user to explicitly state what tests they would like to run, significantly decreasing unnecessary computation.
8+
9+
## Additional changes
10+
11+
* We have also streamlined estimation under the null, leading to improved convergence and reduced computation.

R/emuFit.R

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@
2222
#' should fitting step be skipped (FALSE), e.g., if score tests are to be run on an already
2323
#' fitted model. Default is TRUE.
2424
#' @param test_kj a data frame whose rows give coordinates (in category j and
25-
#' covariate k) of elements of B to construct hypothesis tests for. If \code{test_kj}
26-
#' is not provided, all elements of B save the intercept row will be tested. If you don't know
25+
#' covariate k) of elements of B to construct hypothesis tests for. If you don't know
2726
#' which indices k correspond to the covariate(s) that you would like to test, run the function
2827
#' \code{radEmu::make_design_matrix()} in order to view the design matrix, and identify which
29-
#' column of the design matrix corresponds to each covariate in your model.
28+
#' column of the design matrix corresponds to each covariate in your model. This argument is required when
29+
#' running score tests.
3030
#' @param alpha nominal type 1 error level to be used to construct confidence intervals. Default is 0.05
3131
#' (corresponding to 95% confidence intervals)
3232
#' @param return_wald_p logical: return p-values from Wald tests? Default is FALSE.
@@ -207,7 +207,8 @@ emuFit <- function(Y,
207207
match_row_names = match_row_names,
208208
verbose = verbose,
209209
remove_zero_comparison_pvals = remove_zero_comparison_pvals,
210-
unobserved_taxon_error = unobserved_taxon_error)
210+
unobserved_taxon_error = unobserved_taxon_error,
211+
run_score_tests = run_score_tests)
211212
Y <- check_results$Y
212213
X <- check_results$X
213214
cluster <- check_results$cluster

R/emuFit_check.R

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#' If a value between 0 and 1, all zero-comparison p-values below the value will be set to NA.
3030
#' Default is \code{0.01}.
3131
#' @param unobserved_taxon_error logical: should an error be thrown if Y includes taxa that have 0 counts for all samples? Default is TRUE.
32+
#' @param run_score_tests logical: perform robust score testing? Default is TRUE.
3233
#'
3334
#' @return returns objects \code{Y}, \code{X}, \code{cluster}, and \code{B_null_list}, which may be modified by tests, and throws any useful
3435
#' errors, warnings, or messages.
@@ -46,7 +47,8 @@ emuFit_check <- function(Y,
4647
match_row_names = TRUE,
4748
verbose = FALSE,
4849
remove_zero_comparison_pvals = 0.01,
49-
unobserved_taxon_error = TRUE) {
50+
unobserved_taxon_error = TRUE,
51+
run_score_tests = TRUE) {
5052

5153
# confirm that input to verbose is valid
5254
if (!(verbose %in% c(FALSE, TRUE, "development"))) {
@@ -204,6 +206,13 @@ ignoring argument 'cluster'.")
204206
}
205207
}
206208

209+
# check that test_kj is not null if running score tests
210+
if (run_score_tests) {
211+
if (is.null(test_kj)) {
212+
stop("When `run_score_tests = TRUE`, you must provide a matrix `test_kj` to determine which parameters you want to test. If you don't know which indices k correspond to the covariate(s) that you would like to test, run the function `radEmu::make_design_matrix()` in order to view the design matrix, and identify which column of the design matrix corresponds to each covariate in your model. If you don't know which indices j correspond to categories (taxa) that you want to test, you can look at the columns and column names of your `Y` matrix.")
213+
}
214+
}
215+
207216
# check for valid argument remove_zero_comparison_pvals
208217
if (remove_zero_comparison_pvals != TRUE & remove_zero_comparison_pvals != FALSE) {
209218
if (!(is.numeric(remove_zero_comparison_pvals) & remove_zero_comparison_pvals <= 1 &

man/emuFit.Rd

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/emuFit_check.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-augmentation-failures.R

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@ test_that("confirm Matrix Csparse_transpose issue is not happening", {
2222
B_null_tol = 1e-2,
2323
tolerance = 0.01,
2424
tau = 2,
25-
run_score_test = TRUE,
26-
return_wald_p = TRUE)
25+
run_score_tests = TRUE,
26+
return_wald_p = TRUE,
27+
test_kj = data.frame(k = 2, j = 1),
28+
match_row_names = FALSE)
2729

2830
expect_true("emuFit" %in% class(fitted_model))
2931

@@ -37,8 +39,10 @@ test_that("confirm Matrix Csparse_transpose issue is not happening", {
3739
B_null_tol = 1e-2,
3840
tolerance = 0.01,
3941
tau = 2,
40-
run_score_test = TRUE,
41-
return_wald_p = TRUE)
42+
run_score_tests = TRUE,
43+
return_wald_p = TRUE,
44+
test_kj = data.frame(k = 2, j = 1),
45+
match_row_names = FALSE)
4246

4347
expect_true("emuFit" %in% class(fitted_model_df))
4448

tests/testthat/test-cluster.R

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ test_that("clusters work as I want", {
2323
data = XX,
2424
Y = Y,
2525
cluster=cage_num,
26-
run_score_tests=FALSE) #### very fast
26+
run_score_tests=FALSE,
27+
match_row_names = FALSE) #### very fast
2728
expect_equal(ef_num$coef %>% class, "data.frame")
2829

2930
# check that cluster argument works as character vector and gives
@@ -33,7 +34,8 @@ test_that("clusters work as I want", {
3334
data = XX,
3435
Y = Y,
3536
cluster=cage_char,
36-
run_score_tests=FALSE)
37+
run_score_tests=FALSE,
38+
match_row_names = FALSE)
3739
expect_equal(ef_num$coef, ef_char$coef)
3840

3941
# check that cluster argument works as factor and gives equivalent results
@@ -43,7 +45,8 @@ test_that("clusters work as I want", {
4345
data = XX,
4446
Y = Y,
4547
cluster=cage_fact,
46-
run_score_tests=FALSE)
48+
run_score_tests=FALSE,
49+
match_row_names = FALSE)
4750
expect_equal(ef_num$coef, ef_fact$coef)
4851
})
4952

@@ -110,7 +113,8 @@ test_that("GEE with cluster covariance gives plausible type 1 error ",{
110113
return_both_score_pvals = FALSE,
111114
test_kj = data.frame(k = c(2,2),
112115
j = c(3,4)),
113-
cluster = cluster)
116+
cluster = cluster,
117+
match_row_names = FALSE)
114118

115119
fitted_model_nocluster <- emuFit(Y = Y,
116120
X = X,
@@ -127,7 +131,8 @@ test_that("GEE with cluster covariance gives plausible type 1 error ",{
127131
use_fullmodel_cov = FALSE,
128132
return_both_score_pvals = FALSE,
129133
test_kj = data.frame(k = c(2,2),
130-
j = c(3,4)))
134+
j = c(3,4)),
135+
match_row_names = FALSE)
131136
# })
132137

133138
filtered_coef <- fitted_model_cluster$coef[!is.na(fitted_model_cluster$coef$pval),

tests/testthat/test-emuFit.R

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ test_that("emuFit takes formulas and actually fits a model", {
3939
run_score_tests = TRUE,
4040
use_fullmodel_info = FALSE,
4141
use_fullmodel_cov = FALSE,
42-
return_both_score_pvals = FALSE)
42+
return_both_score_pvals = FALSE,
43+
test_kj = data.frame(k = 2, j = 1:6))
4344
})
4445

4546

@@ -58,7 +59,7 @@ test_that("emuFit takes formulas and actually fits a model", {
5859
formula = ~group,
5960
data = covariates,
6061
refit = FALSE,
61-
run_score_test = FALSE,
62+
run_score_tests= FALSE,
6263
fitted_model = fitted_model)
6364

6465
expect_identical(fitted_model$coef$estimate, second_model$coef$estimate)
@@ -72,10 +73,11 @@ test_that("emuFit takes formulas and actually fits a model", {
7273
B_null_tol = 0.01,
7374
tolerance = 0.01,
7475
data = covariates,
75-
run_score_test = TRUE,
76+
run_score_tests= TRUE,
7677
return_wald_p = TRUE, ### diff
7778
use_fullmodel_info = TRUE, ### diff
78-
verbose = FALSE)
79+
verbose = FALSE,
80+
test_kj = data.frame(k = 2, j = 1:6))
7981

8082

8183
expect_true(all(fitted_model_use_fullmodel_info$coef$wald_p>0 & fitted_model_use_fullmodel_info$coef$wald_p<1))
@@ -92,11 +94,12 @@ test_that("emuFit takes formulas and actually fits a model", {
9294
formula = ~group,
9395
tau = 1.2,
9496
data = covariates,
95-
run_score_test = TRUE,
97+
run_score_tests= TRUE,
9698
return_wald_p = TRUE,
9799
use_fullmodel_info = TRUE,
98100
verbose = FALSE,
99-
return_both_score_pvals = TRUE)
101+
return_both_score_pvals = TRUE,
102+
test_kj = data.frame(k = 2, j = 1:6))
100103

101104
ps_full <- fitted_model_both$coef$score_pval_full_info
102105
ps_null <- fitted_model_both$coef$score_pval_null_info
@@ -115,7 +118,7 @@ test_that("emuFit takes formulas and actually fits a model", {
115118
# formula = ~group,
116119
# tau = 1.2,
117120
# data = covariates,
118-
# run_score_test = TRUE,
121+
# run_score_tests= TRUE,
119122
# return_wald_p = TRUE,
120123
# use_fullmodel_info = TRUE,
121124
# verbose = FALSE,
@@ -146,7 +149,8 @@ test_that("emuFit takes cluster argument without breaking ",{
146149
use_fullmodel_info = FALSE,
147150
use_fullmodel_cov = FALSE,
148151
return_both_score_pvals = FALSE,
149-
cluster = rep(1:3,each = 4))
152+
cluster = rep(1:3,each = 4),
153+
test_kj = data.frame(k = 2, j = 1:6))
150154
})
151155

152156
expect_silent({
@@ -163,7 +167,8 @@ test_that("emuFit takes cluster argument without breaking ",{
163167
run_score_tests = TRUE,
164168
use_fullmodel_info = FALSE,
165169
use_fullmodel_cov = FALSE,
166-
return_both_score_pvals = FALSE)
170+
return_both_score_pvals = FALSE,
171+
test_kj = data.frame(k = 2, j = 1:6))
167172
})
168173

169174
expect_true(all(fitted_model_nocluster$coef$estimate == fitted_model_cluster$coef$estimate))
@@ -336,7 +341,7 @@ test_that("GEE with cluster covariance gives plausible type 1 error ",{
336341
# formula = ~group,
337342
# data = covariates,
338343
# tolerance = 0.01,
339-
# run_score_test = FALSE,
344+
# run_score_tests= FALSE,
340345
# return_wald_p = TRUE)
341346
#
342347
#
@@ -364,7 +369,8 @@ test_that("emuFit runs without penalty", {
364369
run_score_tests = TRUE,
365370
use_fullmodel_info = FALSE,
366371
use_fullmodel_cov = FALSE,
367-
return_both_score_pvals = FALSE)
372+
return_both_score_pvals = FALSE,
373+
test_kj = data.frame(k = 2, j = 1:6))
368374
})
369375
})
370376

@@ -383,7 +389,8 @@ test_that("emuFit runs with just intercept model", {
383389
run_score_tests = TRUE,
384390
use_fullmodel_info = FALSE,
385391
use_fullmodel_cov = FALSE,
386-
return_both_score_pvals = FALSE)
392+
return_both_score_pvals = FALSE,
393+
test_kj = data.frame(k = 1, j = 1:6))
387394
})
388395

389396
expect_message({
@@ -398,7 +405,8 @@ test_that("emuFit runs with just intercept model", {
398405
run_score_tests = TRUE,
399406
use_fullmodel_info = FALSE,
400407
use_fullmodel_cov = FALSE,
401-
return_both_score_pvals = FALSE)
408+
return_both_score_pvals = FALSE,
409+
test_kj = data.frame(k = 1, j = 1:6))
402410
})
403411

404412
expect_equal(fitted_model$coef[, 2:9], fitted_model1$coef[, 2:9])

tests/testthat/test-plot_emuFit.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ fitted_model <- emuFit(Y = Y,
2323
run_score_tests = FALSE,
2424
use_fullmodel_info = FALSE,
2525
use_fullmodel_cov = FALSE,
26-
return_both_score_pvals = FALSE)
26+
return_both_score_pvals = FALSE,
27+
match_row_names = FALSE)
2728

2829
test_that("plot() returns data frame and plot", {
2930
plot_out <- plot(x = fitted_model)

0 commit comments

Comments
 (0)