statdivlab
diff --git a/‎DESCRIPTION‎
Lines changed: 4 additions & 6 deletions b/‎DESCRIPTION‎
Lines changed: 4 additions & 6 deletions
diff --git a/‎NEWS.md‎
Lines changed: 11 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎R/emuFit.R‎
Lines changed: 5 additions & 4 deletions b/‎R/emuFit.R‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎R/emuFit_check.R‎
Lines changed: 10 additions & 1 deletion b/‎R/emuFit_check.R‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎man/emuFit.Rd‎
Lines changed: 3 additions & 3 deletions b/‎man/emuFit.Rd‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎man/emuFit_check.Rd‎
Lines changed: 4 additions & 1 deletion b/‎man/emuFit_check.Rd‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎tests/testthat/test-augmentation-failures.R‎
Lines changed: 8 additions & 4 deletions b/‎tests/testthat/test-augmentation-failures.R‎
Lines changed: 8 additions & 4 deletions
diff --git a/‎tests/testthat/test-cluster.R‎
Lines changed: 10 additions & 5 deletions b/‎tests/testthat/test-cluster.R‎
Lines changed: 10 additions & 5 deletions
diff --git a/‎tests/testthat/test-emuFit.R‎
Lines changed: 21 additions & 13 deletions b/‎tests/testthat/test-emuFit.R‎
Lines changed: 21 additions & 13 deletions
diff --git a/‎tests/testthat/test-plot_emuFit.R‎
Lines changed: 2 additions & 1 deletion b/‎tests/testthat/test-plot_emuFit.R‎
Lines changed: 2 additions & 1 deletion
@@ -1,11 +1,9 @@
 Package: radEmu
 Title: Using Relative Abundance Data to Estimate of Multiplicative Differences in Mean Absolute Abundance 
-Version: 1.3.0.0
-Authors@R: as.person(c(
-    "David Clausen <dsc24@uw.edu> [aut, cre]",
-    "Amy Willis [aut]", 
-    "Sarah Teichman [aut]"
-  ))
+Version: 2.0.0.0
+Authors@R: c(person("David", "Clausen", role = c("aut")),
+             person("Amy D", "Willis", email = "adwillis@uw.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2802-4317")),
+             person("Sarah", "Teichman", role = "aut"))
 Description: A differential abundance method for the analysis of microbiome data. radEmu estimates fold-differences in the abundance of taxa across samples relative to "typical" fold-differences. Notably, it does not require pseudocounts, nor choosing a denominator taxon. 
 URL: https://github.com/statdivlab/radEmu, https://statdivlab.github.io/radEmu/
 License: MIT + file LICENSE
 
@@ -0,0 +1,11 @@
+# radEmu 2.0.0.0
+
+This is a major release that speeds up score tests and requires the user to explicitly specify which score tests they wish to perform. It makes the default behavior faster, but is not backwards compatible.
+
+## Breaking changes
+
+* The argument `test_kj` is now required for `emuFit()` when `run_score_tests = TRUE` (the default). Previous default behavior was to run score tests for every parameter, which can be very time-consuming (and can easily be parallelized). This change forces the user to explicitly state what tests they would like to run, significantly decreasing unnecessary computation. 
+
+## Additional changes
+
+* We have also streamlined estimation under the null, leading to improved convergence and reduced computation. 
@@ -22,11 +22,11 @@
 #' should fitting step be skipped (FALSE), e.g., if score tests are to be run on an already
 #' fitted model. Default is TRUE.
 #' @param test_kj a data frame whose rows give coordinates (in category j and
-#' covariate k) of elements of B to construct hypothesis tests for. If \code{test_kj}
-#' is not provided, all elements of B save the intercept row will be tested. If you don't know
+#' covariate k) of elements of B to construct hypothesis tests for. If you don't know
 #' which indices k correspond to the covariate(s) that you would like to test, run the function
 #' \code{radEmu::make_design_matrix()} in order to view the design matrix, and identify which
-#' column of the design matrix corresponds to each covariate in your model.
+#' column of the design matrix corresponds to each covariate in your model. This argument is required when
+#' running score tests.
 #' @param alpha nominal type 1 error level to be used to construct confidence intervals. Default is 0.05
 #' (corresponding to 95% confidence intervals)
 #' @param return_wald_p logical: return p-values from Wald tests? Default is FALSE.
@@ -207,7 +207,8 @@ emuFit <- function(Y,
                                 match_row_names = match_row_names,
                                 verbose = verbose,
                                 remove_zero_comparison_pvals = remove_zero_comparison_pvals,
-                                unobserved_taxon_error = unobserved_taxon_error)
+                                unobserved_taxon_error = unobserved_taxon_error,
+                                run_score_tests = run_score_tests)
   Y <- check_results$Y
   X <- check_results$X
   cluster <- check_results$cluster
 
@@ -29,6 +29,7 @@
 #' If a value between 0 and 1, all zero-comparison p-values below the value will be set to NA. 
 #' Default is \code{0.01}. 
 #' @param unobserved_taxon_error logical: should an error be thrown if Y includes taxa that have 0 counts for all samples? Default is TRUE.
+#' @param run_score_tests logical: perform robust score testing? Default is TRUE, in which case \code{test_kj} must be provided.
 #' 
 #' @return returns objects \code{Y}, \code{X}, \code{cluster}, and \code{B_null_list}, which may be modified by tests, and throw any useful
 #' errors, warnings, or messages.
@@ -46,7 +47,8 @@ emuFit_check <- function(Y,
                          match_row_names = TRUE,
                          verbose = FALSE,
                          remove_zero_comparison_pvals = 0.01,
-                         unobserved_taxon_error = TRUE) {
+                         unobserved_taxon_error = TRUE,
+                         run_score_tests = TRUE) {
 
   # confirm that input to verbose is valid
   if (!(verbose %in% c(FALSE, TRUE, "development"))) {
@@ -204,6 +206,13 @@ ignoring argument 'cluster'.")
     }
   }
 
+  # check that test_kj is not null if running score tests
+  if (run_score_tests) {
+    if (is.null(test_kj)) {
+      stop("When `run_score_tests = TRUE`, you must provide a matrix `test_kj` to determine which parameters you want to test. If you don't know which indices k correspond to the covariate(s) that you would like to test, run the function `radEmu::make_design_matrix()` in order to view the design matrix, and identify which column of the design matrix corresponds to each covariate in your model. If you don't know which indices j correspond to categories (taxa) that you want to test, you can look at the columns and column names of your `Y` matrix.")
+    }
+  }
+  
   # check for valid argument remove_zero_comparison_pvals 
   if (remove_zero_comparison_pvals != TRUE & remove_zero_comparison_pvals != FALSE) {
     if (!(is.numeric(remove_zero_comparison_pvals) & remove_zero_comparison_pvals <= 1 &
 
@@ -22,8 +22,10 @@ test_that("confirm Matrix Csparse_transpose issue is not happening", {
                          B_null_tol = 1e-2,
                          tolerance = 0.01,
                          tau = 2,
-                         run_score_test = TRUE,
-                         return_wald_p = TRUE)
+                         run_score_tests = TRUE,
+                         return_wald_p = TRUE,
+                         test_kj = data.frame(k = 2, j = 1),
+                         match_row_names = FALSE)
 
   expect_true("emuFit" %in% class(fitted_model))
 
@@ -37,8 +39,10 @@ test_that("confirm Matrix Csparse_transpose issue is not happening", {
                             B_null_tol = 1e-2,
                             tolerance = 0.01,
                             tau = 2,
-                            run_score_test = TRUE,
-                            return_wald_p = TRUE)
+                            run_score_tests = TRUE,
+                            return_wald_p = TRUE,
+                            test_kj = data.frame(k = 2, j = 1),
+                            match_row_names = FALSE)
 
   expect_true("emuFit" %in% class(fitted_model_df))
 
 
@@ -23,7 +23,8 @@ test_that("clusters work as I want", {
                    data = XX, 
                    Y = Y, 
                    cluster=cage_num, 
-                   run_score_tests=FALSE) #### very fast
+                   run_score_tests=FALSE,
+                   match_row_names = FALSE) #### very fast
   expect_equal(ef_num$coef %>% class, "data.frame")
 
   # check that cluster argument works as character vector and gives 
@@ -33,7 +34,8 @@ test_that("clusters work as I want", {
                     data = XX, 
                     Y = Y, 
                     cluster=cage_char, 
-                    run_score_tests=FALSE) 
+                    run_score_tests=FALSE,
+                    match_row_names = FALSE) 
   expect_equal(ef_num$coef, ef_char$coef)
 
   # check that cluster argument works as factor and gives equivalent results
@@ -43,7 +45,8 @@ test_that("clusters work as I want", {
                     data = XX, 
                     Y = Y, 
                     cluster=cage_fact, 
-                    run_score_tests=FALSE) 
+                    run_score_tests=FALSE,
+                    match_row_names = FALSE) 
   expect_equal(ef_num$coef, ef_fact$coef)
 })
 
@@ -110,7 +113,8 @@ test_that("GEE with cluster covariance gives plausible type 1 error ",{
                                    return_both_score_pvals = FALSE,
                                    test_kj = data.frame(k = c(2,2),
                                                         j = c(3,4)),
-                                   cluster = cluster)
+                                   cluster = cluster,
+                                   match_row_names = FALSE)
 
     fitted_model_nocluster <- emuFit(Y = Y,
                                      X = X,
@@ -127,7 +131,8 @@ test_that("GEE with cluster covariance gives plausible type 1 error ",{
                                      use_fullmodel_cov = FALSE,
                                      return_both_score_pvals = FALSE,
                                      test_kj = data.frame(k = c(2,2),
-                                                          j = c(3,4)))
+                                                          j = c(3,4)),
+                                     match_row_names = FALSE)
     # })
 
     filtered_coef <- fitted_model_cluster$coef[!is.na(fitted_model_cluster$coef$pval),
 
@@ -39,7 +39,8 @@ test_that("emuFit takes formulas and actually fits a model", {
                            run_score_tests = TRUE, 
                            use_fullmodel_info = FALSE,
                            use_fullmodel_cov = FALSE,
-                           return_both_score_pvals = FALSE)
+                           return_both_score_pvals = FALSE,
+                           test_kj = data.frame(k = 2, j = 1:6))
   })
 
 
@@ -58,7 +59,7 @@ test_that("emuFit takes formulas and actually fits a model", {
                          formula = ~group,
                          data = covariates,
                          refit = FALSE,
-                         run_score_test = FALSE,
+                         run_score_tests= FALSE,
                          fitted_model = fitted_model)
 
   expect_identical(fitted_model$coef$estimate, second_model$coef$estimate)
@@ -72,10 +73,11 @@ test_that("emuFit takes formulas and actually fits a model", {
                                             B_null_tol = 0.01,
                                             tolerance = 0.01,
                                             data = covariates,
-                                            run_score_test = TRUE,
+                                            run_score_tests= TRUE,
                                             return_wald_p = TRUE, ### diff
                                             use_fullmodel_info = TRUE, ### diff
-                                            verbose = FALSE)
+                                            verbose = FALSE,
+                                            test_kj = data.frame(k = 2, j = 1:6))
 
 
   expect_true(all(fitted_model_use_fullmodel_info$coef$wald_p>0 & fitted_model_use_fullmodel_info$coef$wald_p<1))
@@ -92,11 +94,12 @@ test_that("emuFit takes formulas and actually fits a model", {
                                formula = ~group,
                                tau = 1.2,
                                data = covariates,
-                               run_score_test = TRUE,
+                               run_score_tests= TRUE,
                                return_wald_p = TRUE,
                                use_fullmodel_info = TRUE,
                                verbose = FALSE,
-                               return_both_score_pvals = TRUE)
+                               return_both_score_pvals = TRUE,
+                               test_kj = data.frame(k = 2, j = 1:6))
 
   ps_full <- fitted_model_both$coef$score_pval_full_info
   ps_null <- fitted_model_both$coef$score_pval_null_info
@@ -115,7 +118,7 @@ test_that("emuFit takes formulas and actually fits a model", {
   #                       formula = ~group,
   #                       tau = 1.2,
   #                       data = covariates,
-  #                       run_score_test = TRUE,
+  #                       run_score_tests= TRUE,
   #                       return_wald_p = TRUE,
   #                       use_fullmodel_info = TRUE,
   #                       verbose = FALSE,
@@ -146,7 +149,8 @@ test_that("emuFit takes cluster argument without breaking ",{
                                    use_fullmodel_info = FALSE,
                                    use_fullmodel_cov = FALSE,
                                    return_both_score_pvals = FALSE,
-                                   cluster = rep(1:3,each = 4))
+                                   cluster = rep(1:3,each = 4),
+                                   test_kj = data.frame(k = 2, j = 1:6))
           })
 
   expect_silent({
@@ -163,7 +167,8 @@ test_that("emuFit takes cluster argument without breaking ",{
                                    run_score_tests = TRUE, 
                                    use_fullmodel_info = FALSE,
                                    use_fullmodel_cov = FALSE,
-                                   return_both_score_pvals = FALSE)
+                                   return_both_score_pvals = FALSE,
+                                   test_kj = data.frame(k = 2, j = 1:6))
   })
 
   expect_true(all(fitted_model_nocluster$coef$estimate == fitted_model_cluster$coef$estimate))
@@ -336,7 +341,7 @@ test_that("GEE with cluster covariance gives plausible type 1 error ",{
 #            formula = ~group,
 #            data = covariates,
 #            tolerance = 0.01,
-#            run_score_test = FALSE,
+#            run_score_tests= FALSE,
 #            return_wald_p = TRUE)
 #   
 #   
@@ -364,7 +369,8 @@ test_that("emuFit runs without penalty", {
                            run_score_tests = TRUE, 
                            use_fullmodel_info = FALSE,
                            use_fullmodel_cov = FALSE,
-                           return_both_score_pvals = FALSE)
+                           return_both_score_pvals = FALSE,
+                           test_kj = data.frame(k = 2, j = 1:6))
   })
 })
 
@@ -383,7 +389,8 @@ test_that("emuFit runs with just intercept model", {
                            run_score_tests = TRUE, 
                            use_fullmodel_info = FALSE,
                            use_fullmodel_cov = FALSE,
-                           return_both_score_pvals = FALSE)
+                           return_both_score_pvals = FALSE,
+                           test_kj = data.frame(k = 1, j = 1:6))
   })
 
   expect_message({
@@ -398,7 +405,8 @@ test_that("emuFit runs with just intercept model", {
                            run_score_tests = TRUE, 
                            use_fullmodel_info = FALSE,
                            use_fullmodel_cov = FALSE,
-                           return_both_score_pvals = FALSE)
+                           return_both_score_pvals = FALSE,
+                           test_kj = data.frame(k = 1, j = 1:6))
   })
 
   expect_equal(fitted_model$coef[, 2:9], fitted_model1$coef[, 2:9])
 
@@ -23,7 +23,8 @@ fitted_model <- emuFit(Y = Y,
                        run_score_tests = FALSE, 
                        use_fullmodel_info = FALSE,
                        use_fullmodel_cov = FALSE,
-                       return_both_score_pvals = FALSE)
+                       return_both_score_pvals = FALSE,
+                       match_row_names = FALSE)
 
 test_that("plot() returns data frame and plot", {
   plot_out <- plot(x = fitted_model)