insightsengineering · m7pr · Nov 6, 2024 · Nov 4, 2024 · Nov 5, 2024 · Nov 5, 2024
diff --git a/R/qenv-class.R b/R/qenv-class.R
@@ -11,9 +11,11 @@
 #' @section Code:
 #'
 #' Each code element is a character representing one call. Each element has possible attributes:
-#' - warnings (`character`) the warnings output when evaluating the code element
-#' - messages (`character`) the messages output when evaluating the code element
-#' - id (`integer`) random identifier of the code element to make sure uniqueness when joining.
+#' - `warnings` (`character`) the warnings output when evaluating the code element
+#' - `messages` (`character`) the messages output when evaluating the code element
+#' - `id (`integer`) random identifier of the code element to make sure uniqueness when joining
+#' - `dependency` (`character`) names of objects that appear in this call and gets affected by this call,
+#' separated by `<-` (objects on LHS of `<-` are affected by this line, and objects on RHS are affecting this line)
 #'
 #' @keywords internal
 #' @exportClass qenv

diff --git a/R/qenv-eval_code.R b/R/qenv-eval_code.R
@@ -37,7 +37,7 @@ setMethod("eval_code", signature = c("qenv", "character"), function(object, code
 
   for (i in seq_along(code_split)) {
     current_code <- code_split[[i]]
-    current_call <- parse(text = current_code, keep.source = FALSE)
+    current_call <- parse(text = trimws(current_code), keep.source = TRUE)
 
     # Using withCallingHandlers to capture warnings and messages.
     # Using tryCatch to capture the error and abort further evaluation.
@@ -79,6 +79,12 @@ setMethod("eval_code", signature = c("qenv", "character"), function(object, code
     }
 
     attr(current_code, "id") <- sample.int(.Machine$integer.max, size = 1)
+
+    pd <- utils::getParseData(current_call)
+    pd <- normalize_pd(pd)
+    call_pd <- extract_calls(pd)[[1]]
+
+    attr(current_code, "dependency") <- c(extract_side_effects(call_pd), extract_occurrence(call_pd))
     object@code <- c(object@code, list(current_code))
   }
 

diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R
@@ -33,18 +33,11 @@ get_code_dependency <- function(code, names, check_names = TRUE) {
     return(code)
   }
 
-  # If code is bound in curly brackets, remove them.
-  # TODO: rethink if this is still needed when code is divided by calls?
-  tcode <- trimws(code)
-  if (any(grepl("^\\{.*\\}$", tcode))) {
-    tcode <- sub("^\\{(.*)\\}$", "\\1", tcode)
-  }
-
-  parsed_code <- parse(text = tcode, keep.source = TRUE)
+  parsed_code <- parse(text = trimws(code), keep.source = TRUE)
 
   pd <- utils::getParseData(parsed_code)
   pd <- normalize_pd(pd)
-  calls_pd <- extract_calls(pd)
+  calls_pd <- extract_calls(pd) # STILL NEEDED for check_names
 
   if (check_names) {
     # Detect if names are actually in code.
@@ -60,10 +53,10 @@ get_code_dependency <- function(code, names, check_names = TRUE) {
     }
   }
 
-  graph <- code_graph(calls_pd)
+  graph <- lapply(code, attr, "dependency")
   ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))
 
-  lib_ind <- detect_libraries(calls_pd)
+  lib_ind <- detect_libraries(graph)
 
   code_ids <- sort(unique(c(lib_ind, ind)))
   code[code_ids]
@@ -189,53 +182,25 @@ sub_arrows <- function(call) {
 
 # code_graph ----
 
-#' Create object dependencies graph within parsed code
-#'
-#' Builds dependency graph that identifies dependencies between objects in parsed code.
-#' Helps understand which objects depend on which.
-#'
-#' @param calls_pd `list` of `data.frame`s;
-#'  result of `utils::getParseData()` split into subsets representing individual calls;
-#'  created by `extract_calls()` function
-#'
-#' @return
-#' A list (of length of input `calls_pd`) where each element represents one call.
-#' Each element is a character vector listing names of objects that depend on this call
-#' and names of objects that this call depends on.
-#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
-#' depends on objects `b` and `c`.
-#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
-#'
-#' @keywords internal
-#' @noRd
-code_graph <- function(calls_pd) {
-  cooccurrence <- extract_occurrence(calls_pd)
-
-  side_effects <- extract_side_effects(calls_pd)
-
-  mapply(c, side_effects, cooccurrence, SIMPLIFY = FALSE)
-}
-
 #' Extract object occurrence
 #'
-#' Extracts objects occurrence within calls passed by `calls_pd`.
+#' Extracts objects occurrence within calls passed by `pd`.
 #' Also detects which objects depend on which within a call.
 #'
-#' @param calls_pd `list` of `data.frame`s;
-#'  result of `utils::getParseData()` split into subsets representing individual calls;
+#' @param pd `data.frame`;
+#'  one of the results of `utils::getParseData()` split into subsets representing individual calls;
 #'  created by `extract_calls()` function
 #'
 #' @return
-#' A list (of length of input `calls_pd`) where each element represents one call.
-#' Each element is a character vector listing names of objects that depend on this call
+#' A character vector listing names of objects that depend on this call
 #' and names of objects that this call depends on.
 #' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
 #' depends on objects `b` and `c`.
 #' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
 #'
 #' @keywords internal
 #' @noRd
-extract_occurrence <- function(calls_pd) {
+extract_occurrence <- function(pd) {
   is_in_function <- function(x) {
     # If an object is a function parameter,
     # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
@@ -253,23 +218,21 @@ extract_occurrence <- function(calls_pd) {
       x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end]
     }
   }
-  lapply(
-    calls_pd,
-    function(call_pd) {
+
       # Handle data(object)/data("object")/data(object, envir = ) independently.
-      data_call <- find_call(call_pd, "data")
+      data_call <- find_call(pd, "data")
       if (data_call) {
-        sym <- call_pd[data_call + 1, "text"]
+        sym <- pd[data_call + 1, "text"]
         return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
       }
       # Handle assign(x = ).
-      assign_call <- find_call(call_pd, "assign")
+      assign_call <- find_call(pd, "assign")
       if (assign_call) {
         # Check if parameters were named.
         # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
         # "EQ_SUB" is for `=` appearing after the name of the named parameter.
-        if (any(call_pd$token == "SYMBOL_SUB")) {
-          params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
+        if (any(pd$token == "SYMBOL_SUB")) {
+          params <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
           # Remove sequence of "=", ",".
           if (length(params > 1)) {
             remove <- integer(0)
@@ -294,12 +257,12 @@ extract_occurrence <- function(calls_pd) {
           # Object is the first entry after 'assign'.
           pos <- 1
         }
-        sym <- call_pd[assign_call + pos, "text"]
+        sym <- pd[assign_call + pos, "text"]
         return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
       }
 
       # What occurs in a function body is not tracked.
-      x <- call_pd[!is_in_function(call_pd), ]
+      x <- pd[!is_in_function(pd), ]
       sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL"))
 
       if (length(sym_cond) == 0) {
@@ -327,7 +290,7 @@ extract_occurrence <- function(calls_pd) {
 
       after <- match(min(x$id[ass_cond]), sort(x$id[c(min(ass_cond), sym_cond)])) - 1
       ans <- append(x[sym_cond, "text"], "<-", after = max(1, after))
-      roll <- in_parenthesis(call_pd)
+      roll <- in_parenthesis(pd)
       if (length(roll)) {
         c(setdiff(ans, roll), roll)
       } else {
@@ -336,8 +299,6 @@ extract_occurrence <- function(calls_pd) {
 
       ### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c.
       ### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('.
-    }
-  )
 }
 
 #' Extract side effects
@@ -350,24 +311,19 @@ extract_occurrence <- function(calls_pd) {
 #' With this tag a complete object dependency structure can be established.
 #' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.
 #'
-#' @param calls_pd `list` of `data.frame`s;
-#'  result of `utils::getParseData()` split into subsets representing individual calls;
+#' @param pd `data.frame`;
+#'  one of the results of `utils::getParseData()` split into subsets representing individual calls;
 #'  created by `extract_calls()` function
 #'
 #' @return
-#' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects
-#' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`.
+#' A character vector of names of objects
+#' depending a call tagged with `@linksto` in a corresponding element of `pd`.
 #'
 #' @keywords internal
 #' @noRd
-extract_side_effects <- function(calls_pd) {
-  lapply(
-    calls_pd,
-    function(x) {
-      linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE)
-      unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+"))
-    }
-  )
+extract_side_effects <- function(pd) {
+  linksto <- grep("@linksto", pd[pd$token == "COMMENT", "text"], value = TRUE)
+  unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+"))
 }
 
 # graph_parser ----
@@ -414,26 +370,24 @@ graph_parser <- function(x, graph) {
 #'
 #' Detects `library()` and `require()` function calls.
 #'
-#' @param calls_pd `list` of `data.frame`s;
-#'  result of `utils::getParseData()` split into subsets representing individual calls;
-#'  created by `extract_calls()` function
+#' @param `graph` the dependency graph, result of `lapply(code, attr, "dependency")`
 #'
 #' @return
-#' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing
+#' Integer vector of indices that can be applied to `graph` to obtain all calls containing
 #' `library()` or `require()` calls that are always returned for reproducibility.
 #'
 #' @keywords internal
 #' @noRd
-detect_libraries <- function(calls_pd) {
+detect_libraries <- function(graph) {
   defaults <- c("library", "require")
 
   which(
-    vapply(
-      calls_pd,
-      function(call) {
-        any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults)
-      },
-      logical(1)
+    unlist(
+      lapply(
+        graph, function(x){
+          any(grepl(pattern = paste(defaults, collapse = "|"), x = x))
+        }
+      )
     )
   )
 }

diff --git a/man/qenv-class.Rd b/man/qenv-class.Rd
diff --git a/tests/testthat/test-qenv_eval_code.R b/tests/testthat/test-qenv_eval_code.R
@@ -129,3 +129,21 @@ testthat::test_that("eval_code returns a qenv object with empty messages and war
   testthat::expect_null(attr(q@code, "message"))
   testthat::expect_null(attr(q@code, "warning"))
 })
+
+testthat::test_that("eval_code returns a qenv object with dependency attribute", {
+  q <- eval_code(qenv(), "iris_data <- head(iris)")
+  testthat::expect_identical(get_code_attr(q, "dependency"), c("iris_data", "<-", "head", "iris"))
+
+  q2 <- eval_code(qenv(), c("x <- 5", "iris_data <- head(iris)", "nrow(iris_data) #@linksto x"))
+  testthat::expect_identical(
+    lapply(q2@code, attr, "dependency"),
+    list(
+      c("x", "<-"),
+      c("iris_data", "<-", "head", "iris"),
+      c("x", "<-", "nrow", "iris_data")
+    )
+  )
+
+  q3 <- eval_code(qenv(), c("library(survival)", "library(utils)", "x <- 5"))
+  lapply(q3@code, attr, "dependency")
+})
diff --git a/tests/testthat/test-qenv_get_code.R b/tests/testthat/test-qenv_get_code.R
@@ -688,7 +688,7 @@ testthat::test_that("detects cooccurrence properly even if all objects are on lh
 # @ ---------------------------------------------------------------------------------------------------------------
 
 testthat::test_that("understands @ usage and do not treat rhs of @ as objects (only lhs)", {
-  code <- list(
+  code <- c(
     "setClass('aclass', slots = c(a = 'numeric', x = 'numeric', y = 'numeric')) # @linksto a x",
     "x <- new('aclass', a = 1:3, x = 1:3, y = 1:3)",
     "a <- new('aclass', a = 1:3, x = 1:3, y = 1:3)",
@@ -697,14 +697,14 @@ testthat::test_that("understands @ usage and do not treat rhs of @ as objects (o
     "a@x <- x@a"
   )
   q <- qenv()
-  q@code <- code # we don't use eval_code so the code is not run
+  q <- eval_code(q, code)
   testthat::expect_identical(
     get_code_g(q, names = "x"),
-    unlist(code[1:2])
+    code[1:2]
   )
   testthat::expect_identical(
     get_code_g(q, names = "a"),
-    unlist(code)
+    code
   )
 })