Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions R/qenv-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
#' @section Code:
#'
#' Each code element is a character representing one call. Each element has possible attributes:
#' - warnings (`character`) the warnings output when evaluating the code element
#' - messages (`character`) the messages output when evaluating the code element
#' - id (`integer`) random identifier of the code element to make sure uniqueness when joining.
#' - `warnings` (`character`) the warnings output when evaluating the code element
#' - `messages` (`character`) the messages output when evaluating the code element
#' - `id (`integer`) random identifier of the code element to make sure uniqueness when joining
#' - `dependency` (`character`) names of objects that appear in this call and gets affected by this call,
#' separated by `<-` (objects on LHS of `<-` are affected by this line, and objects on RHS are affecting this line)
#'
#' @keywords internal
#' @exportClass qenv
Expand Down
3 changes: 2 additions & 1 deletion R/qenv-eval_code.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ setMethod("eval_code", signature = c("qenv", "character"), function(object, code

for (i in seq_along(code_split)) {
current_code <- code_split[[i]]
current_call <- parse(text = current_code, keep.source = FALSE)
current_call <- parse(text = trimws(current_code), keep.source = TRUE)

# Using withCallingHandlers to capture warnings and messages.
# Using tryCatch to capture the error and abort further evaluation.
Expand Down Expand Up @@ -79,6 +79,7 @@ setMethod("eval_code", signature = c("qenv", "character"), function(object, code
}

attr(current_code, "id") <- sample.int(.Machine$integer.max, size = 1)
attr(current_code, "dependency") <- extract_dependency(current_call)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This loop looks nice now. extract_dependency is a good move 👍

object@code <- c(object@code, list(current_code))
}

Expand Down
130 changes: 45 additions & 85 deletions R/utils-get_code_dependency.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,13 @@ get_code_dependency <- function(code, names, check_names = TRUE) {
return(code)
}

# If code is bound in curly brackets, remove them.
# TODO: rethink if this is still needed when code is divided by calls?
tcode <- trimws(code)
if (any(grepl("^\\{.*\\}$", tcode))) {
tcode <- sub("^\\{(.*)\\}$", "\\1", tcode)
}

parsed_code <- parse(text = tcode, keep.source = TRUE)

pd <- utils::getParseData(parsed_code)
pd <- normalize_pd(pd)
calls_pd <- extract_calls(pd)

if (check_names) {
# Detect if names are actually in code.
symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"]))
parsed_code <- parse(text = trimws(code), keep.source = TRUE)
pd <- normalize_pd(utils::getParseData(parsed_code))
symbols <- pd[pd$token == "SYMBOL", "text"]
if (any(pd$text == "assign")) {
calls_pd <- extract_calls(pd)
assign_calls <- Filter(function(call) find_call(call, "assign"), calls_pd)
ass_str <- unlist(lapply(assign_calls, function(call) call[call$token == "STR_CONST", "text"]))
ass_str <- gsub("^['\"]|['\"]$", "", ass_str)
Expand All @@ -60,10 +50,10 @@ get_code_dependency <- function(code, names, check_names = TRUE) {
}
}

graph <- code_graph(calls_pd)
graph <- lapply(code, attr, "dependency")
ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))

lib_ind <- detect_libraries(calls_pd)
lib_ind <- detect_libraries(graph)

code_ids <- sort(unique(c(lib_ind, ind)))
code[code_ids]
Expand Down Expand Up @@ -189,53 +179,25 @@ sub_arrows <- function(call) {

# code_graph ----

#' Create object dependencies graph within parsed code
#'
#' Builds dependency graph that identifies dependencies between objects in parsed code.
#' Helps understand which objects depend on which.
#'
#' @param calls_pd `list` of `data.frame`s;
#' result of `utils::getParseData()` split into subsets representing individual calls;
#' created by `extract_calls()` function
#'
#' @return
#' A list (of length of input `calls_pd`) where each element represents one call.
#' Each element is a character vector listing names of objects that depend on this call
#' and names of objects that this call depends on.
#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
#' depends on objects `b` and `c`.
#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
#'
#' @keywords internal
#' @noRd
code_graph <- function(calls_pd) {
cooccurrence <- extract_occurrence(calls_pd)

side_effects <- extract_side_effects(calls_pd)

mapply(c, side_effects, cooccurrence, SIMPLIFY = FALSE)
}

#' Extract object occurrence
#'
#' Extracts objects occurrence within calls passed by `calls_pd`.
#' Extracts objects occurrence within calls passed by `pd`.
#' Also detects which objects depend on which within a call.
#'
#' @param calls_pd `list` of `data.frame`s;
#' result of `utils::getParseData()` split into subsets representing individual calls;
#' @param pd `data.frame`;
#' one of the results of `utils::getParseData()` split into subsets representing individual calls;
#' created by `extract_calls()` function
#'
#' @return
#' A list (of length of input `calls_pd`) where each element represents one call.
#' Each element is a character vector listing names of objects that depend on this call
#' A character vector listing names of objects that depend on this call
#' and names of objects that this call depends on.
#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
#' depends on objects `b` and `c`.
#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
#'
#' @keywords internal
#' @noRd
extract_occurrence <- function(calls_pd) {
extract_occurrence <- function(pd) {
is_in_function <- function(x) {
# If an object is a function parameter,
# then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
Expand All @@ -253,23 +215,21 @@ extract_occurrence <- function(calls_pd) {
x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end]
}
}
lapply(
calls_pd,
function(call_pd) {

# Handle data(object)/data("object")/data(object, envir = ) independently.
data_call <- find_call(call_pd, "data")
data_call <- find_call(pd, "data")
if (data_call) {
sym <- call_pd[data_call + 1, "text"]
sym <- pd[data_call + 1, "text"]
return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
}
# Handle assign(x = ).
assign_call <- find_call(call_pd, "assign")
assign_call <- find_call(pd, "assign")
if (assign_call) {
# Check if parameters were named.
# "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
# "EQ_SUB" is for `=` appearing after the name of the named parameter.
if (any(call_pd$token == "SYMBOL_SUB")) {
params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
if (any(pd$token == "SYMBOL_SUB")) {
params <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
# Remove sequence of "=", ",".
if (length(params > 1)) {
remove <- integer(0)
Expand All @@ -294,12 +254,12 @@ extract_occurrence <- function(calls_pd) {
# Object is the first entry after 'assign'.
pos <- 1
}
sym <- call_pd[assign_call + pos, "text"]
sym <- pd[assign_call + pos, "text"]
return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
}

# What occurs in a function body is not tracked.
x <- call_pd[!is_in_function(call_pd), ]
x <- pd[!is_in_function(pd), ]
sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL"))

if (length(sym_cond) == 0) {
Expand Down Expand Up @@ -327,7 +287,7 @@ extract_occurrence <- function(calls_pd) {

after <- match(min(x$id[ass_cond]), sort(x$id[c(min(ass_cond), sym_cond)])) - 1
ans <- append(x[sym_cond, "text"], "<-", after = max(1, after))
roll <- in_parenthesis(call_pd)
roll <- in_parenthesis(pd)
if (length(roll)) {
c(setdiff(ans, roll), roll)
} else {
Expand All @@ -336,8 +296,6 @@ extract_occurrence <- function(calls_pd) {

### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c.
### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('.
}
)
}

#' Extract side effects
Expand All @@ -350,24 +308,19 @@ extract_occurrence <- function(calls_pd) {
#' With this tag a complete object dependency structure can be established.
#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.
#'
#' @param calls_pd `list` of `data.frame`s;
#' result of `utils::getParseData()` split into subsets representing individual calls;
#' @param pd `data.frame`;
#' one of the results of `utils::getParseData()` split into subsets representing individual calls;
#' created by `extract_calls()` function
#'
#' @return
#' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects
#' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`.
#' A character vector of names of objects
#' depending a call tagged with `@linksto` in a corresponding element of `pd`.
#'
#' @keywords internal
#' @noRd
extract_side_effects <- function(calls_pd) {
lapply(
calls_pd,
function(x) {
linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE)
unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+"))
}
)
extract_side_effects <- function(pd) {
linksto <- grep("@linksto", pd[pd$token == "COMMENT", "text"], value = TRUE)
unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+"))
}

# graph_parser ----
Expand Down Expand Up @@ -414,26 +367,24 @@ graph_parser <- function(x, graph) {
#'
#' Detects `library()` and `require()` function calls.
#'
#' @param calls_pd `list` of `data.frame`s;
#' result of `utils::getParseData()` split into subsets representing individual calls;
#' created by `extract_calls()` function
#' @param `graph` the dependency graph, result of `lapply(code, attr, "dependency")`
#'
#' @return
#' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing
#' Integer vector of indices that can be applied to `graph` to obtain all calls containing
#' `library()` or `require()` calls that are always returned for reproducibility.
#'
#' @keywords internal
#' @noRd
detect_libraries <- function(calls_pd) {
detect_libraries <- function(graph) {
defaults <- c("library", "require")

which(
vapply(
calls_pd,
function(call) {
any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults)
},
logical(1)
unlist(
lapply(
graph, function(x){
any(grepl(pattern = paste(defaults, collapse = "|"), x = x))
}
)
)
)
}
Expand Down Expand Up @@ -511,3 +462,12 @@ split_code <- function(code) {
# semicolon is treated by R parser as a separate call.
gsub("^([[:space:]])*;(.+)$", "\\1\\2", new_code, perl = TRUE)
}

#' @param parsed_code results of `parse(text = code, keep.source = TRUE` (parsed text)
#' @keywords internal
#' @noRd
extract_dependency <- function(parsed_code) {
pd <- normalize_pd(utils::getParseData(parsed_code))
call_pd <- extract_calls(pd)[[1]]
c(extract_side_effects(call_pd), extract_occurrence(call_pd))
}
8 changes: 5 additions & 3 deletions man/qenv-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions tests/testthat/test-qenv_eval_code.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,21 @@ testthat::test_that("eval_code returns a qenv object with empty messages and war
testthat::expect_null(attr(q@code, "message"))
testthat::expect_null(attr(q@code, "warning"))
})

testthat::test_that("eval_code returns a qenv object with dependency attribute", {
q <- eval_code(qenv(), "iris_data <- head(iris)")
testthat::expect_identical(get_code_attr(q, "dependency"), c("iris_data", "<-", "head", "iris"))

q2 <- eval_code(qenv(), c("x <- 5", "iris_data <- head(iris)", "nrow(iris_data) #@linksto x"))
testthat::expect_identical(
lapply(q2@code, attr, "dependency"),
list(
c("x", "<-"),
c("iris_data", "<-", "head", "iris"),
c("x", "<-", "nrow", "iris_data")
)
)

q3 <- eval_code(qenv(), c("library(survival)", "library(utils)", "x <- 5"))
lapply(q3@code, attr, "dependency")
})
8 changes: 4 additions & 4 deletions tests/testthat/test-qenv_get_code.R
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ testthat::test_that("detects cooccurrence properly even if all objects are on lh
# @ ---------------------------------------------------------------------------------------------------------------

testthat::test_that("understands @ usage and do not treat rhs of @ as objects (only lhs)", {
code <- list(
code <- c(
"setClass('aclass', slots = c(a = 'numeric', x = 'numeric', y = 'numeric')) # @linksto a x",
"x <- new('aclass', a = 1:3, x = 1:3, y = 1:3)",
"a <- new('aclass', a = 1:3, x = 1:3, y = 1:3)",
Expand All @@ -697,14 +697,14 @@ testthat::test_that("understands @ usage and do not treat rhs of @ as objects (o
"a@x <- x@a"
)
q <- qenv()
q@code <- code # we don't use eval_code so the code is not run
q <- eval_code(q, code)
testthat::expect_identical(
get_code_g(q, names = "x"),
unlist(code[1:2])
code[1:2]
)
testthat::expect_identical(
get_code_g(q, names = "a"),
unlist(code)
code
)
})

Expand Down
Loading