Skip to content

Commit 6f03292

Browse files
m7prgogonzo
andauthored
assign side_effects and occurrence as attributes of @code (#223)
Part of #216 Changes: - [x] moved `dependency` extraction from `get_code_dependency` to `eval_code` - [x] removed `extract_code_graph` - [x] extended documentation of `qenv` with 1 new attributes: `dependency`, `occurrence` - [x] merged `side_effects` and `occurrence` inside `eval_code` as they were previously joined in `extract_code_graph` anyway - [x] created tests for `qenv() |> eval_code |> get_code_attr("dependency")` - [x] changed `extract_side_effects` and `extract_occurrence` so they work on an element, and they don't use `lapply` --------- Signed-off-by: Marcin <[email protected]> Co-authored-by: Dawid Kałędkowski <[email protected]>
1 parent 26efa3f commit 6f03292

File tree

6 files changed

+111
-102
lines changed

6 files changed

+111
-102
lines changed

R/qenv-class.R

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@
1111
#' @section Code:
1212
#'
1313
#' Each code element is a character representing one call. Each element has possible attributes:
14-
#' - warnings (`character`) the warnings output when evaluating the code element
15-
#' - messages (`character`) the messages output when evaluating the code element
16-
#' - id (`integer`) random identifier of the code element to make sure uniqueness when joining.
14+
#' - `warnings` (`character`) the warnings output when evaluating the code element
15+
#' - `messages` (`character`) the messages output when evaluating the code element
16+
#' - `id (`integer`) random identifier of the code element to make sure uniqueness when joining
17+
#' - `dependency` (`character`) names of objects that appear in this call and gets affected by this call,
18+
#' separated by `<-` (objects on LHS of `<-` are affected by this line, and objects on RHS are affecting this line)
1719
#'
1820
#' @keywords internal
1921
#' @exportClass qenv

R/qenv-eval_code.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ setMethod("eval_code", signature = c("qenv", "character"), function(object, code
3737

3838
for (i in seq_along(code_split)) {
3939
current_code <- code_split[[i]]
40-
current_call <- parse(text = current_code, keep.source = FALSE)
40+
current_call <- parse(text = current_code, keep.source = TRUE)
4141

4242
# Using withCallingHandlers to capture warnings and messages.
4343
# Using tryCatch to capture the error and abort further evaluation.
@@ -79,6 +79,7 @@ setMethod("eval_code", signature = c("qenv", "character"), function(object, code
7979
}
8080

8181
attr(current_code, "id") <- sample.int(.Machine$integer.max, size = 1)
82+
attr(current_code, "dependency") <- extract_dependency(current_call)
8283
object@code <- c(object@code, list(current_code))
8384
}
8485

R/utils-get_code_dependency.R

Lines changed: 59 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -33,37 +33,22 @@ get_code_dependency <- function(code, names, check_names = TRUE) {
3333
return(code)
3434
}
3535

36-
# If code is bound in curly brackets, remove them.
37-
# TODO: rethink if this is still needed when code is divided by calls?
38-
tcode <- trimws(code)
39-
if (any(grepl("^\\{.*\\}$", tcode))) {
40-
tcode <- sub("^\\{(.*)\\}$", "\\1", tcode)
41-
}
42-
43-
parsed_code <- parse(text = tcode, keep.source = TRUE)
44-
45-
pd <- utils::getParseData(parsed_code)
46-
pd <- normalize_pd(pd)
47-
calls_pd <- extract_calls(pd)
36+
graph <- lapply(code, attr, "dependency")
4837

4938
if (check_names) {
50-
# Detect if names are actually in code.
51-
symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"]))
52-
if (any(pd$text == "assign")) {
53-
assign_calls <- Filter(function(call) find_call(call, "assign"), calls_pd)
54-
ass_str <- unlist(lapply(assign_calls, function(call) call[call$token == "STR_CONST", "text"]))
55-
ass_str <- gsub("^['\"]|['\"]$", "", ass_str)
56-
symbols <- c(ass_str, symbols)
57-
}
39+
symbols <- unlist(lapply(graph, function(call) {
40+
ind <- match("<-", call, nomatch = length(call) + 1L)
41+
call[seq_len(ind - 1L)]
42+
}))
43+
5844
if (!all(names %in% unique(symbols))) {
5945
warning("Object(s) not found in code: ", toString(setdiff(names, symbols)))
6046
}
6147
}
6248

63-
graph <- code_graph(calls_pd)
6449
ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))
6550

66-
lib_ind <- detect_libraries(calls_pd)
51+
lib_ind <- detect_libraries(graph)
6752

6853
code_ids <- sort(unique(c(lib_ind, ind)))
6954
code[code_ids]
@@ -189,53 +174,25 @@ sub_arrows <- function(call) {
189174

190175
# code_graph ----
191176

192-
#' Create object dependencies graph within parsed code
193-
#'
194-
#' Builds dependency graph that identifies dependencies between objects in parsed code.
195-
#' Helps understand which objects depend on which.
196-
#'
197-
#' @param calls_pd `list` of `data.frame`s;
198-
#' result of `utils::getParseData()` split into subsets representing individual calls;
199-
#' created by `extract_calls()` function
200-
#'
201-
#' @return
202-
#' A list (of length of input `calls_pd`) where each element represents one call.
203-
#' Each element is a character vector listing names of objects that depend on this call
204-
#' and names of objects that this call depends on.
205-
#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
206-
#' depends on objects `b` and `c`.
207-
#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
208-
#'
209-
#' @keywords internal
210-
#' @noRd
211-
code_graph <- function(calls_pd) {
212-
cooccurrence <- extract_occurrence(calls_pd)
213-
214-
side_effects <- extract_side_effects(calls_pd)
215-
216-
mapply(c, side_effects, cooccurrence, SIMPLIFY = FALSE)
217-
}
218-
219177
#' Extract object occurrence
220178
#'
221-
#' Extracts objects occurrence within calls passed by `calls_pd`.
179+
#' Extracts objects occurrence within calls passed by `pd`.
222180
#' Also detects which objects depend on which within a call.
223181
#'
224-
#' @param calls_pd `list` of `data.frame`s;
225-
#' result of `utils::getParseData()` split into subsets representing individual calls;
182+
#' @param pd `data.frame`;
183+
#' one of the results of `utils::getParseData()` split into subsets representing individual calls;
226184
#' created by `extract_calls()` function
227185
#'
228186
#' @return
229-
#' A list (of length of input `calls_pd`) where each element represents one call.
230-
#' Each element is a character vector listing names of objects that depend on this call
187+
#' A character vector listing names of objects that depend on this call
231188
#' and names of objects that this call depends on.
232189
#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
233190
#' depends on objects `b` and `c`.
234191
#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
235192
#'
236193
#' @keywords internal
237194
#' @noRd
238-
extract_occurrence <- function(calls_pd) {
195+
extract_occurrence <- function(pd) {
239196
is_in_function <- function(x) {
240197
# If an object is a function parameter,
241198
# then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
@@ -253,23 +210,21 @@ extract_occurrence <- function(calls_pd) {
253210
x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end]
254211
}
255212
}
256-
lapply(
257-
calls_pd,
258-
function(call_pd) {
213+
259214
# Handle data(object)/data("object")/data(object, envir = ) independently.
260-
data_call <- find_call(call_pd, "data")
215+
data_call <- find_call(pd, "data")
261216
if (data_call) {
262-
sym <- call_pd[data_call + 1, "text"]
217+
sym <- pd[data_call + 1, "text"]
263218
return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
264219
}
265220
# Handle assign(x = ).
266-
assign_call <- find_call(call_pd, "assign")
221+
assign_call <- find_call(pd, "assign")
267222
if (assign_call) {
268223
# Check if parameters were named.
269224
# "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
270225
# "EQ_SUB" is for `=` appearing after the name of the named parameter.
271-
if (any(call_pd$token == "SYMBOL_SUB")) {
272-
params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
226+
if (any(pd$token == "SYMBOL_SUB")) {
227+
params <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
273228
# Remove sequence of "=", ",".
274229
if (length(params > 1)) {
275230
remove <- integer(0)
@@ -294,12 +249,12 @@ extract_occurrence <- function(calls_pd) {
294249
# Object is the first entry after 'assign'.
295250
pos <- 1
296251
}
297-
sym <- call_pd[assign_call + pos, "text"]
252+
sym <- pd[assign_call + pos, "text"]
298253
return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
299254
}
300255

301256
# What occurs in a function body is not tracked.
302-
x <- call_pd[!is_in_function(call_pd), ]
257+
x <- pd[!is_in_function(pd), ]
303258
sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL"))
304259

305260
if (length(sym_cond) == 0) {
@@ -327,7 +282,7 @@ extract_occurrence <- function(calls_pd) {
327282

328283
after <- match(min(x$id[ass_cond]), sort(x$id[c(min(ass_cond), sym_cond)])) - 1
329284
ans <- append(x[sym_cond, "text"], "<-", after = max(1, after))
330-
roll <- in_parenthesis(call_pd)
285+
roll <- in_parenthesis(pd)
331286
if (length(roll)) {
332287
c(setdiff(ans, roll), roll)
333288
} else {
@@ -336,8 +291,6 @@ extract_occurrence <- function(calls_pd) {
336291

337292
### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c.
338293
### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('.
339-
}
340-
)
341294
}
342295

343296
#' Extract side effects
@@ -350,24 +303,32 @@ extract_occurrence <- function(calls_pd) {
350303
#' With this tag a complete object dependency structure can be established.
351304
#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.
352305
#'
353-
#' @param calls_pd `list` of `data.frame`s;
354-
#' result of `utils::getParseData()` split into subsets representing individual calls;
306+
#' @param pd `data.frame`;
307+
#' one of the results of `utils::getParseData()` split into subsets representing individual calls;
355308
#' created by `extract_calls()` function
356309
#'
357310
#' @return
358-
#' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects
359-
#' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`.
311+
#' A character vector of names of objects
312+
#' depending a call tagged with `@linksto` in a corresponding element of `pd`.
360313
#'
361314
#' @keywords internal
362315
#' @noRd
363-
extract_side_effects <- function(calls_pd) {
364-
lapply(
365-
calls_pd,
366-
function(x) {
367-
linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE)
368-
unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+"))
369-
}
370-
)
316+
extract_side_effects <- function(pd) {
317+
linksto <- grep("@linksto", pd[pd$token == "COMMENT", "text"], value = TRUE)
318+
unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+"))
319+
}
320+
321+
#' @param parsed_code results of `parse(text = code, keep.source = TRUE` (parsed text)
322+
#' @keywords internal
323+
#' @noRd
324+
extract_dependency <- function(parsed_code) {
325+
pd <- normalize_pd(utils::getParseData(parsed_code))
326+
reordered_pd <- extract_calls(pd)[[1]]
327+
# extract_calls is needed to reorder the pd so that assignment operator comes before symbol names
328+
# extract_calls is needed also to substitute assignment operators into specific format with fix_arrows
329+
# extract_calls is needed to omit empty calls that contain only one token `"';'"`
330+
# This cleaning is needed as extract_occurrence assumes arrows are fixed, and order is different than in original pd.
331+
c(extract_side_effects(reordered_pd), extract_occurrence(reordered_pd))
371332
}
372333

373334
# graph_parser ----
@@ -414,30 +375,32 @@ graph_parser <- function(x, graph) {
414375
#'
415376
#' Detects `library()` and `require()` function calls.
416377
#'
417-
#' @param calls_pd `list` of `data.frame`s;
418-
#' result of `utils::getParseData()` split into subsets representing individual calls;
419-
#' created by `extract_calls()` function
378+
#' @param `graph` the dependency graph, result of `lapply(code, attr, "dependency")`
420379
#'
421380
#' @return
422-
#' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing
381+
#' Integer vector of indices that can be applied to `graph` to obtain all calls containing
423382
#' `library()` or `require()` calls that are always returned for reproducibility.
424383
#'
425384
#' @keywords internal
426385
#' @noRd
427-
detect_libraries <- function(calls_pd) {
386+
detect_libraries <- function(graph) {
428387
defaults <- c("library", "require")
429388

430389
which(
431-
vapply(
432-
calls_pd,
433-
function(call) {
434-
any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults)
435-
},
436-
logical(1)
390+
unlist(
391+
lapply(
392+
graph, function(x){
393+
any(grepl(pattern = paste(defaults, collapse = "|"), x = x))
394+
}
395+
)
437396
)
438397
)
439398
}
440399

400+
401+
# utils -----------------------------------------------------------------------------------------------------------
402+
403+
441404
#' Normalize parsed data removing backticks from symbols
442405
#'
443406
#' @param pd `data.frame` resulting from `utils::getParseData()` call.
@@ -454,6 +417,10 @@ normalize_pd <- function(pd) {
454417
pd
455418
}
456419

420+
421+
# split_code ------------------------------------------------------------------------------------------------------
422+
423+
457424
#' Get line/column in the source where the calls end
458425
#'
459426
#'
@@ -511,3 +478,4 @@ split_code <- function(code) {
511478
# semicolon is treated by R parser as a separate call.
512479
gsub("^([[:space:]])*;(.+)$", "\\1\\2", new_code, perl = TRUE)
513480
}
481+

man/qenv-class.Rd

Lines changed: 5 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-qenv_eval_code.R

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,30 @@ testthat::test_that(
133133
testthat::expect_null(attr(q@code, "message"))
134134
testthat::expect_null(attr(q@code, "warning"))
135135
})
136+
137+
testthat::test_that("eval_code returns a qenv object with dependency attribute", {
138+
q <- eval_code(qenv(), "iris_data <- head(iris)")
139+
testthat::expect_identical(get_code_attr(q, "dependency"), c("iris_data", "<-", "head", "iris"))
140+
})
141+
testthat::test_that("eval_code returns a qenv object with dependency attribute that contains linksto information", {
142+
q2 <- eval_code(qenv(), c("x <- 5", "iris_data <- head(iris)", "nrow(iris_data) #@linksto x"))
143+
testthat::expect_identical(
144+
lapply(q2@code, attr, "dependency"),
145+
list(
146+
c("x", "<-"),
147+
c("iris_data", "<-", "head", "iris"),
148+
c("x", "<-", "nrow", "iris_data")
149+
)
150+
)
151+
})
152+
testthat::test_that(
153+
"eval_code returns a qenv object with dependency attribute that extracts functions after '<-' part", {
154+
q3 <- eval_code(qenv(), c("library(survival)", "head(iris)"))
155+
testthat::expect_identical(
156+
lapply(q3@code, attr, "dependency"),
157+
list(
158+
c("<-", "library", "survival"),
159+
c("<-", "head", "iris")
160+
)
161+
)
162+
})

tests/testthat/test-qenv_get_code.R

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -688,7 +688,7 @@ testthat::test_that("detects cooccurrence properly even if all objects are on lh
688688
# @ ---------------------------------------------------------------------------------------------------------------
689689

690690
testthat::test_that("understands @ usage and do not treat rhs of @ as objects (only lhs)", {
691-
code <- list(
691+
code <- c(
692692
"setClass('aclass', slots = c(a = 'numeric', x = 'numeric', y = 'numeric')) # @linksto a x",
693693
"x <- new('aclass', a = 1:3, x = 1:3, y = 1:3)",
694694
"a <- new('aclass', a = 1:3, x = 1:3, y = 1:3)",
@@ -697,14 +697,14 @@ testthat::test_that("understands @ usage and do not treat rhs of @ as objects (o
697697
"a@x <- x@a"
698698
)
699699
q <- qenv()
700-
q@code <- code # we don't use eval_code so the code is not run
700+
q <- eval_code(q, code)
701701
testthat::expect_identical(
702702
get_code_g(q, names = "x"),
703-
unlist(code[1:2])
703+
code[1:2]
704704
)
705705
testthat::expect_identical(
706706
get_code_g(q, names = "a"),
707-
unlist(code)
707+
code
708708
)
709709
})
710710

@@ -886,3 +886,12 @@ testthat::describe("Backticked symbol", {
886886
)
887887
})
888888
})
889+
890+
891+
# missing objects -------------------------------------------------------------------------------------------------
892+
893+
testthat::test_that("get_code raises warning for missing names", {
894+
q <- eval_code(qenv(), code = c("a<-1;b<-2"))
895+
testthat::expect_null(get_code(q, names = 'c'))
896+
testthat::expect_warning(get_code(q, names = 'c'), " not found in code: c")
897+
})

0 commit comments

Comments
 (0)