@@ -33,37 +33,22 @@ get_code_dependency <- function(code, names, check_names = TRUE) {
3333 return (code )
3434 }
3535
36- # If code is bound in curly brackets, remove them.
37- # TODO: rethink if this is still needed when code is divided by calls?
38- tcode <- trimws(code )
39- if (any(grepl(" ^\\ {.*\\ }$" , tcode ))) {
40- tcode <- sub(" ^\\ {(.*)\\ }$" , " \\ 1" , tcode )
41- }
42-
43- parsed_code <- parse(text = tcode , keep.source = TRUE )
44-
45- pd <- utils :: getParseData(parsed_code )
46- pd <- normalize_pd(pd )
47- calls_pd <- extract_calls(pd )
36+ graph <- lapply(code , attr , " dependency" )
4837
4938 if (check_names ) {
50- # Detect if names are actually in code.
51- symbols <- unlist(lapply(calls_pd , function (call ) call [call $ token == " SYMBOL" , " text" ]))
52- if (any(pd $ text == " assign" )) {
53- assign_calls <- Filter(function (call ) find_call(call , " assign" ), calls_pd )
54- ass_str <- unlist(lapply(assign_calls , function (call ) call [call $ token == " STR_CONST" , " text" ]))
55- ass_str <- gsub(" ^['\" ]|['\" ]$" , " " , ass_str )
56- symbols <- c(ass_str , symbols )
57- }
39+ symbols <- unlist(lapply(graph , function (call ) {
40+ ind <- match(" <-" , call , nomatch = length(call ) + 1L )
41+ call [seq_len(ind - 1L )]
42+ }))
43+
5844 if (! all(names %in% unique(symbols ))) {
5945 warning(" Object(s) not found in code: " , toString(setdiff(names , symbols )))
6046 }
6147 }
6248
63- graph <- code_graph(calls_pd )
6449 ind <- unlist(lapply(names , function (x ) graph_parser(x , graph )))
6550
66- lib_ind <- detect_libraries(calls_pd )
51+ lib_ind <- detect_libraries(graph )
6752
6853 code_ids <- sort(unique(c(lib_ind , ind )))
6954 code [code_ids ]
@@ -189,53 +174,25 @@ sub_arrows <- function(call) {
189174
190175# code_graph ----
191176
192- # ' Create object dependencies graph within parsed code
193- # '
194- # ' Builds dependency graph that identifies dependencies between objects in parsed code.
195- # ' Helps understand which objects depend on which.
196- # '
197- # ' @param calls_pd `list` of `data.frame`s;
198- # ' result of `utils::getParseData()` split into subsets representing individual calls;
199- # ' created by `extract_calls()` function
200- # '
201- # ' @return
202- # ' A list (of length of input `calls_pd`) where each element represents one call.
203- # ' Each element is a character vector listing names of objects that depend on this call
204- # ' and names of objects that this call depends on.
205- # ' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
206- # ' depends on objects `b` and `c`.
207- # ' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
208- # '
209- # ' @keywords internal
210- # ' @noRd
211- code_graph <- function (calls_pd ) {
212- cooccurrence <- extract_occurrence(calls_pd )
213-
214- side_effects <- extract_side_effects(calls_pd )
215-
216- mapply(c , side_effects , cooccurrence , SIMPLIFY = FALSE )
217- }
218-
219177# ' Extract object occurrence
220178# '
221- # ' Extracts objects occurrence within calls passed by `calls_pd `.
179+ # ' Extracts objects occurrence within calls passed by `pd `.
222180# ' Also detects which objects depend on which within a call.
223181# '
224- # ' @param calls_pd `list` of ` data.frame`s ;
225- # ' result of `utils::getParseData()` split into subsets representing individual calls;
182+ # ' @param pd ` data.frame`;
183+ # ' one of the results of `utils::getParseData()` split into subsets representing individual calls;
226184# ' created by `extract_calls()` function
227185# '
228186# ' @return
229- # ' A list (of length of input `calls_pd`) where each element represents one call.
230- # ' Each element is a character vector listing names of objects that depend on this call
187+ # ' A character vector listing names of objects that depend on this call
231188# ' and names of objects that this call depends on.
232189# ' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
233190# ' depends on objects `b` and `c`.
234191# ' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
235192# '
236193# ' @keywords internal
237194# ' @noRd
238- extract_occurrence <- function (calls_pd ) {
195+ extract_occurrence <- function (pd ) {
239196 is_in_function <- function (x ) {
240197 # If an object is a function parameter,
241198 # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
@@ -253,23 +210,21 @@ extract_occurrence <- function(calls_pd) {
253210 x $ text [x $ token == " SYMBOL" & x $ id > id_start & x $ id < id_end ]
254211 }
255212 }
256- lapply(
257- calls_pd ,
258- function (call_pd ) {
213+
259214 # Handle data(object)/data("object")/data(object, envir = ) independently.
260- data_call <- find_call(call_pd , " data" )
215+ data_call <- find_call(pd , " data" )
261216 if (data_call ) {
262- sym <- call_pd [data_call + 1 , " text" ]
217+ sym <- pd [data_call + 1 , " text" ]
263218 return (c(gsub(" ^['\" ]|['\" ]$" , " " , sym ), " <-" ))
264219 }
265220 # Handle assign(x = ).
266- assign_call <- find_call(call_pd , " assign" )
221+ assign_call <- find_call(pd , " assign" )
267222 if (assign_call ) {
268223 # Check if parameters were named.
269224 # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
270225 # "EQ_SUB" is for `=` appearing after the name of the named parameter.
271- if (any(call_pd $ token == " SYMBOL_SUB" )) {
272- params <- call_pd [ call_pd $ token %in% c(" SYMBOL_SUB" , " ','" , " EQ_SUB" ), " text" ]
226+ if (any(pd $ token == " SYMBOL_SUB" )) {
227+ params <- pd [ pd $ token %in% c(" SYMBOL_SUB" , " ','" , " EQ_SUB" ), " text" ]
273228 # Remove sequence of "=", ",".
274229 if (length(params > 1 )) {
275230 remove <- integer(0 )
@@ -294,12 +249,12 @@ extract_occurrence <- function(calls_pd) {
294249 # Object is the first entry after 'assign'.
295250 pos <- 1
296251 }
297- sym <- call_pd [assign_call + pos , " text" ]
252+ sym <- pd [assign_call + pos , " text" ]
298253 return (c(gsub(" ^['\" ]|['\" ]$" , " " , sym ), " <-" ))
299254 }
300255
301256 # What occurs in a function body is not tracked.
302- x <- call_pd [! is_in_function(call_pd ), ]
257+ x <- pd [! is_in_function(pd ), ]
303258 sym_cond <- which(x $ token %in% c(" SPECIAL" , " SYMBOL" , " SYMBOL_FUNCTION_CALL" ))
304259
305260 if (length(sym_cond ) == 0 ) {
@@ -327,7 +282,7 @@ extract_occurrence <- function(calls_pd) {
327282
328283 after <- match(min(x $ id [ass_cond ]), sort(x $ id [c(min(ass_cond ), sym_cond )])) - 1
329284 ans <- append(x [sym_cond , " text" ], " <-" , after = max(1 , after ))
330- roll <- in_parenthesis(call_pd )
285+ roll <- in_parenthesis(pd )
331286 if (length(roll )) {
332287 c(setdiff(ans , roll ), roll )
333288 } else {
@@ -336,8 +291,6 @@ extract_occurrence <- function(calls_pd) {
336291
337292 # ## NOTE 2: What if there are 2 assignments: e.g. a <- b -> c.
338293 # ## NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('.
339- }
340- )
341294}
342295
343296# ' Extract side effects
@@ -350,24 +303,32 @@ extract_occurrence <- function(calls_pd) {
350303# ' With this tag a complete object dependency structure can be established.
351304# ' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.
352305# '
353- # ' @param calls_pd `list` of ` data.frame`s ;
354- # ' result of `utils::getParseData()` split into subsets representing individual calls;
306+ # ' @param pd ` data.frame`;
307+ # ' one of the results of `utils::getParseData()` split into subsets representing individual calls;
355308# ' created by `extract_calls()` function
356309# '
357310# ' @return
358- # ' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects
359- # ' depending a call tagged with `@linksto` in a corresponding element of `calls_pd `.
311+ # ' A character vector of names of objects
312+ # ' depending a call tagged with `@linksto` in a corresponding element of `pd `.
360313# '
361314# ' @keywords internal
362315# ' @noRd
363- extract_side_effects <- function (calls_pd ) {
364- lapply(
365- calls_pd ,
366- function (x ) {
367- linksto <- grep(" @linksto" , x [x $ token == " COMMENT" , " text" ], value = TRUE )
368- unlist(strsplit(sub(" \\ s*#\\ s*@linksto\\ s+" , " " , linksto ), " \\ s+" ))
369- }
370- )
316+ extract_side_effects <- function (pd ) {
317+ linksto <- grep(" @linksto" , pd [pd $ token == " COMMENT" , " text" ], value = TRUE )
318+ unlist(strsplit(sub(" \\ s*#\\ s*@linksto\\ s+" , " " , linksto ), " \\ s+" ))
319+ }
320+
321+ # ' @param parsed_code results of `parse(text = code, keep.source = TRUE` (parsed text)
322+ # ' @keywords internal
323+ # ' @noRd
324+ extract_dependency <- function (parsed_code ) {
325+ pd <- normalize_pd(utils :: getParseData(parsed_code ))
326+ reordered_pd <- extract_calls(pd )[[1 ]]
327+ # extract_calls is needed to reorder the pd so that assignment operator comes before symbol names
328+ # extract_calls is needed also to substitute assignment operators into specific format with fix_arrows
329+ # extract_calls is needed to omit empty calls that contain only one token `"';'"`
330+ # This cleaning is needed as extract_occurrence assumes arrows are fixed, and order is different than in original pd.
331+ c(extract_side_effects(reordered_pd ), extract_occurrence(reordered_pd ))
371332}
372333
373334# graph_parser ----
@@ -414,30 +375,32 @@ graph_parser <- function(x, graph) {
414375# '
415376# ' Detects `library()` and `require()` function calls.
416377# '
417- # ' @param calls_pd `list` of `data.frame`s;
418- # ' result of `utils::getParseData()` split into subsets representing individual calls;
419- # ' created by `extract_calls()` function
378+ # ' @param `graph` the dependency graph, result of `lapply(code, attr, "dependency")`
420379# '
421380# ' @return
422- # ' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing
381+ # ' Integer vector of indices that can be applied to `graph` to obtain all calls containing
423382# ' `library()` or `require()` calls that are always returned for reproducibility.
424383# '
425384# ' @keywords internal
426385# ' @noRd
427- detect_libraries <- function (calls_pd ) {
386+ detect_libraries <- function (graph ) {
428387 defaults <- c(" library" , " require" )
429388
430389 which(
431- vapply (
432- calls_pd ,
433- function (call ) {
434- any(call $ token = = " SYMBOL_FUNCTION_CALL " & call $ text %in% defaults )
435- },
436- logical ( 1 )
390+ unlist (
391+ lapply(
392+ graph , function (x ) {
393+ any(grepl( pattern = paste( defaults , collapse = " | " ), x = x ) )
394+ }
395+ )
437396 )
438397 )
439398}
440399
400+
401+ # utils -----------------------------------------------------------------------------------------------------------
402+
403+
441404# ' Normalize parsed data removing backticks from symbols
442405# '
443406# ' @param pd `data.frame` resulting from `utils::getParseData()` call.
@@ -454,6 +417,10 @@ normalize_pd <- function(pd) {
454417 pd
455418}
456419
420+
421+ # split_code ------------------------------------------------------------------------------------------------------
422+
423+
457424# ' Get line/column in the source where the calls end
458425# '
459426# '
@@ -511,3 +478,4 @@ split_code <- function(code) {
511478 # semicolon is treated by R parser as a separate call.
512479 gsub(" ^([[:space:]])*;(.+)$" , " \\ 1\\ 2" , new_code , perl = TRUE )
513480}
481+
0 commit comments