Skip to content

Commit 3e5048e

Browse files
Copilotm7pr
andcommitted
Refactor get_code_dependency function for improved readability
Co-authored-by: m7pr <[email protected]>
1 parent e53c0c4 commit 3e5048e

File tree

1 file changed

+228
-82
lines changed

1 file changed

+228
-82
lines changed

R/utils-get_code_dependency.R

Lines changed: 228 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -211,108 +211,254 @@ sub_arrows <- function(call) {
211211
#' @keywords internal
212212
#' @noRd
213213
extract_occurrence <- function(pd) {
214-
is_in_function <- function(x) {
215-
# If an object is a function parameter,
216-
# then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
217-
function_id <- x[x$token == "FUNCTION", "parent"]
218-
if (length(function_id)) {
219-
x$id %in% get_children(x, function_id[1])$id
220-
} else {
221-
rep(FALSE, nrow(x))
222-
}
214+
# Handle special function calls first
215+
data_dependency <- handle_data_call(pd)
216+
if (!is.null(data_dependency)) {
217+
return(data_dependency)
223218
}
224-
in_parenthesis <- function(x) {
225-
if (any(x$token %in% c("LBB", "'['"))) {
226-
id_start <- min(x$id[x$token %in% c("LBB", "'['")])
227-
id_end <- min(x$id[x$token == "']'"])
228-
x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end]
229-
}
219+
220+
assign_dependency <- handle_assign_call(pd)
221+
if (!is.null(assign_dependency)) {
222+
return(assign_dependency)
230223
}
224+
225+
# Process general assignment expressions
226+
extract_general_assignment(pd)
227+
}
231228

232-
# Handle data(object)/data("object")/data(object, envir = ) independently.
233-
data_call <- find_call(pd, "data")
234-
if (data_call) {
235-
sym <- pd[data_call + 1, "text"]
236-
return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
229+
#' Handle data() function calls
230+
#'
231+
#' Extracts dependencies for data() function calls like data(object) or data("object").
232+
#'
233+
#' @param pd `data.frame` parse data for a single call
234+
#' @return Character vector with dependency info or NULL if not a data call
235+
#' @keywords internal
236+
#' @noRd
237+
handle_data_call <- function(pd) {
238+
data_call_pos <- find_call(pd, "data")
239+
if (data_call_pos == 0) {
240+
return(NULL)
237241
}
238-
# Handle assign(x = ).
239-
assign_call <- find_call(pd, "assign")
240-
if (assign_call) {
241-
# Check if parameters were named.
242-
# "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
243-
# "EQ_SUB" is for `=` appearing after the name of the named parameter.
244-
if (any(pd$token == "SYMBOL_SUB")) {
245-
params <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
246-
# Remove sequence of "=", ",".
247-
if (length(params > 1)) {
248-
remove <- integer(0)
249-
for (i in 2:length(params)) {
250-
if (params[i - 1] == "=" && params[i] == ",") {
251-
remove <- c(remove, i - 1, i)
252-
}
253-
}
254-
if (length(remove)) params <- params[-remove]
255-
}
256-
pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0))
257-
if (!pos) {
258-
return(character(0L))
259-
}
260-
# pos is indicator of the place of 'x'
261-
# 1. All parameters are named, but none is 'x' - return(character(0L))
262-
# 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))
263-
# - check "x" in params being just a vector of named parameters.
264-
# 3. Some parameters are named, 'x' is not in named parameters
265-
# - check first appearance of "," (unnamed parameter) in vector parameters.
266-
} else {
267-
# Object is the first entry after 'assign'.
268-
pos <- 1
242+
243+
# Extract the object name from data(object)
244+
object_name <- pd[data_call_pos + 1, "text"]
245+
# Remove quotes if present: data("object") -> object
246+
clean_name <- gsub("^['\"]|['\"]$", "", object_name)
247+
c(clean_name, "<-")
248+
}
249+
250+
#' Handle assign() function calls
251+
#'
252+
#' Extracts dependencies for assign() function calls with named or unnamed parameters.
253+
#'
254+
#' @param pd `data.frame` parse data for a single call
255+
#' @return Character vector with dependency info or NULL if not an assign call
256+
#' @keywords internal
257+
#' @noRd
258+
handle_assign_call <- function(pd) {
259+
assign_call_pos <- find_call(pd, "assign")
260+
if (assign_call_pos == 0) {
261+
return(NULL)
262+
}
263+
264+
param_position <- get_assign_param_position(pd)
265+
if (param_position == 0) {
266+
return(character(0L))
267+
}
268+
269+
# Extract the assigned object name
270+
object_name <- pd[assign_call_pos + param_position, "text"]
271+
# Remove quotes if present: assign("object", value) -> object
272+
clean_name <- gsub("^['\"]|['\"]$", "", object_name)
273+
c(clean_name, "<-")
274+
}
275+
276+
#' Get parameter position for assign() function
277+
#'
278+
#' Determines the position of the 'x' parameter in assign() calls,
279+
#' handling both named and unnamed parameter cases.
280+
#'
281+
#' @param pd `data.frame` parse data for a single call
282+
#' @return Integer position offset from assign call, or 0 if 'x' parameter not found
283+
#' @keywords internal
284+
#' @noRd
285+
get_assign_param_position <- function(pd) {
286+
# Check if any parameters are named
287+
has_named_params <- any(pd$token == "SYMBOL_SUB")
288+
289+
if (has_named_params) {
290+
# Extract parameter tokens: named parameters (SYMBOL_SUB), commas, equals
291+
param_tokens <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
292+
293+
# Clean up sequences of "=" followed by ","
294+
cleaned_params <- remove_consecutive_equals_comma(param_tokens)
295+
296+
# Find position of 'x' parameter among named parameters
297+
pos <- match("x", setdiff(cleaned_params, ","), nomatch = 0)
298+
if (pos == 0) {
299+
# If 'x' not found in named params, check for first unnamed parameter
300+
pos <- match(",", cleaned_params, nomatch = 0)
269301
}
270-
sym <- pd[assign_call + pos, "text"]
271-
return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
302+
return(pos)
303+
} else {
304+
# All parameters are unnamed, 'x' is the first parameter
305+
return(1)
272306
}
307+
}
273308

274-
# What occurs in a function body is not tracked.
275-
x <- pd[!is_in_function(pd), ]
276-
sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL"))
277-
sym_fc_cond <- which(x$token == "SYMBOL_FUNCTION_CALL")
309+
#' Remove consecutive "=" and "," sequences from parameter tokens
310+
#'
311+
#' @param params Character vector of parameter tokens
312+
#' @return Character vector with "=", "," sequences removed
313+
#' @keywords internal
314+
#' @noRd
315+
remove_consecutive_equals_comma <- function(params) {
316+
if (length(params) <= 1) {
317+
return(params)
318+
}
319+
320+
indices_to_remove <- integer(0)
321+
for (i in 2:length(params)) {
322+
if (params[i - 1] == "=" && params[i] == ",") {
323+
indices_to_remove <- c(indices_to_remove, i - 1, i)
324+
}
325+
}
326+
327+
if (length(indices_to_remove) > 0) {
328+
params[-indices_to_remove]
329+
} else {
330+
params
331+
}
332+
}
278333

279-
if (length(sym_cond) == 0) {
334+
#' Extract dependencies from general assignment expressions
335+
#'
336+
#' Processes standard assignment operations (<-, =, ->) and extracts
337+
#' object dependencies while handling special cases.
338+
#'
339+
#' @param pd `data.frame` parse data for a single call
340+
#' @return Character vector with dependency information
341+
#' @keywords internal
342+
#' @noRd
343+
extract_general_assignment <- function(pd) {
344+
# Filter out symbols that are function parameters
345+
filtered_pd <- pd[!is_symbol_in_function_body(pd), ]
346+
347+
# Find all symbol positions
348+
symbol_indices <- which(filtered_pd$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL"))
349+
function_call_indices <- which(filtered_pd$token == "SYMBOL_FUNCTION_CALL")
350+
351+
if (length(symbol_indices) == 0) {
280352
return(character(0L))
281353
}
282-
# Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.
283-
# For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.
284-
dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"]
285-
if (length(dollar_ids)) {
286-
object_ids <- x[sym_cond, "id"]
287-
after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids]
288-
sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar))
354+
355+
# Remove symbols that come after $ or @ operators (e.g., in x$a, remove 'a')
356+
symbol_indices <- exclude_symbols_after_operators(filtered_pd, symbol_indices)
357+
358+
# Look for assignment operators
359+
assignment_indices <- grep("ASSIGN", filtered_pd$token)
360+
if (length(assignment_indices) == 0) {
361+
# No assignment found, return all symbols as dependencies
362+
return(c("<-", unique(filtered_pd[symbol_indices, "text"])))
289363
}
364+
365+
# Process assignment expression
366+
process_assignment_expression(filtered_pd, symbol_indices, function_call_indices, assignment_indices, pd)
367+
}
290368

291-
assign_cond <- grep("ASSIGN", x$token)
292-
if (!length(assign_cond)) {
293-
return(c("<-", unique(x[sym_cond, "text"])))
369+
#' Check if symbols are in function body
370+
#'
371+
#' @param pd `data.frame` parse data
372+
#' @return Logical vector indicating which rows are function parameters
373+
#' @keywords internal
374+
#' @noRd
375+
is_symbol_in_function_body <- function(pd) {
376+
function_ids <- pd[pd$token == "FUNCTION", "parent"]
377+
if (length(function_ids) == 0) {
378+
return(rep(FALSE, nrow(pd)))
294379
}
380+
381+
# Get all children of function definitions
382+
function_children <- get_children(pd, function_ids[1])$id
383+
pd$id %in% function_children
384+
}
295385

296-
# For cases like 'eval(expression(c <- b + 2))' removes 'eval(expression('.
297-
sym_cond <- sym_cond[!(sym_cond < min(assign_cond) & sym_cond %in% sym_fc_cond)]
298-
299-
# If there was an assignment operation detect direction of it.
300-
if (unique(x$text[assign_cond]) == "->") { # What if there are 2 assignments: e.g. a <- b -> c.
301-
sym_cond <- rev(sym_cond)
386+
#' Exclude symbols that appear after $ or @ operators
387+
#'
388+
#' @param pd `data.frame` parse data
389+
#' @param symbol_indices Integer vector of symbol positions
390+
#' @return Integer vector of filtered symbol positions
391+
#' @keywords internal
392+
#' @noRd
393+
exclude_symbols_after_operators <- function(pd, symbol_indices) {
394+
# Find $ and @ operator positions
395+
operator_ids <- pd[pd$token %in% c("'$'", "'@'"), "id"]
396+
if (length(operator_ids) == 0) {
397+
return(symbol_indices)
302398
}
399+
400+
# For x$a, a's ID is $'s ID-2, so we exclude symbols with ID = operator_ID - 2
401+
symbol_ids <- pd[symbol_indices, "id"]
402+
symbols_after_operators <- symbol_ids[(symbol_ids - 2) %in% operator_ids]
403+
404+
# Remove these symbols from our indices
405+
setdiff(symbol_indices, which(pd$id %in% symbols_after_operators))
406+
}
303407

304-
after <- match(min(x$id[assign_cond]), sort(x$id[c(min(assign_cond), sym_cond)])) - 1
305-
ans <- append(x[sym_cond, "text"], "<-", after = max(1, after))
306-
ans <- move_functions_after_arrow(ans, unique(x[sym_fc_cond, "text"]))
307-
roll <- in_parenthesis(pd)
308-
if (length(roll)) {
309-
c(setdiff(ans, roll), roll)
408+
#' Process assignment expression and build dependency vector
409+
#'
410+
#' @param pd `data.frame` filtered parse data
411+
#' @param symbol_indices Integer vector of symbol positions
412+
#' @param function_call_indices Integer vector of function call positions
413+
#' @param assignment_indices Integer vector of assignment operator positions
414+
#' @param original_pd `data.frame` original parse data for bracket processing
415+
#' @return Character vector with dependency information
416+
#' @keywords internal
417+
#' @noRd
418+
process_assignment_expression <- function(pd, symbol_indices, function_call_indices, assignment_indices, original_pd) {
419+
# Remove function calls that appear before assignment (e.g., in eval(expression(c <- b)))
420+
symbol_indices <- symbol_indices[!(symbol_indices < min(assignment_indices) & symbol_indices %in% function_call_indices)]
421+
422+
# Handle right-to-left assignment (->) by reversing symbol order
423+
if (unique(pd$text[assignment_indices]) == "->") {
424+
symbol_indices <- rev(symbol_indices)
425+
}
426+
427+
# Build dependency vector with assignment operator in correct position
428+
assignment_pos <- match(min(pd$id[assignment_indices]), sort(pd$id[c(min(assignment_indices), symbol_indices)])) - 1
429+
dependency_vector <- append(pd[symbol_indices, "text"], "<-", after = max(1, assignment_pos))
430+
431+
# Move function names to right side of dependency arrow
432+
dependency_vector <- move_functions_after_arrow(dependency_vector, unique(pd[function_call_indices, "text"]))
433+
434+
# Handle symbols in brackets/parentheses
435+
bracket_symbols <- extract_symbols_in_brackets(original_pd)
436+
if (length(bracket_symbols) > 0) {
437+
c(setdiff(dependency_vector, bracket_symbols), bracket_symbols)
310438
} else {
311-
ans
439+
dependency_vector
312440
}
313441
}
314442

315-
#' Moves function names to the right side of dependency graph
443+
#' Extract symbols that appear within brackets or parentheses
444+
#'
445+
#' @param pd `data.frame` parse data
446+
#' @return Character vector of symbols in brackets, or NULL
447+
#' @keywords internal
448+
#' @noRd
449+
extract_symbols_in_brackets <- function(pd) {
450+
# Look for bracket/parenthesis tokens
451+
has_brackets <- any(pd$token %in% c("LBB", "'['"))
452+
if (!has_brackets) {
453+
return(NULL)
454+
}
455+
456+
start_id <- min(pd$id[pd$token %in% c("LBB", "'['")])
457+
end_id <- min(pd$id[pd$token == "']'"])
458+
459+
# Extract symbols between brackets
460+
pd$text[pd$token == "SYMBOL" & pd$id > start_id & pd$id < end_id]
461+
}
316462
#'
317463
#' Changes status of the function call from dependent to dependency if occurs in the lhs.
318464
#' Technically, it means to move function names after the dependency operator.

0 commit comments

Comments
 (0)