@@ -211,108 +211,254 @@ sub_arrows <- function(call) {
211211# ' @keywords internal
212212# ' @noRd
213213extract_occurrence <- function (pd ) {
214- is_in_function <- function (x ) {
215- # If an object is a function parameter,
216- # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
217- function_id <- x [x $ token == " FUNCTION" , " parent" ]
218- if (length(function_id )) {
219- x $ id %in% get_children(x , function_id [1 ])$ id
220- } else {
221- rep(FALSE , nrow(x ))
222- }
214+ # Handle special function calls first
215+ data_dependency <- handle_data_call(pd )
216+ if (! is.null(data_dependency )) {
217+ return (data_dependency )
223218 }
224- in_parenthesis <- function (x ) {
225- if (any(x $ token %in% c(" LBB" , " '['" ))) {
226- id_start <- min(x $ id [x $ token %in% c(" LBB" , " '['" )])
227- id_end <- min(x $ id [x $ token == " ']'" ])
228- x $ text [x $ token == " SYMBOL" & x $ id > id_start & x $ id < id_end ]
229- }
219+
220+ assign_dependency <- handle_assign_call(pd )
221+ if (! is.null(assign_dependency )) {
222+ return (assign_dependency )
230223 }
224+
225+ # Process general assignment expressions
226+ extract_general_assignment(pd )
227+ }
231228
232- # Handle data(object)/data("object")/data(object, envir = ) independently.
233- data_call <- find_call(pd , " data" )
234- if (data_call ) {
235- sym <- pd [data_call + 1 , " text" ]
236- return (c(gsub(" ^['\" ]|['\" ]$" , " " , sym ), " <-" ))
229+ # ' Handle data() function calls
230+ # '
231+ # ' Extracts dependencies for data() function calls like data(object) or data("object").
232+ # '
233+ # ' @param pd `data.frame` parse data for a single call
234+ # ' @return Character vector with dependency info or NULL if not a data call
235+ # ' @keywords internal
236+ # ' @noRd
237+ handle_data_call <- function (pd ) {
238+ data_call_pos <- find_call(pd , " data" )
239+ if (data_call_pos == 0 ) {
240+ return (NULL )
237241 }
238- # Handle assign(x = ).
239- assign_call <- find_call(pd , " assign" )
240- if (assign_call ) {
241- # Check if parameters were named.
242- # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
243- # "EQ_SUB" is for `=` appearing after the name of the named parameter.
244- if (any(pd $ token == " SYMBOL_SUB" )) {
245- params <- pd [pd $ token %in% c(" SYMBOL_SUB" , " ','" , " EQ_SUB" ), " text" ]
246- # Remove sequence of "=", ",".
247- if (length(params > 1 )) {
248- remove <- integer(0 )
249- for (i in 2 : length(params )) {
250- if (params [i - 1 ] == " =" && params [i ] == " ," ) {
251- remove <- c(remove , i - 1 , i )
252- }
253- }
254- if (length(remove )) params <- params [- remove ]
255- }
256- pos <- match(" x" , setdiff(params , " ," ), nomatch = match(" ," , params , nomatch = 0 ))
257- if (! pos ) {
258- return (character (0L ))
259- }
260- # pos is indicator of the place of 'x'
261- # 1. All parameters are named, but none is 'x' - return(character(0L))
262- # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))
263- # - check "x" in params being just a vector of named parameters.
264- # 3. Some parameters are named, 'x' is not in named parameters
265- # - check first appearance of "," (unnamed parameter) in vector parameters.
266- } else {
267- # Object is the first entry after 'assign'.
268- pos <- 1
242+
243+ # Extract the object name from data(object)
244+ object_name <- pd [data_call_pos + 1 , " text" ]
245+ # Remove quotes if present: data("object") -> object
246+ clean_name <- gsub(" ^['\" ]|['\" ]$" , " " , object_name )
247+ c(clean_name , " <-" )
248+ }
249+
250+ # ' Handle assign() function calls
251+ # '
252+ # ' Extracts dependencies for assign() function calls with named or unnamed parameters.
253+ # '
254+ # ' @param pd `data.frame` parse data for a single call
255+ # ' @return Character vector with dependency info or NULL if not an assign call
256+ # ' @keywords internal
257+ # ' @noRd
258+ handle_assign_call <- function (pd ) {
259+ assign_call_pos <- find_call(pd , " assign" )
260+ if (assign_call_pos == 0 ) {
261+ return (NULL )
262+ }
263+
264+ param_position <- get_assign_param_position(pd )
265+ if (param_position == 0 ) {
266+ return (character (0L ))
267+ }
268+
269+ # Extract the assigned object name
270+ object_name <- pd [assign_call_pos + param_position , " text" ]
271+ # Remove quotes if present: assign("object", value) -> object
272+ clean_name <- gsub(" ^['\" ]|['\" ]$" , " " , object_name )
273+ c(clean_name , " <-" )
274+ }
275+
276+ # ' Get parameter position for assign() function
277+ # '
278+ # ' Determines the position of the 'x' parameter in assign() calls,
279+ # ' handling both named and unnamed parameter cases.
280+ # '
281+ # ' @param pd `data.frame` parse data for a single call
282+ # ' @return Integer position offset from assign call, or 0 if 'x' parameter not found
283+ # ' @keywords internal
284+ # ' @noRd
285+ get_assign_param_position <- function (pd ) {
286+ # Check if any parameters are named
287+ has_named_params <- any(pd $ token == " SYMBOL_SUB" )
288+
289+ if (has_named_params ) {
290+ # Extract parameter tokens: named parameters (SYMBOL_SUB), commas, equals
291+ param_tokens <- pd [pd $ token %in% c(" SYMBOL_SUB" , " ','" , " EQ_SUB" ), " text" ]
292+
293+ # Clean up sequences of "=" followed by ","
294+ cleaned_params <- remove_consecutive_equals_comma(param_tokens )
295+
296+ # Find position of 'x' parameter among named parameters
297+ pos <- match(" x" , setdiff(cleaned_params , " ," ), nomatch = 0 )
298+ if (pos == 0 ) {
299+ # If 'x' not found in named params, check for first unnamed parameter
300+ pos <- match(" ," , cleaned_params , nomatch = 0 )
269301 }
270- sym <- pd [assign_call + pos , " text" ]
271- return (c(gsub(" ^['\" ]|['\" ]$" , " " , sym ), " <-" ))
302+ return (pos )
303+ } else {
304+ # All parameters are unnamed, 'x' is the first parameter
305+ return (1 )
272306 }
307+ }
273308
274- # What occurs in a function body is not tracked.
275- x <- pd [! is_in_function(pd ), ]
276- sym_cond <- which(x $ token %in% c(" SPECIAL" , " SYMBOL" , " SYMBOL_FUNCTION_CALL" ))
277- sym_fc_cond <- which(x $ token == " SYMBOL_FUNCTION_CALL" )
309+ # ' Remove consecutive "=" and "," sequences from parameter tokens
310+ # '
311+ # ' @param params Character vector of parameter tokens
312+ # ' @return Character vector with "=", "," sequences removed
313+ # ' @keywords internal
314+ # ' @noRd
315+ remove_consecutive_equals_comma <- function (params ) {
316+ if (length(params ) < = 1 ) {
317+ return (params )
318+ }
319+
320+ indices_to_remove <- integer(0 )
321+ for (i in 2 : length(params )) {
322+ if (params [i - 1 ] == " =" && params [i ] == " ," ) {
323+ indices_to_remove <- c(indices_to_remove , i - 1 , i )
324+ }
325+ }
326+
327+ if (length(indices_to_remove ) > 0 ) {
328+ params [- indices_to_remove ]
329+ } else {
330+ params
331+ }
332+ }
278333
279- if (length(sym_cond ) == 0 ) {
334+ # ' Extract dependencies from general assignment expressions
335+ # '
336+ # ' Processes standard assignment operations (<-, =, ->) and extracts
337+ # ' object dependencies while handling special cases.
338+ # '
339+ # ' @param pd `data.frame` parse data for a single call
340+ # ' @return Character vector with dependency information
341+ # ' @keywords internal
342+ # ' @noRd
343+ extract_general_assignment <- function (pd ) {
344+ # Filter out symbols that are function parameters
345+ filtered_pd <- pd [! is_symbol_in_function_body(pd ), ]
346+
347+ # Find all symbol positions
348+ symbol_indices <- which(filtered_pd $ token %in% c(" SPECIAL" , " SYMBOL" , " SYMBOL_FUNCTION_CALL" ))
349+ function_call_indices <- which(filtered_pd $ token == " SYMBOL_FUNCTION_CALL" )
350+
351+ if (length(symbol_indices ) == 0 ) {
280352 return (character (0L ))
281353 }
282- # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.
283- # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.
284- dollar_ids <- x [x $ token %in% c(" '$'" , " '@'" ), " id" ]
285- if (length(dollar_ids )) {
286- object_ids <- x [sym_cond , " id" ]
287- after_dollar <- object_ids [(object_ids - 2 ) %in% dollar_ids ]
288- sym_cond <- setdiff(sym_cond , which(x $ id %in% after_dollar ))
354+
355+ # Remove symbols that come after $ or @ operators (e.g., in x$a, remove 'a')
356+ symbol_indices <- exclude_symbols_after_operators(filtered_pd , symbol_indices )
357+
358+ # Look for assignment operators
359+ assignment_indices <- grep(" ASSIGN" , filtered_pd $ token )
360+ if (length(assignment_indices ) == 0 ) {
361+ # No assignment found, return all symbols as dependencies
362+ return (c(" <-" , unique(filtered_pd [symbol_indices , " text" ])))
289363 }
364+
365+ # Process assignment expression
366+ process_assignment_expression(filtered_pd , symbol_indices , function_call_indices , assignment_indices , pd )
367+ }
290368
291- assign_cond <- grep(" ASSIGN" , x $ token )
292- if (! length(assign_cond )) {
293- return (c(" <-" , unique(x [sym_cond , " text" ])))
369+ # ' Check if symbols are in function body
370+ # '
371+ # ' @param pd `data.frame` parse data
372+ # ' @return Logical vector indicating which rows are function parameters
373+ # ' @keywords internal
374+ # ' @noRd
375+ is_symbol_in_function_body <- function (pd ) {
376+ function_ids <- pd [pd $ token == " FUNCTION" , " parent" ]
377+ if (length(function_ids ) == 0 ) {
378+ return (rep(FALSE , nrow(pd )))
294379 }
380+
381+ # Get all children of function definitions
382+ function_children <- get_children(pd , function_ids [1 ])$ id
383+ pd $ id %in% function_children
384+ }
295385
296- # For cases like 'eval(expression(c <- b + 2))' removes 'eval(expression('.
297- sym_cond <- sym_cond [! (sym_cond < min(assign_cond ) & sym_cond %in% sym_fc_cond )]
298-
299- # If there was an assignment operation detect direction of it.
300- if (unique(x $ text [assign_cond ]) == " ->" ) { # What if there are 2 assignments: e.g. a <- b -> c.
301- sym_cond <- rev(sym_cond )
386+ # ' Exclude symbols that appear after $ or @ operators
387+ # '
388+ # ' @param pd `data.frame` parse data
389+ # ' @param symbol_indices Integer vector of symbol positions
390+ # ' @return Integer vector of filtered symbol positions
391+ # ' @keywords internal
392+ # ' @noRd
393+ exclude_symbols_after_operators <- function (pd , symbol_indices ) {
394+ # Find $ and @ operator positions
395+ operator_ids <- pd [pd $ token %in% c(" '$'" , " '@'" ), " id" ]
396+ if (length(operator_ids ) == 0 ) {
397+ return (symbol_indices )
302398 }
399+
400+ # For x$a, a's ID is $'s ID-2, so we exclude symbols with ID = operator_ID - 2
401+ symbol_ids <- pd [symbol_indices , " id" ]
402+ symbols_after_operators <- symbol_ids [(symbol_ids - 2 ) %in% operator_ids ]
403+
404+ # Remove these symbols from our indices
405+ setdiff(symbol_indices , which(pd $ id %in% symbols_after_operators ))
406+ }
303407
304- after <- match(min(x $ id [assign_cond ]), sort(x $ id [c(min(assign_cond ), sym_cond )])) - 1
305- ans <- append(x [sym_cond , " text" ], " <-" , after = max(1 , after ))
306- ans <- move_functions_after_arrow(ans , unique(x [sym_fc_cond , " text" ]))
307- roll <- in_parenthesis(pd )
308- if (length(roll )) {
309- c(setdiff(ans , roll ), roll )
408+ # ' Process assignment expression and build dependency vector
409+ # '
410+ # ' @param pd `data.frame` filtered parse data
411+ # ' @param symbol_indices Integer vector of symbol positions
412+ # ' @param function_call_indices Integer vector of function call positions
413+ # ' @param assignment_indices Integer vector of assignment operator positions
414+ # ' @param original_pd `data.frame` original parse data for bracket processing
415+ # ' @return Character vector with dependency information
416+ # ' @keywords internal
417+ # ' @noRd
418+ process_assignment_expression <- function (pd , symbol_indices , function_call_indices , assignment_indices , original_pd ) {
419+ # Remove function calls that appear before assignment (e.g., in eval(expression(c <- b)))
420+ symbol_indices <- symbol_indices [! (symbol_indices < min(assignment_indices ) & symbol_indices %in% function_call_indices )]
421+
422+ # Handle right-to-left assignment (->) by reversing symbol order
423+ if (unique(pd $ text [assignment_indices ]) == " ->" ) {
424+ symbol_indices <- rev(symbol_indices )
425+ }
426+
427+ # Build dependency vector with assignment operator in correct position
428+ assignment_pos <- match(min(pd $ id [assignment_indices ]), sort(pd $ id [c(min(assignment_indices ), symbol_indices )])) - 1
429+ dependency_vector <- append(pd [symbol_indices , " text" ], " <-" , after = max(1 , assignment_pos ))
430+
431+ # Move function names to right side of dependency arrow
432+ dependency_vector <- move_functions_after_arrow(dependency_vector , unique(pd [function_call_indices , " text" ]))
433+
434+ # Handle symbols in brackets/parentheses
435+ bracket_symbols <- extract_symbols_in_brackets(original_pd )
436+ if (length(bracket_symbols ) > 0 ) {
437+ c(setdiff(dependency_vector , bracket_symbols ), bracket_symbols )
310438 } else {
311- ans
439+ dependency_vector
312440 }
313441}
314442
315- # ' Moves function names to the right side of dependency graph
443+ # ' Extract symbols that appear within brackets or parentheses
444+ # '
445+ # ' @param pd `data.frame` parse data
446+ # ' @return Character vector of symbols in brackets, or NULL
447+ # ' @keywords internal
448+ # ' @noRd
449+ extract_symbols_in_brackets <- function (pd ) {
450+ # Look for bracket/parenthesis tokens
451+ has_brackets <- any(pd $ token %in% c(" LBB" , " '['" ))
452+ if (! has_brackets ) {
453+ return (NULL )
454+ }
455+
456+ start_id <- min(pd $ id [pd $ token %in% c(" LBB" , " '['" )])
457+ end_id <- min(pd $ id [pd $ token == " ']'" ])
458+
459+ # Extract symbols between brackets
460+ pd $ text [pd $ token == " SYMBOL" & pd $ id > start_id & pd $ id < end_id ]
461+ }
316462# '
317463# ' Changes status of the function call from dependent to dependency if occurs in the lhs.
318464# ' Technically, it means to move function names after the dependency operator.
0 commit comments