@@ -110,25 +110,24 @@ find_call <- function(call_pd, text) {
110110# ' @noRd
111111extract_calls <- function (pd ) {
112112 calls <- lapply(
113- pd [pd $ parent == 0 , " id" ],
113+ pd [pd $ parent == 0 & pd $ token != " COMMENT " , " id" ],
114114 function (parent ) {
115115 rbind(
116- pd [pd $ id == parent , c( " token " , " text " , " id " , " parent " ) ],
116+ pd [pd $ id == parent , ],
117117 get_children(pd = pd , parent = parent )
118118 )
119119 }
120120 )
121121 calls <- Filter(function (call ) ! (nrow(call ) == 1 && call $ token == " ';'" ), calls )
122122 calls <- Filter(Negate(is.null ), calls )
123- calls <- fix_shifted_comments(calls )
124123 fix_arrows(calls )
125124}
126125
127126# ' @keywords internal
128127# ' @noRd
129128get_children <- function (pd , parent ) {
130129 idx_children <- abs(pd $ parent ) == parent
131- children <- pd [idx_children , c( " token " , " text " , " id " , " parent " ) ]
130+ children <- pd [idx_children , ]
132131 if (nrow(children ) == 0 ) {
133132 return (NULL )
134133 }
@@ -454,71 +453,29 @@ normalize_pd <- function(pd) {
454453 pd
455454}
456455
457- # ' Get line and cols ids of starts and ends of calls
456+ # ' Get line/column in the source where the calls end
458457# '
459- # ' @param pd `data.frame` resulting from `utils::getParseData()` call.
460458# '
461- # ' @return list of `data.frames` containing number of lines and columns of starts and ends of calls included in `pd`.
459+ # ' @param code `character(1)`
460+ # '
461+ # ' @return `matrix` with `colnames = c("line", "col")`
462462# '
463463# ' @keywords internal
464464# ' @noRd
465- get_line_ids <- function (pd ) {
466- if (pd $ token [1 ] == " COMMENT" ) {
467- first_comment <- 1 : (which(pd $ parent == 0 )[1 ] - 1 )
468- pd_first_comment <- pd [first_comment , ]
469- pd <- pd [- first_comment , ]
470-
471- n <- nrow(pd_first_comment )
472- first_comment_ids <- data.frame (
473- lines = c(pd_first_comment [1 , " line1" ], pd_first_comment [n , " line2" ]),
474- cols = c(pd_first_comment [1 , " col1" ], pd_first_comment [n , " col2" ])
475- )
476- } else {
477- first_comment_ids <- NULL
478- }
479-
480- if (pd $ token [nrow(pd )] == " COMMENT" ) {
481- last_comment <- which(pd $ parent == 0 & pd $ token == " COMMENT" )
482- pd_last_comment <- pd [last_comment , ]
483- pd <- pd [- last_comment , ]
484-
485- n <- nrow(pd_last_comment )
486- last_comment_ids <- data.frame (
487- lines = c(pd_last_comment [1 , " line1" ], pd_last_comment [n , " line2" ]),
488- cols = c(pd_last_comment [1 , " col1" ], pd_last_comment [n , " col2" ])
489- )
490- } else {
491- last_comment_ids <- NULL
492- }
493-
494- # If NUM_CONST is the last element, we need to reorder rows.
495- # Last 2 rows
496- n <- nrow(pd )
497- if (pd $ token [n - 1 ] == " NUM_CONST" && pd $ parent [n ] == 0 ) {
498- pd <- rbind(pd [- (n - 1 ), ], pd [n - 1 , ])
499- }
500-
501- calls_start <- which(pd $ parent == 0 )
502- calls_end <- c(which(pd $ parent == 0 )[- 1 ] - 1 , nrow(pd ))
503-
504- call_ids <- list ()
505- for (i in seq_along(calls_start )) {
506- call <- pd [c(calls_start [i ], calls_end [i ]), ]
507- call_ids [[i ]] <-
508- data.frame (
509- lines = c(call [1 , " line1" ], call [2 , " line2" ]),
510- cols = c(call [1 , " col1" ], call [2 , " col2" ])
511- )
512- }
513-
514- if (! is.null(first_comment_ids )) {
515- call_ids [[1 ]] <- rbind(first_comment_ids [1 , ], call_ids [[1 ]][2 , ])
516- }
517- if (! is.null(last_comment_ids )) {
518- n <- length(call_ids )
519- call_ids [[n ]] <- rbind(call_ids [[n ]][1 , ], last_comment_ids [2 , ])
520- }
521- call_ids
465+ get_call_breaks <- function (code ) {
466+ parsed_code <- parse(text = code , keep.source = TRUE )
467+ pd <- utils :: getParseData(parsed_code )
468+ pd <- normalize_pd(pd )
469+ pd <- pd [pd $ token != " ';'" , ]
470+ call_breaks <- t(sapply(
471+ extract_calls(pd ),
472+ function (x ) {
473+ matrix (c(max(x $ line2 ), max(x $ col2 )))
474+ }
475+ ))
476+ if (nrow(call_breaks ) > 1 ) call_breaks <- call_breaks [- nrow(call_breaks ), ] # breaks in between needed only
477+ colnames(call_breaks ) <- c(" line" , " col" )
478+ call_breaks
522479}
523480
524481# ' Split code by calls
@@ -530,40 +487,23 @@ get_line_ids <- function(pd) {
530487# ' @keywords internal
531488# ' @noRd
532489split_code <- function (code ) {
533- parsed_code <- parse(text = code , keep.source = TRUE )
534- pd <- utils :: getParseData(parsed_code )
535- pd <- normalize_pd(pd )
536- pd <- pd [pd $ token != " ';'" , ]
537- lines_ids <- get_line_ids(pd )
538-
490+ call_breaks <- get_call_breaks(code )
491+ call_breaks <- call_breaks [order(call_breaks [, " line" ], call_breaks [, " col" ]), ]
539492 code_split <- strsplit(code , split = " \n " , fixed = TRUE )[[1 ]]
540- code_split_calls <- list ()
541-
542- for (i in seq_along(lines_ids )) {
543- code_lines <- code_split [lines_ids [[i ]]$ lines [1 ]: lines_ids [[i ]]$ lines [2 ]]
544-
545- if (length(code_lines ) == 1 ) {
546- code_lines_candidate <- substr(code_lines , lines_ids [[i ]]$ cols [1 ], lines_ids [[i ]]$ cols [2 ])
547- # in case only indentantion is changed, do not trim the indentation
548- if (! identical(code_lines_candidate , trimws(code_lines ))) {
549- # case of multiple calls in one line, keep the original indentation
550- indentation <- if (grepl(" ^\\ s+" , code_lines )) {
551- gsub(" ^(\\ s+).*" , " \\ 1" , code_lines )
552- } else {
553- " "
554- }
555- code_lines <- paste0(indentation , code_lines_candidate )
556- }
557- } else {
558- code_lines_candidate <- substr(code_lines [1 ], lines_ids [[i ]]$ cols [1 ], nchar(code_lines [1 ]))
559- # in case only indentantion is changed, do not trim the indentation
560- if (! identical(code_lines_candidate , trimws(code_lines [1 ]))) {
561- code_lines [1 ] <- code_lines_candidate
562- }
563- code_lines [length(code_lines )] <- substr(code_lines [length(code_lines )], 1 , lines_ids [[i ]]$ cols [2 ])
564- }
493+ char_count_lines <- c(0 , cumsum(sapply(code_split , nchar , USE.NAMES = FALSE ) + 1 ), - 1 )[seq_along(code_split )]
565494
566- code_split_calls [[i ]] <- paste(code_lines , collapse = " \n " )
567- }
568- code_split_calls
495+ idx_start <- c(
496+ 0 , # first call starts in the beginning of src
497+ char_count_lines [call_breaks [, " line" ]] + call_breaks [, " col" ] + 2
498+ )
499+ idx_end <- c(
500+ char_count_lines [call_breaks [, " line" ]] + call_breaks [, " col" ] + 1 ,
501+ nchar(code ) # last call end in the end of src
502+ )
503+ new_code <- substring(code , idx_start , idx_end )
504+
505+ # we need to remove leading semicolons from the calls and move them to the previous call
506+ # this is a reasult of a wrong split, which ends on the end of call and not on the ;
507+ # semicolon is treated by R parser as a separate call.
508+ gsub(" ^([[:space:]])*;(.+)$" , " \\ 1\\ 2" , new_code , perl = TRUE )
569509}
0 commit comments