r-lib
diff --git a/‎NAMESPACE
Lines changed: 1 addition & 0 deletions b/‎NAMESPACE
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/parse.R
Lines changed: 94 additions & 19 deletions b/‎R/parse.R
Lines changed: 94 additions & 19 deletions
diff --git a/‎man/verify_str_txt.Rd renamed to ‎man/ensure_correct_str_txt.Rd
Lines changed: 7 additions & 7 deletions b/‎man/verify_str_txt.Rd renamed to ‎man/ensure_correct_str_txt.Rd
Lines changed: 7 additions & 7 deletions
diff --git a/‎man/ensure_valid_pd.Rd
Lines changed: 16 additions & 0 deletions b/‎man/ensure_valid_pd.Rd
Lines changed: 16 additions & 0 deletions
diff --git a/‎man/identify_insufficiently_parsed_stings.Rd
Lines changed: 21 additions & 0 deletions b/‎man/identify_insufficiently_parsed_stings.Rd
Lines changed: 21 additions & 0 deletions
diff --git a/‎man/style_dir.Rd
Lines changed: 1 addition & 1 deletion b/‎man/style_dir.Rd
Lines changed: 1 addition & 1 deletion
diff --git a/‎man/style_file.Rd
Lines changed: 1 addition & 1 deletion b/‎man/style_file.Rd
Lines changed: 1 addition & 1 deletion
diff --git a/‎man/style_pkg.Rd
Lines changed: 1 addition & 1 deletion b/‎man/style_pkg.Rd
Lines changed: 1 addition & 1 deletion
diff --git a/‎man/two_cols_match.Rd
Lines changed: 16 additions & 0 deletions b/‎man/two_cols_match.Rd
Lines changed: 16 additions & 0 deletions
diff --git a/‎tests/testthat/parsing/long_strings-in.R
Lines changed: 3 additions & 0 deletions b/‎tests/testthat/parsing/long_strings-in.R
Lines changed: 3 additions & 0 deletions
@@ -21,6 +21,7 @@ importFrom(purrr,flatten_chr)
 importFrom(purrr,flatten_int)
 importFrom(purrr,map)
 importFrom(purrr,map2)
+importFrom(purrr,map2_lgl)
 importFrom(purrr,map_chr)
 importFrom(purrr,map_lgl)
 importFrom(purrr,partial)
 
@@ -17,7 +17,7 @@
 #' @keywords internal
 tokenize <- function(text) {
   get_parse_data(text, include_text = NA) %>%
-    verify_str_txt(text) %>%
+    ensure_correct_str_txt(text) %>%
     enhance_mapping_special()
 }
 
@@ -49,33 +49,108 @@ add_id_and_short <- function(pd) {
 }
 
 
-#' Verify the text of strings
+#' Ensure a correct `text` of all strings
 #'
 #' Make sure `text` of the tokens `STR_CONST` is correct and adapt if necessary.
 #' We first parse `text` again and include also non-terminal text. Then, we
 #' replace offending `text` in the terminal expressions with the text of their
-#' parents.
-#' @param pd_with_terminal_text A parse table.
-#' @param text The text from which `pd_with_terminal_text` was created. Needed
+#' parents if their line / col position matches and throw an error otherwise.
+#' @param pd A parse table.
+#' @param text The text from which `pd` was created. Needed
 #'   for potential reparsing.
+#' @return `pd` unchanged if it contains no problematic string. Otherwise a
+#'   parse table in which the problematic strings carry the `text` and `short`
+#'   of their parents, ordered by `pos_id`.
 #' @keywords internal
-verify_str_txt <- function(pd_with_terminal_text, text) {
-  string_ind <- pd_with_terminal_text$token == "STR_CONST"
-  strings <- pd_with_terminal_text[string_ind, ]
-  parent_of_strings_ind <- pd_with_terminal_text$id %in% strings$parent
-  other_ind <- !(string_ind | parent_of_strings_ind)
-  if (nrow(strings) == 0 || !any(substr(strings$text, 1, 1) == "[")) {
-    return(pd_with_terminal_text)
+ensure_correct_str_txt <- function(pd, text) {
+  ensure_valid_pd(pd)
+  is_problematic_string <- identify_insufficiently_parsed_stings(pd, text)
+  # Common case: nothing to repair, so skip all subsetting and re-parsing.
+  if (!any(is_problematic_string)) {
+    return(pd)
   }
+
+  problematic_strings <- pd[is_problematic_string, ]
+  is_parent_of_problematic_string <-
+    pd$id %in% problematic_strings$parent
+  is_unaffected_token <-
+    !(is_problematic_string | is_parent_of_problematic_string)
+
+  # Second parse that also carries the text of non-terminals. The logical
+  # index computed on `pd` is reused here, i.e. both parses are assumed to
+  # yield the same rows in the same order.
   pd_with_all_text <- get_parse_data(text, include_text = TRUE)
-  parent_of_strings <- pd_with_all_text[parent_of_strings_ind, c("id", "text", "short")]
-  strings$text <- NULL
-  strings$short <- NULL
-  new_strings <- merge(strings, parent_of_strings, by.x = "parent", by.y = "id")
+  parent_cols_for_merge <- c("id", "text", "short", line_col_names())
+  parent_of_problematic_strings <-
+    pd_with_all_text[is_parent_of_problematic_string, parent_cols_for_merge]
+  # Drop the offending columns so that the merge supplies the parent's values.
+  problematic_strings$text <- NULL
+  problematic_strings$short <- NULL
+  # Line / col columns exist on both sides; the parent's copies get the
+  # suffix "parent" so that both positions can be compared below.
+  new_strings <- merge(problematic_strings, parent_of_problematic_strings,
+    by.x = "parent",
+    by.y = "id",
+    suffixes = c("", "parent")
+  ) %>%
+    as_tibble()
+
+  if (!lines_and_cols_match(new_strings)) {
+    stop(paste(
+      "Error in styler:::ensure_correct_str_txt().",
+      "Please file an issue on GitHub (https://github.com/r-lib/styler/issues)"
+    ), call. = FALSE)
+  }
+  # The parent's position columns were only needed for the check above.
+  names_to_keep <- setdiff(
+    names(new_strings),
+    paste0(line_col_names(), "parent")
+  )
   bind_rows(
-    new_strings,
-    pd_with_terminal_text[other_ind, ],
-    pd_with_terminal_text[parent_of_strings_ind, ]
+    new_strings[, names_to_keep],
+    pd[is_unaffected_token, ],
+    pd[is_parent_of_problematic_string, ]
   ) %>%
     arrange(pos_id)
 }
+
+#' Ensure that the parse data is valid
+#'
+#' Test whether all non-terminals have at least one child and throw an error
+#' otherwise. As this check is rather expensive, it is only carried out for
+#' configurations where we have good reasons to expect problems (currently
+#' R versions older than 3.2).
+#' @param pd A parse table.
+#' @return `TRUE` if the check passed or was skipped. An error is thrown if
+#'   the check fails.
+ensure_valid_pd <- function(pd) {
+  if (getRversion() < "3.2") {
+    # `which()` ignores rows whose `terminal` flag is `NA`.
+    non_terminal_ids <- pd$id[which(!pd$terminal)]
+    # Every non-terminal must show up as the parent of at least one token.
+    valid_pd <- all(non_terminal_ids %in% pd$parent)
+    if (!valid_pd) {
+      stop(paste(
+        "The parse data is not valid and the problem is most likely related",
+        "to the parser in base R. Please install R >= 3.2 and try again."
+      ), call. = FALSE)
+    }
+  }
+  TRUE
+}
+
+#' Identify strings that were not fully parsed
+#'
+#' Identifies strings that were not fully parsed due to their vast length.
+#' @details
+#' For performance reasons a single logical vector is used: it first marks
+#' all `STR_CONST` tokens and is then narrowed, in place, to those whose
+#' `text` starts with `"["`. Presumably `"["` is the first character of the
+#' placeholder the parser emits for very long strings (to be confirmed).
+#' @param pd A parse table.
+#' @param text The initial code to style. Not used by the current
+#'   implementation.
+#' @return A logical vector with one element per row of `pd`.
+identify_insufficiently_parsed_stings <- function(pd, text) {
+  flagged <- pd$token == "STR_CONST"
+  leading_char <- substr(pd$text[flagged], 1, 1)
+  flagged[flagged] <- leading_char == "["
+  flagged
+}
+
+#' Check that line / col positions agree with those of the parent
+#'
+#' Pairs every name returned by `line_col_names()` with the same name
+#' suffixed by `"parent"` and passes each pair, together with `data`, to
+#' `two_cols_match()`.
+#' @param data A table holding both sets of columns.
+#' @return `TRUE` if `two_cols_match()` returns `TRUE` for every pair.
+#' @keywords internal
+#' @importFrom purrr map2_lgl
+lines_and_cols_match <- function(data) {
+  own_cols <- paste0(line_col_names(), "")
+  parent_cols <- paste0(line_col_names(), "parent")
+  pairwise_match <- map2_lgl(
+    own_cols, parent_cols,
+    two_cols_match,
+    data = data
+  )
+  all(pairwise_match)
+}
@@ -1,5 +1,6 @@
 b <-
  3
+g <-
 "v x ijyuldlf ixi tt ucw nk xejkf omch  ujm ymgsgkwickxn tg zknjxmk aqtgqrn bhv
  se g ec  avo  xs nyz   fhadktjlwuocti au  y gxv y xbr x kxn om dkaderkl  xqok
  pp ud lcw  pnft ggzz lu v  sgs  ysv uyyxp gmcvt   o   rumej  rfed j qy   ozo
@@ -32,3 +33,5 @@ b <-
 'test'
 'test"ji"' # comment
 1
+
+call("a_is_long" = 2)
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,6 @@`
`1`	`1`	`b <-`
`2`	`2`	`3`
	`3`	`+g <-`
`3`	`4`	`"v x ijyuldlf ixi tt ucw nk xejkf omch ujm ymgsgkwickxn tg zknjxmk aqtgqrn bhv`
`4`	`5`	`se g ec avo xs nyz fhadktjlwuocti au y gxv y xbr x kxn om dkaderkl xqok`
`5`	`6`	`pp ud lcw pnft ggzz lu v sgs ysv uyyxp gmcvt o rumej rfed j qy ozo`
`@@ -32,3 +33,5 @@ b <-`
`32`	`33`	`'test'`
`33`	`34`	`'test"ji"' # comment`
`34`	`35`	`1`
	`36`	`+`
	`37`	`+call("a_is_long" = 2)`