|
17 | 17 | #' @keywords internal
|
18 | 18 | tokenize <- function(text) {
|
19 | 19 | get_parse_data(text, include_text = NA) %>%
|
20 |
| - verify_str_txt(text) %>% |
| 20 | + ensure_correct_str_txt(text) %>% |
21 | 21 | enhance_mapping_special()
|
22 | 22 | }
|
23 | 23 |
|
@@ -49,33 +49,108 @@ add_id_and_short <- function(pd) {
|
49 | 49 | }
|
50 | 50 |
|
51 | 51 |
|
52 |
| -#' Verify the text of strings |
| 52 | +#' Ensure a correct `text` of all strings |
53 | 53 | #'
|
54 | 54 | #' Make sure `text` of the tokens `STR_CONST` is correct and adapt if necessary.
|
55 | 55 | #' We first parse `text` again and include also non-terminal text. Then, we
|
56 | 56 | #' replace offending `text` in the terminal expressions with the text of their
|
57 |
| -#' parents. |
58 |
| -#' @param pd_with_terminal_text A parse table. |
59 |
| -#' @param text The text from which `pd_with_terminal_text` was created. Needed |
| 57 | +#' parents if their line / col position matches and return an error otherwise. |
| 58 | +#' @param pd A parse table. |
| 59 | +#' @param text The text from which `pd` was created. Needed |
60 | 60 | #' for potential reparsing.
|
61 | 61 | #' @keywords internal
|
62 |
| -verify_str_txt <- function(pd_with_terminal_text, text) { |
63 |
| - string_ind <- pd_with_terminal_text$token == "STR_CONST" |
64 |
| - strings <- pd_with_terminal_text[string_ind, ] |
65 |
| - parent_of_strings_ind <- pd_with_terminal_text$id %in% strings$parent |
66 |
| - other_ind <- !(string_ind | parent_of_strings_ind) |
67 |
| - if (nrow(strings) == 0 || !any(substr(strings$text, 1, 1) == "[")) { |
68 |
| - return(pd_with_terminal_text) |
| 62 | +ensure_correct_str_txt <- function(pd, text) { |
| 63 | + ensure_valid_pd(pd) |
| 64 | + is_problematic_string <- identify_insufficiently_parsed_stings(pd, text) |
| 65 | + problematic_strings <- pd[is_problematic_string, ] |
| 66 | + is_parent_of_problematic_string <- |
| 67 | + pd$id %in% problematic_strings$parent |
| 68 | + |
| 69 | + is_unaffected_token <- !(is_problematic_string | is_parent_of_problematic_string) |
| 70 | + if (!any(is_problematic_string)) { |
| 71 | + return(pd) |
69 | 72 | }
|
| 73 | + |
70 | 74 | pd_with_all_text <- get_parse_data(text, include_text = TRUE)
|
71 |
| - parent_of_strings <- pd_with_all_text[parent_of_strings_ind, c("id", "text", "short")] |
72 |
| - strings$text <- NULL |
73 |
| - strings$short <- NULL |
74 |
| - new_strings <- merge(strings, parent_of_strings, by.x = "parent", by.y = "id") |
| 75 | + parent_cols_for_merge <- c("id", "text", "short", line_col_names()) |
| 76 | + parent_of_problematic_strings <- |
| 77 | + pd_with_all_text[is_parent_of_problematic_string, parent_cols_for_merge] |
| 78 | + problematic_strings$text <- NULL |
| 79 | + problematic_strings$short <- NULL |
| 80 | + new_strings <- merge(problematic_strings, parent_of_problematic_strings, |
| 81 | + by.x = "parent", |
| 82 | + by.y = "id", |
| 83 | + suffixes = c("", "parent") |
| 84 | + ) %>% |
| 85 | + as_tibble() |
| 86 | + |
| 87 | + if (!lines_and_cols_match(new_strings)) { |
| 88 | + stop(paste( |
| 89 | + "Error in styler:::ensure_correct_str_txt().", |
| 90 | + "Please file an issue on GitHub (https://github.com/r-lib/styler/issues)", |
| 91 | + ), call. = FALSE) |
| 92 | + } |
| 93 | + names_to_keep <- setdiff( |
| 94 | + names(new_strings), |
| 95 | + paste0(line_col_names(), "parent") |
| 96 | + ) |
75 | 97 | bind_rows(
|
76 |
| - new_strings, |
77 |
| - pd_with_terminal_text[other_ind, ], |
78 |
| - pd_with_terminal_text[parent_of_strings_ind, ] |
| 98 | + new_strings[, names_to_keep], |
| 99 | + pd[is_unaffected_token, ], |
| 100 | + pd[is_parent_of_problematic_string, ] |
79 | 101 | ) %>%
|
80 | 102 | arrange(pos_id)
|
81 | 103 | }
|
| 104 | + |
| 105 | +#' Ensure that the parse data is valid |
| 106 | +#' |
| 107 | +#' Test whether all non-termnals have at least one child and throw an error |
| 108 | +#' otherwise. As this is check is rather expensive, it is only |
| 109 | +#' carried out for configurations we have good reasons to expect problems. |
| 110 | +#' @param pd A parse table. |
| 111 | +ensure_valid_pd <- function(pd) { |
| 112 | + if (getRversion() < "3.2") { |
| 113 | + non_terminals <- pd %>% |
| 114 | + filter(terminal == FALSE) |
| 115 | + valid_pd <- non_terminals$id %>% |
| 116 | + map_lgl(~ .x %in% pd$parent) %>% |
| 117 | + all() |
| 118 | + if (!valid_pd) { |
| 119 | + stop(paste( |
| 120 | + "The parse data is not valid and the problem is most likely related", |
| 121 | + "to the parser in base R. Please install R >= 3.2 and try again.", |
| 122 | + call. = FALSE |
| 123 | + )) |
| 124 | + } |
| 125 | + } |
| 126 | + TRUE |
| 127 | +} |
| 128 | + |
| 129 | +#' Indentify strings that were not fully parsed |
| 130 | +#' |
| 131 | +#' Indentifies strings that were not fully parsed due to their vast length. |
| 132 | +#' @details |
| 133 | +#' The meaning of the variable `is_problematic_string` in the source code |
| 134 | +#' changes from "all strings" to "all problematic strings", is partly |
| 135 | +#' missleading and this approach was choosen for performance reasons only. |
| 136 | +#' @param pd A parse table. |
| 137 | +#' @param text The initial code to style. |
| 138 | +identify_insufficiently_parsed_stings <- function(pd, text) { |
| 139 | + is_problematic_string <- pd$token == "STR_CONST" |
| 140 | + candidate_substring <- substr( |
| 141 | + pd$text[is_problematic_string], 1, 1 |
| 142 | + ) |
| 143 | + is_problematic_string[is_problematic_string] <- candidate_substring == "[" |
| 144 | + is_problematic_string |
| 145 | +} |
| 146 | + |
| 147 | +#' @importFrom purrr map2_lgl |
| 148 | +lines_and_cols_match <- function(data) { |
| 149 | + left <- paste0(line_col_names(), "") |
| 150 | + right <- paste0(line_col_names(), "parent") |
| 151 | + map2_lgl(left, right, |
| 152 | + two_cols_match, |
| 153 | + data = data |
| 154 | + ) %>% |
| 155 | + all() |
| 156 | +} |
0 commit comments