Skip to content

Commit 6e2b326

Browse files
Merge pull request #384 from lorenzwalthert/long-strings-again
- Only replace long strings (#384).
2 parents c800182 + 32d447b commit 6e2b326

13 files changed

+211
-48
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ importFrom(purrr,flatten_chr)
2121
importFrom(purrr,flatten_int)
2222
importFrom(purrr,map)
2323
importFrom(purrr,map2)
24+
importFrom(purrr,map2_lgl)
2425
importFrom(purrr,map_chr)
2526
importFrom(purrr,map_lgl)
2627
importFrom(purrr,partial)

R/parse.R

Lines changed: 94 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
#' @keywords internal
1818
tokenize <- function(text) {
1919
get_parse_data(text, include_text = NA) %>%
20-
verify_str_txt(text) %>%
20+
ensure_correct_str_txt(text) %>%
2121
enhance_mapping_special()
2222
}
2323

@@ -49,33 +49,108 @@ add_id_and_short <- function(pd) {
4949
}
5050

5151

52-
#' Verify the text of strings
52+
#' Ensure a correct `text` of all strings
5353
#'
5454
#' Make sure `text` of the tokens `STR_CONST` is correct and adapt if necessary.
5555
#' We first parse `text` again and include also non-terminal text. Then, we
5656
#' replace offending `text` in the terminal expressions with the text of their
57-
#' parents.
58-
#' @param pd_with_terminal_text A parse table.
59-
#' @param text The text from which `pd_with_terminal_text` was created. Needed
57+
#' parents if their line / col position matches and return an error otherwise.
58+
#' @param pd A parse table.
59+
#' @param text The text from which `pd` was created. Needed
6060
#' for potential reparsing.
6161
#' @keywords internal
62-
verify_str_txt <- function(pd_with_terminal_text, text) {
63-
string_ind <- pd_with_terminal_text$token == "STR_CONST"
64-
strings <- pd_with_terminal_text[string_ind, ]
65-
parent_of_strings_ind <- pd_with_terminal_text$id %in% strings$parent
66-
other_ind <- !(string_ind | parent_of_strings_ind)
67-
if (nrow(strings) == 0 || !any(substr(strings$text, 1, 1) == "[")) {
68-
return(pd_with_terminal_text)
62+
ensure_correct_str_txt <- function(pd, text) {
63+
ensure_valid_pd(pd)
64+
is_problematic_string <- identify_insufficiently_parsed_stings(pd, text)
65+
problematic_strings <- pd[is_problematic_string, ]
66+
is_parent_of_problematic_string <-
67+
pd$id %in% problematic_strings$parent
68+
69+
is_unaffected_token <- !(is_problematic_string | is_parent_of_problematic_string)
70+
if (!any(is_problematic_string)) {
71+
return(pd)
6972
}
73+
7074
pd_with_all_text <- get_parse_data(text, include_text = TRUE)
71-
parent_of_strings <- pd_with_all_text[parent_of_strings_ind, c("id", "text", "short")]
72-
strings$text <- NULL
73-
strings$short <- NULL
74-
new_strings <- merge(strings, parent_of_strings, by.x = "parent", by.y = "id")
75+
parent_cols_for_merge <- c("id", "text", "short", line_col_names())
76+
parent_of_problematic_strings <-
77+
pd_with_all_text[is_parent_of_problematic_string, parent_cols_for_merge]
78+
problematic_strings$text <- NULL
79+
problematic_strings$short <- NULL
80+
new_strings <- merge(problematic_strings, parent_of_problematic_strings,
81+
by.x = "parent",
82+
by.y = "id",
83+
suffixes = c("", "parent")
84+
) %>%
85+
as_tibble()
86+
87+
if (!lines_and_cols_match(new_strings)) {
88+
stop(paste(
89+
"Error in styler:::ensure_correct_str_txt().",
90+
"Please file an issue on GitHub (https://github.com/r-lib/styler/issues)",
91+
), call. = FALSE)
92+
}
93+
names_to_keep <- setdiff(
94+
names(new_strings),
95+
paste0(line_col_names(), "parent")
96+
)
7597
bind_rows(
76-
new_strings,
77-
pd_with_terminal_text[other_ind, ],
78-
pd_with_terminal_text[parent_of_strings_ind, ]
98+
new_strings[, names_to_keep],
99+
pd[is_unaffected_token, ],
100+
pd[is_parent_of_problematic_string, ]
79101
) %>%
80102
arrange(pos_id)
81103
}
104+
105+
#' Ensure that the parse data is valid
106+
#'
107+
#' Test whether all non-termnals have at least one child and throw an error
108+
#' otherwise. As this is check is rather expensive, it is only
109+
#' carried out for configurations we have good reasons to expect problems.
110+
#' @param pd A parse table.
111+
ensure_valid_pd <- function(pd) {
112+
if (getRversion() < "3.2") {
113+
non_terminals <- pd %>%
114+
filter(terminal == FALSE)
115+
valid_pd <- non_terminals$id %>%
116+
map_lgl(~ .x %in% pd$parent) %>%
117+
all()
118+
if (!valid_pd) {
119+
stop(paste(
120+
"The parse data is not valid and the problem is most likely related",
121+
"to the parser in base R. Please install R >= 3.2 and try again.",
122+
call. = FALSE
123+
))
124+
}
125+
}
126+
TRUE
127+
}
128+
129+
#' Indentify strings that were not fully parsed
130+
#'
131+
#' Indentifies strings that were not fully parsed due to their vast length.
132+
#' @details
133+
#' The meaning of the variable `is_problematic_string` in the source code
134+
#' changes from "all strings" to "all problematic strings", is partly
135+
#' missleading and this approach was choosen for performance reasons only.
136+
#' @param pd A parse table.
137+
#' @param text The initial code to style.
138+
identify_insufficiently_parsed_stings <- function(pd, text) {
139+
is_problematic_string <- pd$token == "STR_CONST"
140+
candidate_substring <- substr(
141+
pd$text[is_problematic_string], 1, 1
142+
)
143+
is_problematic_string[is_problematic_string] <- candidate_substring == "["
144+
is_problematic_string
145+
}
146+
147+
#' @importFrom purrr map2_lgl
148+
lines_and_cols_match <- function(data) {
149+
left <- paste0(line_col_names(), "")
150+
right <- paste0(line_col_names(), "parent")
151+
map2_lgl(left, right,
152+
two_cols_match,
153+
data = data
154+
) %>%
155+
all()
156+
}

man/verify_str_txt.Rd renamed to man/ensure_correct_str_txt.Rd

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/ensure_valid_pd.Rd

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/identify_insufficiently_parsed_stings.Rd

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/style_dir.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/style_file.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/style_pkg.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/two_cols_match.Rd

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/parsing/long_strings-in.R

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
b <-
22
3
3+
g <-
34
"v x ijyuldlf ixi tt ucw nk xejkf omch ujm ymgsgkwickxn tg zknjxmk aqtgqrn bhv
45
se g ec avo xs nyz fhadktjlwuocti au y gxv y xbr x kxn om dkaderkl xqok
56
pp ud lcw pnft ggzz lu v sgs ysv uyyxp gmcvt o rumej rfed j qy ozo
@@ -32,3 +33,5 @@ b <-
3233
'test'
3334
'test"ji"' # comment
3435
1
36+
37+
call("a_is_long" = 2)

0 commit comments

Comments
 (0)