Skip to content

Commit 0a9f468

Browse files
Merge pull request #90 from lorenzwalthert/speedup
Speedup of nested styler functions by ~ 4x.
2 parents 59de93f + c339fa8 commit 0a9f468

File tree

11 files changed

+246
-141
lines changed

11 files changed

+246
-141
lines changed

R/modify_pd.R

Lines changed: 58 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -9,82 +9,32 @@ NULL
99

1010
#' @describeIn update_indention Inserts indetion based on round brackets.
1111
indent_round <- function(pd, indent_by) {
12-
indention_needed <- needs_indention(pd, token = "'('")
13-
if (indention_needed) {
14-
opening <- which(pd$token == "'('")
15-
start <- opening + 1
16-
stop <- nrow(pd) - 1
17-
if (start > stop) return(pd)
18-
19-
pd <- pd %>%
20-
mutate(indent = indent + ifelse(seq_len(nrow(pd)) %in% start:stop,
21-
indent_by, 0))
22-
}
23-
24-
pd %>%
25-
set_unindention_child(token = "')'", unindent_by = indent_by)
12+
indent_indices <- compute_indent_indices(pd, token = "'('")
13+
pd$indent[indent_indices] <- pd$indent[indent_indices] + indent_by
14+
set_unindention_child(pd, token = "')'", unindent_by = indent_by)
2615
}
16+
2717
#' @rdname update_indention
2818
indent_curly <- function(pd, indent_by) {
29-
indention_needed <- needs_indention(pd, token = "'{'")
30-
if (indention_needed) {
31-
opening <- which(pd$token == "'{'")
32-
start <- opening + 1
33-
stop <- nrow(pd) - 1
34-
if (start > stop) return(pd)
35-
36-
pd <- pd %>%
37-
mutate(indent = indent + ifelse(seq_len(nrow(pd)) %in% start:stop,
38-
indent_by, 0))
39-
}
40-
pd %>%
41-
set_unindention_child(token = "'}'", unindent_by = indent_by)
42-
}
43-
44-
#' Check whether indention is needed
45-
#'
46-
#' @param pd A parse table.
47-
#' @param token Which token the check should be based on.
48-
#' @return returns `TRUE` if indention is needed, `FALSE` otherwise. Indention
49-
#' is needed:
50-
#' * if `token` occurs in `pd`.
51-
#' * if there is no child that starts on the same line as `token` and
52-
#' "opens" indention without closing it on this line.
53-
#' @return `TRUE` if indention is needed, `FALSE` otherwise.
54-
needs_indention <- function(pd, token = "'('") {
55-
opening <- which(pd$token %in% token)[1]
56-
if (is.na(opening)) return(FALSE)
57-
before_first_break <- which(pd$lag_newlines > 0)[1] - 1
58-
if (is.na(before_first_break)) return(FALSE)
59-
!any(pd$multi_line[opening:before_first_break])
19+
indent_indices <- compute_indent_indices(pd, token = "'{'")
20+
pd$indent[indent_indices] <- pd$indent[indent_indices] + indent_by
21+
set_unindention_child(pd, token = "'}'", unindent_by = indent_by)
6022
}
6123

6224
#' @rdname update_indention
6325
indent_op <- function(pd, indent_by, token = c(math_token,
6426
"SPECIAL-PIPE")) {
65-
if (needs_indention(pd, token)) {
66-
opening <- which(pd$token %in% token)
67-
start <- opening[1] + 1
68-
stop <- nrow(pd)
69-
pd <- pd %>%
70-
mutate(indent = indent + ifelse(seq_len(nrow(pd)) %in% start:stop,
71-
indent_by, 0))
72-
}
27+
indent_indices <- compute_indent_indices(pd, token, indent_last = TRUE)
28+
pd$indent[indent_indices] <- pd$indent[indent_indices] + indent_by
7329
pd
7430
}
7531

7632
#' @describeIn update_indention Same as indent_op, but only indents one token
7733
#' after `token`, not all remaining.
7834
indent_assign <- function(pd, indent_by, token = c("LEFT_ASSIGN", "
7935
EQ_ASSIGN")) {
80-
if (needs_indention(pd, token)) {
81-
opening <- which(pd$token %in% token)
82-
start <- opening + 1
83-
stop <- start + 1
84-
pd <- pd %>%
85-
mutate(indent = indent + ifelse(seq_len(nrow(pd)) %in% start:stop,
86-
indent_by, 0))
87-
}
36+
indent_indices <- compute_indent_indices(pd, token, indent_last = TRUE)
37+
pd$indent[indent_indices] <- pd$indent[indent_indices] + indent_by
8838
pd
8939
}
9040

@@ -98,15 +48,56 @@ indent_without_paren <- function(pd, indent_by = 2) {
9848
pd
9949
}
10050

51+
#' Compute the indices that need indention
52+
#'
53+
#' Based on `token`, find the rows in `pd` that need to be indented.
54+
#' @param pd A parse table.
55+
#' @param token A character vector with tokens.
56+
#' @param indent_last Flag to indicate whether the last token in `pd` should
57+
#' be indented or not. See 'Details'.
58+
#' @details
59+
#' For example when `token` is a parenthesis, the closing parenthesis does not
60+
#' need indention, but if token is something else, for example a plus (+), the
61+
#' last token in `pd` needs indention.
62+
compute_indent_indices <- function(pd, token = "'('", indent_last = FALSE) {
63+
npd <- nrow(pd)
64+
opening <- which(pd$token %in% token)[1]
65+
if (!needs_indention(pd, opening)) return(numeric(0))
66+
start <- opening + 1
67+
stop <- npd - ifelse(indent_last, 0, 1)
68+
which(between(seq_len(npd), start, stop))
69+
}
70+
71+
72+
#' Check whether indention is needed
73+
#'
74+
#' @param pd A parse table.
75+
#' @param opening the index of the opening parse table. Since always computed
76+
#' before this function is called, it is included as an argument so it does
77+
#' not have to be recomputed.
78+
#' @return returns `TRUE` if indention is needed, `FALSE` otherwise. Indention
79+
#' is needed if and only if:
80+
#' * the opening token is not `NA`.
81+
#' * if there is a multi-line token before the first line break.
82+
#' @return `TRUE` if indention is needed, `FALSE` otherwise.
83+
needs_indention <- function(pd, opening) {
84+
if (is.na(opening)) return(FALSE)
85+
before_first_break <- which(pd$lag_newlines > 0)[1] - 1
86+
if (is.na(before_first_break)) return(FALSE)
87+
!any(pd$multi_line[opening:before_first_break])
88+
}
89+
90+
91+
10192
#' Set the multi-line column
10293
#'
10394
#' Sets the column `multi_line` in `pd` by checking row-wise whether any child
10495
#' of a token is a multi-line token.
10596
#' @param pd A parse table.
10697
#' @importFrom purrr map_lgl
10798
set_multi_line <- function(pd) {
108-
pd %>%
109-
mutate(multi_line = map_lgl(child, token_is_multi_line))
99+
pd$multi_line <- map_lgl(pd$child, token_is_multi_line)
100+
pd
110101
}
111102

112103
#' Check whether a parse table is a multi-line token
@@ -117,7 +108,7 @@ set_multi_line <- function(pd) {
117108
#' * it has at least one child that is a multi-line expression itself.
118109
#' @param pd A parse table.
119110
token_is_multi_line <- function(pd) {
120-
any(pd$multi_line) | any(pd$lag_newlines)
111+
any(pd$multi_line, pd$lag_newlines > 0)
121112
}
122113

123114

@@ -127,6 +118,7 @@ token_is_multi_line <- function(pd) {
127118
#' @param pd_flat A flat parse table.
128119
#' @return A nested parse table.
129120
strip_eol_spaces <- function(pd_flat) {
130-
pd_flat %>%
131-
mutate(spaces = spaces * (lead(lag_newlines, default = 0) == 0))
121+
idx <- lead(pd_flat$lag_newlines, default = 0) != 0
122+
pd_flat$spaces[idx] <- 0
123+
pd_flat
132124
}

R/nested.R

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,8 @@ compute_parse_data_nested <- function(text) {
2929
add_terminal_token_before() %>%
3030
add_terminal_token_after()
3131

32+
parse_data$child <- rep(list(NULL), length(parse_data$text))
3233
pd_nested <- parse_data %>%
33-
mutate_(child = ~rep(list(NULL), length(text))) %>%
34-
mutate_(short = ~substr(text, 1, 5)) %>%
35-
select_(~short, ~everything()) %>%
3634
nest_parse_data() %>%
3735
flatten_operators()
3836

@@ -48,6 +46,7 @@ tokenize <- function(text) {
4846
parsed <- parse(text = text, keep.source = TRUE)
4947
parse_data <- as_tibble(utils::getParseData(parsed, includeText = NA)) %>%
5048
enhance_mapping_special()
49+
parse_data$short <- substr(parse_data$text, 1, 5)
5150
parse_data
5251
}
5352

@@ -57,13 +56,13 @@ tokenize <- function(text) {
5756
#' description.
5857
#' @param pd A parse table.
5958
enhance_mapping_special <- function(pd) {
60-
pd %>%
61-
mutate(token = case_when(
59+
pd$token <- with(pd, case_when(
6260
token != "SPECIAL" ~ token,
6361
text == "%>%" ~ special_and("PIPE"),
6462
text == "%in%" ~ special_and("IN"),
6563
TRUE ~ special_and("OTHER")
6664
))
65+
pd
6766
}
6867

6968
special_and <- function(text) {
@@ -98,19 +97,23 @@ NULL
9897

9998
#' @rdname add_token_terminal
10099
add_terminal_token_after <- function(pd_flat) {
101-
pd_flat %>%
100+
terminals <- pd_flat %>%
102101
filter(terminal) %>%
103-
arrange(line1, col1) %>%
104-
transmute(id = id, token_after = lead(token, default = "")) %>%
102+
arrange(line1, col1)
103+
104+
data_frame(id = terminals$id,
105+
token_after = lead(terminals$token, default = "")) %>%
105106
left_join(pd_flat, ., by = "id")
106107
}
107108

108109
#' @rdname add_token_terminal
109110
add_terminal_token_before <- function(pd_flat) {
110-
pd_flat %>%
111+
terminals <- pd_flat %>%
111112
filter(terminal) %>%
112-
arrange(line1, col1) %>%
113-
transmute(id = id, token_before = lag(token, default = "")) %>%
113+
arrange(line1, col1)
114+
115+
data_frame(id = terminals$id,
116+
token_before = lag(terminals$token, default = "")) %>%
114117
left_join(pd_flat, ., by = "id")
115118
}
116119

@@ -146,24 +149,22 @@ set_spaces <- function(spaces_after_prefix, force_one) {
146149
#' @importFrom purrr map2
147150
nest_parse_data <- function(pd_flat) {
148151
if (all(pd_flat$parent <= 0)) return(pd_flat)
149-
split <- pd_flat %>%
150-
mutate_(internal = ~ (id %in% parent) | (parent <= 0)) %>%
151-
nest_("data", names(pd_flat))
152+
pd_flat$internal <- with(pd_flat, (id %in% parent) | (parent <= 0))
153+
split_data <- split(pd_flat, pd_flat$internal)
152154

153-
child <- split$data[!split$internal][[1L]]
154-
internal <- split$data[split$internal][[1L]]
155+
child <- split_data$`FALSE`
156+
internal <- split_data$`TRUE`
155157

156158
internal <- rename_(internal, internal_child = ~child)
157159

158-
nested <-
160+
child$parent_ <- child$parent
161+
joined <-
159162
child %>%
160-
mutate_(parent_ = ~parent) %>%
161163
nest_(., "child", setdiff(names(.), "parent_")) %>%
162-
left_join(internal, ., by = c("id" = "parent_")) %>%
163-
mutate_(child = ~map2(child, internal_child, combine_children)) %>%
164-
select_(~-internal_child) %>%
165-
select_(~short, ~everything(), ~-text, ~text)
166-
164+
left_join(internal, ., by = c("id" = "parent_"))
165+
nested <- joined
166+
nested$child <- map2(nested$child, nested$internal_child, combine_children)
167+
nested <- nested[, setdiff(names(nested), "internal_child")]
167168
nest_parse_data(nested)
168169
}
169170

@@ -179,7 +180,8 @@ nest_parse_data <- function(pd_flat) {
179180
combine_children <- function(child, internal_child) {
180181
bound <- bind_rows(child, internal_child)
181182
if (nrow(bound) == 0) return(NULL)
182-
arrange_(bound, ~line1, ~col1)
183+
bound[order(bound$line1, bound$col1), ]
184+
183185
}
184186

185187
#' Get the start right

R/parsed.R

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,12 @@ enhance_parse_data <- function(parse_data) {
4444
parse_data_filtered %>%
4545
create_filler()
4646

47-
parse_data_comment_eol <-
48-
parse_data_filled %>%
49-
mutate_(text = ~if_else(token == "COMMENT", gsub(" +$", "", text), text))
47+
parse_data_comment_eol <- parse_data_filled
48+
49+
parse_data_comment_eol$text <-
50+
if_else(parse_data_comment_eol$token == "COMMENT",
51+
gsub(" +$", "", parse_data_comment_eol$text),
52+
parse_data_comment_eol$text)
5053

5154
parse_data_comment_eol
5255
}
@@ -81,18 +84,17 @@ verify_roundtrip <- function(pd_flat, text) {
8184
#' @return A parse table with two three columns: lag_newlines, newlines and
8285
#' spaces.
8386
create_filler <- function(pd_flat) {
84-
ret <-
85-
pd_flat %>%
86-
mutate_(
87-
line3 = ~lead(line1, default = tail(line2, 1)),
88-
col3 = ~lead(col1, default = tail(col2, 1) + 1L),
89-
newlines = ~line3 - line2,
90-
lag_newlines = ~lag(newlines, default = 0),
91-
col2_nl = ~if_else(newlines > 0L, 0L, col2),
92-
spaces = ~col3 - col2_nl - 1L,
93-
multi_line = ~ifelse(terminal, FALSE, NA)
94-
) %>%
95-
select_(~-line3, ~-col3, ~-col2_nl)
87+
88+
pd_flat$line3 <- lead(pd_flat$line1, default = tail(pd_flat$line2, 1))
89+
pd_flat$col3 <- lead(pd_flat$col1, default = tail(pd_flat$col2, 1) + 1L)
90+
pd_flat$newlines <- pd_flat$line3 - pd_flat$line2
91+
pd_flat$lag_newlines <- lag(pd_flat$newlines, default = 0)
92+
pd_flat$col2_nl <- if_else(pd_flat$newlines > 0L, 0L, pd_flat$col2)
93+
pd_flat$spaces <- pd_flat$col3 - pd_flat$col2_nl - 1L
94+
pd_flat$multi_line <- ifelse(pd_flat$terminal, FALSE, NA)
95+
96+
ret <- pd_flat[, !(names(pd_flat) %in% c("line3", "col3", "col2_nl"))]
97+
9698

9799
if (!("indent" %in% names(ret))) {
98100
ret$indent <- 0

R/rules-replacement.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ force_assignment_op <- function(pd) {
88

99
resolve_semicolon <- function(pd) {
1010
is_semicolon <- pd$token == "';'"
11+
if (!any(is_semicolon)) return(pd)
1112
pd$lag_newlines[lag(is_semicolon)] <- 1L
12-
pd <- pd[!is_semicolon,]
13+
pd <- pd[!is_semicolon, ]
1314
pd
1415
}

0 commit comments

Comments
 (0)