|
7 | 7 | #' of the parse table. |
8 | 8 | #' @importFrom purrr when |
9 | 9 | #' @keywords internal |
compute_parse_data_nested <- function(text,
                                      transformers) {
  # Build the flat parse table and enrich it step by step: neighbouring
  # terminal tokens, stylerignore information and caching attributes. The
  # last step drops the children of cached top-level expressions so they are
  # not nested further down.
  parse_data <- text %>%
    tokenize() %>%
    add_terminal_token_before() %>%
    add_terminal_token_after() %>%
    add_stylerignore() %>%
    add_attributes_caching(transformers) %>%
    drop_cached_children()

  env_add_stylerignore(parse_data)

  # One (still empty) child slot per remaining row.
  parse_data$child <- rep(list(NULL), length(parse_data$text))

  # Nest, flatten operators, relocate `=` assignments only when the flat table
  # contains any, and finally attach the block ids used for caching.
  parse_data %>%
    nest_parse_data() %>%
    flatten_operators() %>%
    when(any(parse_data$token == "EQ_ASSIGN") ~ relocate_eq_assign(.), ~.) %>%
    add_cache_block()
}
26 | 30 |
|
#' Add the block id to a parse table
#'
#' Has to run after [nest_parse_data()] because it requires a nested parse
#' table as input.
#' @param pd_nested A top level nest.
#' @return `pd_nested` with a column `block`: the result of
#'   [cache_find_block()] when caching is activated, otherwise `1` for every
#'   row.
#' @keywords internal
#' @importFrom rlang seq2
add_cache_block <- function(pd_nested) {
  pd_nested$block <- if (cache_is_activated()) {
    cache_find_block(pd_nested)
  } else {
    # Without caching, everything belongs to one single block.
    rep(1, nrow(pd_nested))
  }
  pd_nested
}
| 46 | + |
#' Drop all children of a top level expression that are cached
#'
#' Note that top-level comments are never marked as cached (see
#' [add_attributes_caching()]). Package code has a lot of roxygen comments and
#' each of them is a top-level expression, so checking them is very expensive.
#' @param pd A parse table with (at least) the columns `line1`, `col1`,
#'   `line2`, `col2`, `terminal`, `parent`, `pos_id` and `is_cached`.
#' @return If caching is activated, `pd` without the rows that belong to a
#'   cached top-level expression (the top-level row itself is kept). Otherwise
#'   `pd` unchanged.
#' @details
#' Because we process in blocks of expressions for speed, a cached expression
#' will always end up in a block that won't be styled again (usual case), unless
#' it's on a line where multiple expressions sit and at least one of them is not
#' cached (exception).
#'
#' **usual case: All other expressions in a block are cached**
#'
#' Cached expressions don't need to be transformed with `transformers` in
#' [parse_transform_serialize_r_block()], we simply return `text` for the top
#' level token. For that reason, the nested parse table can be shallow at the
#' rows where these expressions are located, i.e. it does not have to contain a
#' child, because it will neither be transformed nor serialized. This function
#' drops all associated tokens except the top-level token for such expressions,
#' which results in large speed improvements in [compute_parse_data_nested()]
#' because nesting is expensive and will not be done for cached expressions.
#'
#' **exception: Not all other expressions in a block are cached**
#'
#' As described in [cache_find_block()], expressions on the same line are always
#' put into one block. If any element of a block is not cached, the block will
#' be styled as a whole. If the parse table was made shallow (and the top-level
#' expression is still marked as non-terminal), `text` will never be used in the
#' transformation process and is eventually lost. Hence, the top-level
#' expression must be changed to a terminal. It will then act like a comment in
#' the sense that it is a fixed `text`.
#'
#' Because for the usual case it does not even matter if the cached expression
#' is a terminal or not (because it is not processed), `terminal = TRUE` can
#' safely be set in general. Note that this function itself only removes rows;
#' it does not modify the `terminal` column.
#' @section Implementation:
#' Because the structure of the parse table is not always "top-level expression
#' first, then children", this function creates a temporary parse table that has
#' this property, extracts the ids to keep from it and then subsets the original
#' parse table so it is shallow in the right places.
#' @keywords internal
drop_cached_children <- function(pd) {
  if (!cache_is_activated()) {
    return(pd)
  }

  # Sort so that every top-level expression comes before its own tokens:
  # earlier start first, later end first, non-terminals before terminals.
  parent_first_order <- order(
    pd$line1, pd$col1, -pd$line2, -pd$col2, as.integer(pd$terminal)
  )
  pd_parent_first <- pd[parent_first_order, ]

  # A new group starts at every top-level row (`parent == 0`).
  top_level_group <- cumsum(pd_parent_first$parent == 0)
  pos_ids_to_keep <- pd_parent_first %>%
    split(top_level_group) %>%
    map(find_pos_id_to_keep) %>%
    unlist() %>%
    unname()

  # Subset the original table, so its row order is left untouched.
  pd[pd$pos_id %in% pos_ids_to_keep, ]
}
| 106 | + |
#' Find the pos ids to keep for one top-level expression
#'
#' @param pd A parse table that holds one top-level expression in its first
#'   row, followed by the tokens that belong to it. Must have the columns
#'   `is_cached` and `pos_id`.
#' @return If the first row is flagged as cached, only its `pos_id`; otherwise
#'   all values of `pos_id`. A missing flag (`NA`, or a table without rows) is
#'   treated as "not cached", so nothing is dropped in that case.
#' @keywords internal
find_pos_id_to_keep <- function(pd) {
  # `isTRUE()` instead of a bare `if ()`: the flag is `NA` for rows that never
  # went through the cache lookup, and `if (NA)` would raise an error.
  if (isTRUE(pd$is_cached[1])) {
    pd$pos_id[1]
  } else {
    pd$pos_id
  }
}
| 114 | + |
| 115 | + |
27 | 116 | #' Turn off styling for parts of the code |
28 | 117 | #' |
29 | 118 | #' Using stylerignore markers, you can temporarily turn off styler. See a |
@@ -137,6 +226,25 @@ add_terminal_token_before <- function(pd_flat) { |
137 | 226 | left_join(pd_flat, ., by = "id") |
138 | 227 | } |
139 | 228 |
|
#' Initialise variables related to caching
#'
#' Adds the columns `block` and `is_cached` to the flat parse table, both
#' initialised with `NA`. If caching is activated, `is_cached` is filled in for
#' top-level rows (`parent == 0`) via `is_cached()`, and set to `FALSE` for
#' every comment token.
#' @param pd_flat A flat parse table with (at least) the columns `parent`,
#'   `token` and `text`.
#' @param transformers A list with transformer functions, used to check if
#'   the code is cached.
#' @describeIn add_token_terminal Initializes `block` and `is_cached`.
#' @keywords internal
add_attributes_caching <- function(pd_flat, transformers) {
  pd_flat$block <- pd_flat$is_cached <- rep(NA, nrow(pd_flat))
  if (cache_is_activated()) {
    is_comment <- pd_flat$token == "COMMENT"
    # Comments are always flagged as not cached below, so the (expensive)
    # cache lookup is only performed for the remaining top-level rows.
    needs_lookup <- pd_flat$parent == 0 & !is_comment
    pd_flat$is_cached[needs_lookup] <- map_lgl(
      pd_flat$text[needs_lookup],
      is_cached, transformers, cache_dir_default()
    )
    pd_flat$is_cached[is_comment] <- FALSE
  }
  pd_flat
}
| 247 | + |
140 | 248 | #' @describeIn add_token_terminal Removes column `terminal_token_before`. Might
141 | 249 | #' be used to prevent the use of invalidated information, e.g. if tokens were |
142 | 250 | #' added to the nested parse table. |
@@ -220,13 +328,3 @@ combine_children <- function(child, internal_child) { |
220 | 328 | } |
221 | 329 | bound[order(bound$pos_id), ] |
222 | 330 | } |
223 | | - |
224 | | -#' Get the start right |
225 | | -#' |
226 | | -#' On what line does the first token occur? |
227 | | -#' @param pd_nested A nested parse table. |
228 | | -#' @return The line number on which the first token occurs. |
229 | | -#' @keywords internal |
230 | | -find_start_line <- function(pd_nested) { |
231 | | - pd_nested$line1[1] |
232 | | -} |
|
0 commit comments