Skip to content

Commit fd90fc3

Browse files
Merge pull request #537 from lorenzwalthert/detect-alignment
- Detect alignment (#537).
2 parents 6e78f77 + fc7c666 commit fd90fc3

24 files changed

+1780
-27
lines changed

.Rbuildignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,5 @@ CONTRIBUTING.md
1717
revdep
1818
^cran-comments\.md$
1919
^tests/testmanual$
20+
^\.pre-commit-config\.yaml$
21+
^brew\-log$

DESCRIPTION

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: styler
22
Type: Package
33
Title: Non-Invasive Pretty Printing of R Code
4-
Version: 1.1.1.9002
4+
Version: 1.1.1.9003
55
Authors@R:
66
c(person(given = "Kirill",
77
family = "Müller",
@@ -48,6 +48,8 @@ Collate:
4848
'communicate.R'
4949
'compat-dplyr.R'
5050
'compat-tidyr.R'
51+
'detect-alignment-utils.R'
52+
'detect-alignment.R'
5153
'environments.R'
5254
'expr-is.R'
5355
'indent.R'

R/detect-alignment-utils.R

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
#' Ensure the closing brace of the call is removed
2+
#'
3+
#' Must be after dropping comments because the closing brace is only guaranteed
4+
#' to be the last token in that case.
5+
#' @inheritParams alignment_drop_comments
6+
#' @importFrom rlang seq2
7+
#' @keywords internal
8+
alignment_ensure_no_closing_brace <- function(pd_by_line,
9+
last_line_droped_early) {
10+
if (last_line_droped_early) {
11+
return(pd_by_line)
12+
}
13+
last <- last(pd_by_line)
14+
if (nrow(last) == 1) {
15+
# can drop last line completely
16+
pd_by_line[-length(pd_by_line)]
17+
} else {
18+
# only drop last elment of last line
19+
pd_by_line[[length(pd_by_line)]] <- last[seq2(1, nrow(last) - 1), ]
20+
pd_by_line
21+
}
22+
}
23+
24+
#' Remove all comment tokens
25+
#'
26+
#' Must be after split by line because it invalidates (lag)newlines, which are
27+
#' used for splitting by line.
28+
#' @param pd_by_line A list, each element corresponding to a potentially
29+
#' incomplete parse table that represents all token from one line.
30+
#' @keywords internal
31+
#' @importFrom purrr map compact
32+
alignment_drop_comments <- function(pd_by_line) {
33+
map(pd_by_line, function(x) {
34+
out <- x[x$token != "COMMENT", ]
35+
if (nrow(out) < 1) {
36+
return(NULL)
37+
} else {
38+
out
39+
}
40+
}) %>%
41+
compact()
42+
}
43+
44+
#' Ensure last pd has a trailing comma
45+
#'
46+
#' Must be after [alignment_ensure_no_closing_brace()] because if it comes after
47+
#' [alignment_ensure_trailing_comma()], the last expression would not be a
48+
#' brace, which would make removal complicated.
49+
#' @inheritParams alignment_drop_comments
50+
#' @keywords internal
51+
alignment_ensure_trailing_comma <- function(pd_by_line) {
52+
last_pd <- last(pd_by_line)
53+
# needed to make sure comma is aded without space
54+
last_pd$spaces[nrow(last_pd)] <- 0
55+
if (last(last_pd$token) == "','") {
56+
return(pd_by_line)
57+
} else {
58+
pos_id <- create_pos_ids(last_pd, nrow(last_pd), after = TRUE)
59+
tokens <- create_tokens(
60+
tokens = "','",
61+
texts = ",",
62+
lag_newlines = 0,
63+
spaces = 0,
64+
pos_ids = pos_id,
65+
)
66+
tokens$.lag_spaces <- 0
67+
pd_by_line[[length(pd_by_line)]] <- rbind(last_pd, tokens)
68+
pd_by_line
69+
}
70+
}
71+
72+
#' Checks if all arguments of column 1 are named
73+
#' @param relevant_pd_by_line A list with parse tables of a multi-line call,
74+
#' excluding first and last column.
75+
#' @importFrom purrr map_lgl
76+
#' @keywords internal
77+
alignment_col1_is_named <- function(relevant_pd_by_line) {
78+
map_lgl(relevant_pd_by_line, function(x) {
79+
if (nrow(x) < 3) {
80+
return(FALSE)
81+
}
82+
identical(x$token[c(1, 3)], c("SYMBOL_SUB", "expr")) &&
83+
x$token[2] %in% c(
84+
"EQ_SUB", "SPECIAL-IN", "LT", "GT", "EQ", "NE"
85+
)
86+
}) %>%
87+
all()
88+
}
89+
90+
#' Serialize all lines for a given column
91+
#' @param column The index of the column to serialize.
92+
#' @inheritParams alignment_col1_is_named
93+
#' @importFrom purrr map
94+
#' @keywords internal
95+
alignment_serialize_column <- function(relevant_pd_by_line, column) {
96+
map(relevant_pd_by_line, alignment_serialize_line, column = column)
97+
}
98+
99+
#' Serialize one line for a column
100+
#'
101+
#'
102+
#' @inheritParams alignment_serialize_column
103+
#' @inheritParams alignment_col1_is_named
104+
alignment_serialize_line <- function(relevant_pd_by_line, column) {
105+
# TODO
106+
# better also add lover bound for column. If you already checked up to comma 2,
107+
# you don't need to re-construct text again, just check if text between comma 2
108+
# and 3 has the same length.
109+
comma_idx <- which(relevant_pd_by_line$token == "','")
110+
n_cols <- length(comma_idx)
111+
if (column > n_cols) {
112+
# line does not have values at that column
113+
return(NULL)
114+
} else {
115+
relevant_comma <- comma_idx[column]
116+
}
117+
118+
relevant_pd_by_line <- relevant_pd_by_line[seq2(1, relevant_comma), ]
119+
alignment_serialize(relevant_pd_by_line)
120+
}
121+
122+
#' Serialize text from a parse table
123+
#'
124+
#' Line breaks are ignored as they are expected to be checked in
125+
#' [token_is_on_aligned_line()].
126+
#' @inheritParams alignment_drop_comments
127+
#' @keywords internal
128+
alignment_serialize <- function(pd_sub) {
129+
out <- Map(function(terminal, text, child, spaces, newlines) {
130+
if (terminal) {
131+
return(paste0(text, rep_char(" ", spaces)))
132+
} else {
133+
return(paste0(alignment_serialize(child), rep_char(" ", spaces)))
134+
}
135+
}, pd_sub$terminal, pd_sub$text, pd_sub$child, pd_sub$spaces, pd_sub$newlines)
136+
if (anyNA(out)) {
137+
return(NA)
138+
} else {
139+
paste0(out, collapse = "")
140+
}
141+
}
142+
143+
#' Check if spacing around comma is correcr
144+
#'
145+
#' At least one space after comma, none before, for all but the last comma on
146+
#' the line
147+
#' @param pd_sub The subset of a parse table corresponding to one line.
148+
#' @importFrom rlang seq2
149+
#' @keywords internal
150+
alignment_has_correct_spacing_around_comma <- function(pd_sub) {
151+
comma_tokens <- which(pd_sub$token == "','")
152+
if (length(comma_tokens) == 0) {
153+
return(TRUE)
154+
}
155+
relevant_comma_token <- comma_tokens[seq2(1, length(comma_tokens) - 1L)]
156+
correct_spaces_before <- pd_sub$.lag_spaces[relevant_comma_token] == 0
157+
correct_spaces_after <- pd_sub$spaces[relevant_comma_token] > 0
158+
all(correct_spaces_before) && all(correct_spaces_after)
159+
}
160+
161+
#' Check if spacing around `=` is correct
162+
#'
163+
#' At least one space around `EQ_SUB`
164+
#' @inheritParams alignment_has_correct_spacing_around_comma
165+
#' @keywords internal
166+
#' @importFrom rlang seq2
167+
alignment_has_correct_spacing_around_eq_sub <- function(pd_sub) {
168+
relevant_eq_sub_token <- which(pd_sub$token == "EQ_SUB")
169+
if (length(relevant_eq_sub_token) == 0) {
170+
return(TRUE)
171+
}
172+
173+
correct_spaces_before <- pd_sub$.lag_spaces[relevant_eq_sub_token] >= 1
174+
correct_spaces_after <- pd_sub$spaces[relevant_eq_sub_token] >= 1
175+
all(correct_spaces_before) && all(correct_spaces_after)
176+
}

R/detect-alignment.R

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#' Check if tokens are aligned
2+
#'
3+
#' If all tokens are aligned, `TRUE` is returned, otherwise `FALSE`. The
4+
#' function only checks for alignment of function calls. This can be
5+
#' recycled conveniently later if needed as a vector with length > 1.
6+
#' @param pd_flat A flat parse table.
7+
#' @details
8+
#' Multiple lines are called aligned if the following conditions hold for all
9+
#' but the first line of the expression:
10+
#'
11+
#' * lag spaces of column 1 must agree.
12+
#' * spacing around comma (0 before, > 1 after) and spacing around `=` (at least
13+
#' one around).
14+
#' * all positions of commas of col > 2 must agree (needs recursive creation of
15+
#' `text`).
16+
#'
17+
#' Because of the last requirement, this function is very expensive to run. For
18+
#' this reason, the following approach is taken:
19+
#'
20+
#' * Only invoke the function when certain that alignment is possible.
21+
#' * Check the cheap conditions first.
22+
#' * For the recursive creation of text, greedily check column by column to make
23+
#' sure we can stop as soon as we found that columns are not aligned.
24+
#'
25+
#' @importFrom purrr map_int map_lgl map compact
26+
#' @importFrom rlang seq2
27+
token_is_on_aligned_line <- function(pd_flat) {
28+
29+
line_idx <- 1 + cumsum(pd_flat$lag_newlines)
30+
pd_flat$.lag_spaces <- lag(pd_flat$spaces)
31+
pd_by_line <- split(pd_flat, line_idx)
32+
last_line_is_closing_brace_only <- nrow(last(pd_by_line)) == 1
33+
relevant_idx <- seq2(2, ifelse(last_line_is_closing_brace_only,
34+
length(pd_by_line) - 1,
35+
length(pd_by_line)
36+
))
37+
pd_by_line <- pd_by_line[relevant_idx]
38+
39+
relevant_lag_spaces_col_1 <- map_int(pd_by_line, ~ .x$.lag_spaces[1])
40+
41+
col1_is_aligned <- length(unique(relevant_lag_spaces_col_1)) == 1
42+
if (!col1_is_aligned) {
43+
return(FALSE)
44+
}
45+
has_correct_spacing_around_comma <- map_lgl(
46+
pd_by_line, alignment_has_correct_spacing_around_comma
47+
)
48+
if (!all(has_correct_spacing_around_comma)) {
49+
return(FALSE)
50+
}
51+
52+
has_correct_spacing_around_eq_sub <- map_lgl(
53+
pd_by_line, alignment_has_correct_spacing_around_eq_sub
54+
)
55+
56+
if (!all(has_correct_spacing_around_eq_sub)) {
57+
return(FALSE)
58+
}
59+
starting_with_comma <- map_lgl(pd_by_line, ~ .x$token[1] == "','")
60+
if (any(starting_with_comma)) {
61+
return(FALSE)
62+
}
63+
pd_is_multi_line <- map_lgl(pd_by_line, ~ any(.x$multi_line, na.rm = TRUE))
64+
if (any(pd_is_multi_line)) {
65+
return(FALSE)
66+
}
67+
68+
pd_by_line <- alignment_drop_comments(pd_by_line) %>%
69+
alignment_ensure_no_closing_brace(last_line_is_closing_brace_only) %>%
70+
alignment_ensure_trailing_comma()
71+
# now, pd only contains arguments separated by values, ideal for iterating
72+
# over columns.
73+
# cannot use lag_newlines anymore since we removed tokens.
74+
pd_by_line <- map(pd_by_line, function(pd_sub) {
75+
pd_sub$lag_newlines <- NULL
76+
pd_sub
77+
})
78+
79+
n_cols <- map_int(pd_by_line, ~ sum(.x$token == "','"))
80+
start <- ifelse(all(alignment_col1_is_named(pd_by_line)), 1, 2)
81+
82+
for (column in seq2(start, max(n_cols))) {
83+
char_len <- alignment_serialize_column(pd_by_line, column) %>%
84+
compact() %>%
85+
unlist() %>%
86+
trimws(which = "right") %>%
87+
nchar()
88+
89+
is_aligned <- length(unique(char_len)) == 1
90+
91+
if (!is_aligned) {
92+
return(FALSE)
93+
}
94+
}
95+
TRUE
96+
}

R/rules-spacing.R

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,42 @@
1+
#' Set spaces around operators
2+
#'
3+
#' Alignement is kept, if detected.
14
#' @include token-define.R
25
#' @keywords internal
3-
add_space_around_op <- function(pd_flat) {
4-
op_after <- pd_flat$token %in% op_token
5-
op_before <- lead(op_after, default = FALSE)
6-
idx_before <- op_before & (pd_flat$newlines == 0L)
7-
pd_flat$spaces[idx_before] <- pmax(pd_flat$spaces[idx_before], 1L)
8-
idx_after <- op_after & (pd_flat$newlines == 0L)
9-
pd_flat$spaces[idx_after] <- pmax(pd_flat$spaces[idx_after], 1L)
10-
pd_flat
11-
}
12-
136
#' @include token-define.R
14-
#' @keywords internal
15-
set_space_around_op <- function(pd_flat) {
7+
set_space_around_op <- function(pd_flat, strict) {
8+
# spacing and operator in same function because alternative is
9+
# calling token_is_on_aligned_line() twice because comma and operator spacing
10+
# depends on it.
11+
pd_flat <- add_space_after_comma(pd_flat)
1612
op_after <- pd_flat$token %in% op_token
13+
op_before <- lead(op_after, default = FALSE)
14+
# include comma, but only for after
15+
op_after <- op_after | pd_flat$token == "','"
1716
if (!any(op_after)) {
1817
return(pd_flat)
1918
}
20-
op_before <- lead(op_after, default = FALSE)
21-
pd_flat$spaces[op_before & (pd_flat$newlines == 0L)] <- 1L
22-
pd_flat$spaces[op_after & (pd_flat$newlines == 0L)] <- 1L
19+
if (sum(pd_flat$lag_newlines) > 2 &&
20+
is_function_call(pd_flat) &&
21+
any(pd_flat$token %in% c("EQ_SUB", "','"))
22+
) {
23+
is_on_aligned_line <- token_is_on_aligned_line(pd_flat)
24+
} else {
25+
is_on_aligned_line <- FALSE
26+
}
27+
# operator
28+
must_have_space_before <- op_before & (pd_flat$newlines == 0L) & !is_on_aligned_line
29+
pd_flat$spaces[must_have_space_before] <- if (strict) {
30+
1L
31+
} else {
32+
pmax(pd_flat$spaces[must_have_space_before], 1L)
33+
}
34+
must_have_space_after <- op_after & (pd_flat$newlines == 0L) & !is_on_aligned_line
35+
pd_flat$spaces[must_have_space_after] <- if (strict) {
36+
1L
37+
} else {
38+
pmax(pd_flat$spaces[must_have_space_after], 1L)
39+
}
2340
pd_flat
2441
}
2542

R/style-guides.R

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -85,16 +85,9 @@ tidyverse_style <- function(scope = "tokens",
8585
style_space_around_tilde,
8686
strict = strict
8787
),
88-
spacing_around_op = if (strict) {
89-
set_space_around_op
90-
} else {
91-
add_space_around_op
92-
},
93-
spacing_around_comma = if (strict) {
94-
set_space_after_comma
95-
} else {
96-
add_space_after_comma
97-
},
88+
spacing_around_op = purrr::partial(set_space_around_op,
89+
strict = strict
90+
),
9891
remove_space_after_opening_paren,
9992
remove_space_after_excl,
10093
set_space_after_bang_bang,

R/utils-navigate-nest.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ next_non_comment <- function(pd, pos) {
1717

1818
#' @rdname next_non_comment
1919
previous_non_comment <- function(pd, pos) {
20-
if (length(pos) < 1 || is.na(pos) || pos >= nrow(pd)) {
20+
if (length(pos) < 1 || is.na(pos) || pos > nrow(pd)) {
2121
return(integer(0))
2222
}
2323
candidates <- seq2(1L, pos - 1L)

brew-log

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
2+

0 commit comments

Comments
 (0)