r-lib
diff --git a/‎.Rbuildignore‎
Lines changed: 2 additions & 0 deletions b/‎.Rbuildignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 3 additions & 1 deletion b/‎DESCRIPTION‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎R/detect-alignment-utils.R‎
Lines changed: 176 additions & 0 deletions b/‎R/detect-alignment-utils.R‎
Lines changed: 176 additions & 0 deletions
diff --git a/‎R/detect-alignment.R‎
Lines changed: 96 additions & 0 deletions b/‎R/detect-alignment.R‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎R/rules-spacing.R‎
Lines changed: 32 additions & 15 deletions b/‎R/rules-spacing.R‎
Lines changed: 32 additions & 15 deletions
diff --git a/‎R/style-guides.R‎
Lines changed: 3 additions & 10 deletions b/‎R/style-guides.R‎
Lines changed: 3 additions & 10 deletions
diff --git a/‎R/utils-navigate-nest.R‎
Lines changed: 1 addition & 1 deletion b/‎R/utils-navigate-nest.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎brew-log‎
Lines changed: 2 additions & 0 deletions b/‎brew-log‎
Lines changed: 2 additions & 0 deletions
@@ -17,3 +17,5 @@ CONTRIBUTING.md
 revdep
 ^cran-comments\.md$
 ^tests/testmanual$
+^\.pre-commit-config\.yaml$
+^brew\-log$
@@ -1,7 +1,7 @@
 Package: styler
 Type: Package
 Title: Non-Invasive Pretty Printing of R Code
-Version: 1.1.1.9002
+Version: 1.1.1.9003
 Authors@R: 
     c(person(given = "Kirill",
              family = "Müller",
@@ -48,6 +48,8 @@ Collate:
     'communicate.R'
     'compat-dplyr.R'
     'compat-tidyr.R'
+    'detect-alignment-utils.R'
+    'detect-alignment.R'
     'environments.R'
     'expr-is.R'
     'indent.R'
 
@@ -0,0 +1,176 @@
+#' Ensure the closing brace of the call is removed
+#'
+#' Must run after comments were dropped, because only then the closing brace
+#' is guaranteed to be the last token.
+#' @inheritParams alignment_drop_comments
+#' @param last_line_droped_early If `TRUE`, `pd_by_line` is returned
+#'   unchanged. [token_is_on_aligned_line()] passes whether the last line held
+#'   a single token only and was therefore already excluded.
+#' @importFrom rlang seq2
+#' @keywords internal
+alignment_ensure_no_closing_brace <- function(pd_by_line,
+                                              last_line_droped_early) {
+  if (last_line_droped_early) {
+    return(pd_by_line)
+  }
+  last_line <- last(pd_by_line)
+  n_tokens <- nrow(last_line)
+  if (n_tokens != 1) {
+    # the closing brace shares its line with other tokens: only drop the last
+    # element of the last line
+    pd_by_line[[length(pd_by_line)]] <- last_line[seq2(1, n_tokens - 1), ]
+    return(pd_by_line)
+  }
+  # the closing brace is the only token left: drop the last line completely
+  pd_by_line[-length(pd_by_line)]
+}
+
+#' Remove all comment tokens
+#'
+#' Must happen after the split by line because it invalidates (lag)newlines,
+#' which are used for splitting by line. Lines without any non-comment token
+#' are removed from the list entirely.
+#' @param pd_by_line A list, each element corresponding to a potentially
+#'   incomplete parse table that represents all tokens from one line.
+#' @keywords internal
+#' @importFrom purrr map compact
+alignment_drop_comments <- function(pd_by_line) {
+  without_comments <- map(pd_by_line, function(pd_line) {
+    non_comment <- pd_line[pd_line$token != "COMMENT", ]
+    if (nrow(non_comment) >= 1) {
+      non_comment
+    } else {
+      # NULL entries are discarded by compact() below
+      NULL
+    }
+  })
+  compact(without_comments)
+}
+
+#' Ensure last pd has a trailing comma
+#'
+#' Must run after [alignment_ensure_no_closing_brace()]: if the comma was
+#' appended first, the closing brace would not be the last token anymore,
+#' which would make its removal complicated.
+#' @inheritParams alignment_drop_comments
+#' @return `pd_by_line`, unchanged if the last token of the last line already
+#'   is a comma, otherwise with a comma token appended to the last line.
+#' @keywords internal
+alignment_ensure_trailing_comma <- function(pd_by_line) {
+  last_pd <- last(pd_by_line)
+  if (last(last_pd$token) == "','") {
+    return(pd_by_line)
+  }
+  # needed to make sure the comma is added without space
+  last_pd$spaces[nrow(last_pd)] <- 0
+  pos_id <- create_pos_ids(last_pd, nrow(last_pd), after = TRUE)
+  tokens <- create_tokens(
+    tokens = "','",
+    texts = ",",
+    lag_newlines = 0,
+    spaces = 0,
+    pos_ids = pos_id
+  )
+  # the new comma directly follows the previous token
+  tokens$.lag_spaces <- 0
+  pd_by_line[[length(pd_by_line)]] <- rbind(last_pd, tokens)
+  pd_by_line
+}
+
+#' Checks if all arguments of column 1 are named
+#'
+#' A line qualifies if it has at least three tokens, the first being a
+#' `SYMBOL_SUB`, the third an `expr` and the second one of `EQ_SUB`,
+#' `SPECIAL-IN`, `LT`, `GT`, `EQ` or `NE`.
+#' @param relevant_pd_by_line A list with parse tables of a multi-line call,
+#'   excluding first and last column.
+#' @importFrom purrr map_lgl
+#' @keywords internal
+alignment_col1_is_named <- function(relevant_pd_by_line) {
+  # tokens accepted between the argument name and its value
+  accepted_separators <- c(
+    "EQ_SUB", "SPECIAL-IN", "LT", "GT", "EQ", "NE"
+  )
+  is_named <- map_lgl(relevant_pd_by_line, function(pd_line) {
+    if (nrow(pd_line) < 3) {
+      # too few tokens for name, separator and value
+      return(FALSE)
+    }
+    has_separator <- pd_line$token[2] %in% accepted_separators
+    identical(pd_line$token[c(1, 3)], c("SYMBOL_SUB", "expr")) && has_separator
+  })
+  all(is_named)
+}
+
+#' Serialize all lines for a given column
+#'
+#' Applies [alignment_serialize_line()] to every line.
+#' @param column The index of the column to serialize.
+#' @inheritParams alignment_col1_is_named
+#' @importFrom purrr map
+#' @keywords internal
+alignment_serialize_column <- function(relevant_pd_by_line, column) {
+  map(relevant_pd_by_line, function(pd_line) {
+    alignment_serialize_line(pd_line, column = column)
+  })
+}
+
+#' Serialize one line for a column
+#'
+#' Serializes all tokens of the line from its first token up to and including
+#' the comma that closes `column`.
+#' @param relevant_pd_by_line The parse table of a single line.
+#' @inheritParams alignment_serialize_column
+#' @return The result of [alignment_serialize()] for these tokens, or `NULL`
+#'   if the line has fewer than `column` commas.
+#' @importFrom rlang seq2
+#' @keywords internal
+alignment_serialize_line <- function(relevant_pd_by_line, column) {
+  # TODO
+  # better also add lower bound for column. If you already checked up to
+  # comma 2, you don't need to re-construct text again, just check if text
+  # between comma 2 and 3 has the same length.
+  comma_idx <- which(relevant_pd_by_line$token == "','")
+  if (column > length(comma_idx)) {
+    # line does not have values at that column
+    return(NULL)
+  }
+  pd_up_to_comma <- relevant_pd_by_line[seq2(1, comma_idx[column]), ]
+  alignment_serialize(pd_up_to_comma)
+}
+
+#' Serialize text from a parse table
+#'
+#' Concatenates the text of every terminal, each followed by its number of
+#' spaces. Non-terminals are serialized recursively through their `child`.
+#' Line breaks are ignored as they are expected to be checked in
+#' [token_is_on_aligned_line()].
+#' @param pd_sub A parse table with the columns `terminal`, `text`, `child`
+#'   and `spaces`.
+#' @keywords internal
+alignment_serialize <- function(pd_sub) {
+  out <- Map(function(terminal, text, child, spaces) {
+    token_text <- if (terminal) text else alignment_serialize(child)
+    paste0(token_text, rep_char(" ", spaces))
+  }, pd_sub$terminal, pd_sub$text, pd_sub$child, pd_sub$spaces)
+  # NOTE(review): paste0() turns NA into the string "NA", so this guard looks
+  # unreachable - confirm before relying on an NA return value.
+  if (anyNA(out)) {
+    return(NA)
+  }
+  paste0(out, collapse = "")
+}
+
+#' Check if spacing around comma is correct
+#'
+#' At least one space after comma, none before, for all but the last comma on
+#' the line. A line without any comma passes the check.
+#' @param pd_sub The subset of a parse table corresponding to one line.
+#' @importFrom rlang seq2
+#' @keywords internal
+alignment_has_correct_spacing_around_comma <- function(pd_sub) {
+  comma_idx <- which(pd_sub$token == "','")
+  n_commas <- length(comma_idx)
+  if (n_commas == 0) {
+    return(TRUE)
+  }
+  # the last comma on the line is exempt from the check
+  checked_idx <- comma_idx[seq2(1, n_commas - 1L)]
+  no_space_before <- pd_sub$.lag_spaces[checked_idx] == 0
+  space_after <- pd_sub$spaces[checked_idx] > 0
+  all(no_space_before) && all(space_after)
+}
+
+#' Check if spacing around `=` is correct
+#'
+#' Every `EQ_SUB` token needs at least one space before and after it. A line
+#' without `EQ_SUB` passes the check.
+#' @inheritParams alignment_has_correct_spacing_around_comma
+#' @keywords internal
+#' @importFrom rlang seq2
+alignment_has_correct_spacing_around_eq_sub <- function(pd_sub) {
+  eq_sub_idx <- which(pd_sub$token == "EQ_SUB")
+  if (length(eq_sub_idx) == 0) {
+    return(TRUE)
+  }
+
+  at_least_one_space <- function(n_spaces) all(n_spaces >= 1)
+  at_least_one_space(pd_sub$.lag_spaces[eq_sub_idx]) &&
+    at_least_one_space(pd_sub$spaces[eq_sub_idx])
+}
@@ -0,0 +1,96 @@
+#' Check if tokens are aligned
+#'
+#' If all tokens are aligned, `TRUE` is returned, otherwise `FALSE`. The
+#' function only checks for alignment of function calls. This can be
+#' recycled conveniently later if needed as a vector with length > 1.
+#' @param pd_flat A flat parse table.
+#' @return A single `TRUE` or `FALSE`.
+#' @details
+#' Multiple lines are called aligned if the following conditions hold for all
+#' but the first line of the expression:
+#'
+#' * lag spaces of column 1 must agree.
+#' * spacing around comma (0 before, at least one after, the last comma on a
+#'   line is not checked) and spacing around `=` (at least one around).
+#' * no line starts with a comma and no line contains a multi-line token.
+#' * all positions of commas of col >= 2 must agree (col >= 1 if all
+#'   arguments in column 1 are named). This needs recursive creation of
+#'   `text`.
+#'
+#' Because of the last requirement, this function is very expensive to run. For
+#' this reason, the following approach is taken:
+#'
+#' * Only invoke the function when certain that alignment is possible.
+#' * Check the cheap conditions first.
+#' * For the recursive creation of text, greedily check column by column to make
+#'   sure we can stop as soon as we found that columns are not aligned.
+#'
+#' @importFrom purrr map_int map_lgl map compact
+#' @importFrom rlang seq2
+#' @keywords internal
+token_is_on_aligned_line <- function(pd_flat) {
+  line_idx <- 1 + cumsum(pd_flat$lag_newlines)
+  pd_flat$.lag_spaces <- lag(pd_flat$spaces)
+  pd_by_line <- split(pd_flat, line_idx)
+  last_line_is_closing_brace_only <- nrow(last(pd_by_line)) == 1
+  # the first line is never checked; the last one only if it holds more than
+  # a single token
+  last_relevant_line <- if (last_line_is_closing_brace_only) {
+    length(pd_by_line) - 1
+  } else {
+    length(pd_by_line)
+  }
+  relevant_idx <- seq2(2, last_relevant_line)
+  pd_by_line <- pd_by_line[relevant_idx]
+
+  relevant_lag_spaces_col_1 <- map_int(pd_by_line, ~ .x$.lag_spaces[1])
+
+  col1_is_aligned <- length(unique(relevant_lag_spaces_col_1)) == 1
+  if (!col1_is_aligned) {
+    return(FALSE)
+  }
+  has_correct_spacing_around_comma <- map_lgl(
+    pd_by_line, alignment_has_correct_spacing_around_comma
+  )
+  if (!all(has_correct_spacing_around_comma)) {
+    return(FALSE)
+  }
+
+  has_correct_spacing_around_eq_sub <- map_lgl(
+    pd_by_line, alignment_has_correct_spacing_around_eq_sub
+  )
+
+  if (!all(has_correct_spacing_around_eq_sub)) {
+    return(FALSE)
+  }
+  starting_with_comma <- map_lgl(pd_by_line, ~ .x$token[1] == "','")
+  if (any(starting_with_comma)) {
+    return(FALSE)
+  }
+  pd_is_multi_line <- map_lgl(pd_by_line, ~ any(.x$multi_line, na.rm = TRUE))
+  if (any(pd_is_multi_line)) {
+    return(FALSE)
+  }
+
+  pd_by_line <- alignment_drop_comments(pd_by_line) %>%
+    alignment_ensure_no_closing_brace(last_line_is_closing_brace_only) %>%
+    alignment_ensure_trailing_comma()
+  # now, every line only contains arguments, each terminated by a comma, which
+  # is ideal for iterating over columns.
+  # cannot use lag_newlines anymore since we removed tokens.
+  pd_by_line <- map(pd_by_line, function(pd_sub) {
+    pd_sub$lag_newlines <- NULL
+    pd_sub
+  })
+
+  # one column per comma on the line
+  n_cols <- map_int(pd_by_line, ~ sum(.x$token == "','"))
+  # column 1 is only compared if all its arguments are named
+  start <- if (all(alignment_col1_is_named(pd_by_line))) 1 else 2
+
+  for (column in seq2(start, max(n_cols))) {
+    # width of every line from its start up to the comma closing `column`;
+    # lines without that column are dropped by compact()
+    char_len <- alignment_serialize_column(pd_by_line, column) %>%
+      compact() %>%
+      unlist() %>%
+      trimws(which = "right") %>%
+      nchar()
+
+    is_aligned <- length(unique(char_len)) == 1
+
+    if (!is_aligned) {
+      return(FALSE)
+    }
+  }
+  TRUE
+}
@@ -1,25 +1,42 @@
+#' Set spaces around operators
+#'
+#' Alignment is kept, if detected.
 #' @include token-define.R
 #' @keywords internal
-add_space_around_op <- function(pd_flat) {
-  op_after <- pd_flat$token %in% op_token
-  op_before <- lead(op_after, default = FALSE)
-  idx_before <- op_before & (pd_flat$newlines == 0L)
-  pd_flat$spaces[idx_before] <- pmax(pd_flat$spaces[idx_before], 1L)
-  idx_after <- op_after & (pd_flat$newlines == 0L)
-  pd_flat$spaces[idx_after] <- pmax(pd_flat$spaces[idx_after], 1L)
-  pd_flat
-}
-
 #' @include token-define.R
-#' @keywords internal
-set_space_around_op <- function(pd_flat) {
+set_space_around_op <- function(pd_flat, strict) {
+  # comma and operator spacing are set in the same function because both
+  # depend on token_is_on_aligned_line(); separate functions would have to call
+  # it twice.
+  pd_flat <- add_space_after_comma(pd_flat)
   op_after <- pd_flat$token %in% op_token
+  op_before <- lead(op_after, default = FALSE)
+  # commas get a space after, but not before: add them after deriving op_before
+  op_after <- op_after | pd_flat$token == "','"
   if (!any(op_after)) {
     return(pd_flat)
   }
-  op_before <- lead(op_after, default = FALSE)
-  pd_flat$spaces[op_before & (pd_flat$newlines == 0L)] <- 1L
-  pd_flat$spaces[op_after & (pd_flat$newlines == 0L)] <- 1L
+  if (sum(pd_flat$lag_newlines) > 2 &&
+      is_function_call(pd_flat) &&
+      any(pd_flat$token %in% c("EQ_SUB", "','"))
+  ) {
+    is_on_aligned_line <- token_is_on_aligned_line(pd_flat)
+  } else {
+    is_on_aligned_line <- FALSE
+  }
+  # unless aligned: exactly one space if strict, at least one otherwise
+  must_have_space_before <- op_before & (pd_flat$newlines == 0L) & !is_on_aligned_line
+  pd_flat$spaces[must_have_space_before] <- if (strict) {
+    1L
+  } else {
+    pmax(pd_flat$spaces[must_have_space_before], 1L)
+  }
+  must_have_space_after <- op_after & (pd_flat$newlines == 0L) & !is_on_aligned_line
+  pd_flat$spaces[must_have_space_after] <- if (strict) {
+    1L
+  } else {
+    pmax(pd_flat$spaces[must_have_space_after], 1L)
+  }
   pd_flat
 }
 
 
@@ -85,16 +85,9 @@ tidyverse_style <- function(scope = "tokens",
         style_space_around_tilde,
         strict = strict
       ),
-      spacing_around_op = if (strict) {
-        set_space_around_op
-      } else {
-        add_space_around_op
-      },
-      spacing_around_comma = if (strict) {
-        set_space_after_comma
-      } else {
-        add_space_after_comma
-      },
+      spacing_around_op = purrr::partial(set_space_around_op,
+        strict = strict
+      ),
       remove_space_after_opening_paren,
       remove_space_after_excl,
       set_space_after_bang_bang,
 
@@ -17,7 +17,7 @@ next_non_comment <- function(pd, pos) {
 
 #' @rdname next_non_comment
 previous_non_comment <- function(pd, pos) {
-  if (length(pos) < 1 || is.na(pos) || pos >= nrow(pd)) {
+  if (length(pos) < 1 || is.na(pos) || pos > nrow(pd)) {
     return(integer(0))
   }
   candidates <- seq2(1L, pos - 1L)
 
@@ -0,0 +1,2 @@
+
+
Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,7 @@ next_non_comment <- function(pd, pos) {`
`17`	`17`
`18`	`18`	`#' @rdname next_non_comment`
`19`	`19`	`previous_non_comment <- function(pd, pos) {`
`20`		`- if (length(pos) < 1 \|\| is.na(pos) \|\| pos >= nrow(pd)) {`
	`20`	`+ if (length(pos) < 1 \|\| is.na(pos) \|\| pos > nrow(pd)) {`
`21`	`21`	`return(integer(0))`
`22`	`22`	`}`
`23`	`23`	`candidates <- seq2(1L, pos - 1L)`