|
#' Bookdown Spell Checking
#'
#' Automatically spell-check bookdown source files.
#'
#' Parses and checks every markdown / R markdown file (`*.md`, `*.Rmd`) found
#' recursively under `path`. This includes files such as `README.md`,
#' `NEWS.md` and `index.Rmd` in the root directory. If a `DESCRIPTION` file is
#' present in `path`, its `Title` and `Description` text fields are checked
#' as well.
#'
#' The preferred spelling language (typically `en-GB` or `en-US`) should be
#'  * specified in the `Language` field of your `DESCRIPTION`,
#'  * or entered as the `lang` argument.
#'
#' When `lang` is `NULL` and there is no `DESCRIPTION` file, `en-US` is used.
#'
#' To whitelist custom words use the bookdown [WORDLIST][get_wordlist] file
#' which will be added to the dictionary when spell checking. See
#' [update_wordlist] to automatically populate and update this file.
#'
#' Hunspell includes dictionaries for `en_US` and `en_GB` by default. Other
#' languages require installation of a custom dictionary, see
#' [hunspell][hunspell::hunspell] for details.
#'
#' @export
#' @rdname spell_check_bookdown
#' @name spell_check_bookdown
#' @aliases spelling
#' @family spelling
#' @param path path to the bookdown root directory. A `DESCRIPTION` file in
#' this directory is optional.
#' @param use_wordlist ignore words in the [WORDLIST][get_wordlist] file
#' @param lang spelling language, e.g. `"en-US"` or `"en-GB"`. When `NULL`
#' (the default) the `Language` field in `DESCRIPTION` is used if that file
#' exists, otherwise `"en-US"`.
#' For supporting other languages, see the
#' [hunspell vignette](https://bit.ly/2EquLKy).
#' @return The value of `summarize_words()` applied to all checked sources
#' (markdown files followed by the checked `DESCRIPTION` fields).
spell_check_bookdown <- function(path = ".", lang = NULL, use_wordlist = TRUE) {
  # Read DESCRIPTION at most once. A bookdown project is not required to have
  # one, so `pkg` stays NULL and every later step must cope with that.
  has_description <- file.exists(file.path(path, "DESCRIPTION"))
  pkg <- if (has_description) as_package(path)

  # Resolve the language: explicit argument > DESCRIPTION > "en-US" fallback.
  if (is.null(lang)) {
    lang <- if (has_description) pkg$language else "en-US"
  }
  lang <- normalize_lang(lang)

  # Add custom words to the ignore list (NULL when use_wordlist is not TRUE)
  add_words <- if (isTRUE(use_wordlist)) get_wordlist(path)

  # Project name and author names are not spelling errors
  meta <- NULL
  if (has_description) {
    author <- if (length(pkg[["authors@r"]]) > 0) {
      parse_r_field(pkg[["authors@r"]])
    } else if (length(pkg[["author"]]) > 0) {
      # Guard: strsplit() errors on NULL when no author field is present
      strsplit(pkg[["author"]], " ", fixed = TRUE)[[1]]
    }
    meta <- c(pkg$package, author)
  }
  ignore <- unique(c(meta, hunspell::en_stats, add_words))

  # Create the hunspell dictionary object
  dict <- hunspell::dictionary(lang, add_words = sort(ignore))

  # Where to check for rmd/md files. `path` is used rather than `pkg$path`
  # because `pkg` does not exist without a DESCRIPTION file.
  bookdown_files <- list.files(
    path,
    pattern = "\\.r?md$",
    ignore.case = TRUE, full.names = TRUE, recursive = TRUE
  )
  root_files <- list.files(
    path,
    pattern = "(readme|news|changes|index)\\.r?md$",
    ignore.case = TRUE, full.names = TRUE
  )

  # Root files are also found by the recursive listing, so de-duplicate.
  # Sort once so that sources and results stay in the same order.
  md_files <- sort(unique(normalizePath(c(root_files, bookdown_files))))
  md_lines <- lapply(md_files, spell_check_file_md, dict = dict)

  all_sources <- md_files
  all_lines <- md_lines

  # Check 'DESCRIPTION' fields
  if (has_description) {
    # Only check fields that are actually present in DESCRIPTION
    pkg_fields <- Filter(
      function(field) length(pkg[[field]]) > 0,
      c("title", "description")
    )
    pkg_lines <- lapply(pkg_fields, function(field) {
      con <- textConnection(pkg[[field]])
      on.exit(close(con), add = TRUE)
      spell_check_file_text(con, dict = dict)
    })

    all_sources <- c(all_sources, pkg_fields)
    all_lines <- c(all_lines, pkg_lines)
  }

  summarize_words(all_sources, all_lines)
}
0 commit comments