Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@ Imports:
tibble,
tokenizers,
utils,
stats
stats,
spelling
Suggests:
knitr,
roxygen2,
testthat,
covr
Encoding: UTF-8
LazyData: yes
RoxygenNote: 6.1.1
RoxygenNote: 7.1.0
22 changes: 22 additions & 0 deletions R/count.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,23 @@ n_lowers <- function(x) {
x
}

# Count spelling-check hits for each element of a character vector.
#
# x: character vector of texts; may contain NA.
#
# Returns 0 when every element of `x` is NA (this includes zero-length input).
# Otherwise returns an integer vector the same length as `x`, holding the
# number of rows of the table produced by spelling::spell_check_text() for
# that element, and NA for missing elements.
# NOTE(review): presumably spell_check_text() returns one row per distinct
# misspelled word, so repeated misspellings would count once -- confirm.
n_misspelled <- function(x) {
  missing <- is.na(x)
  if (all(missing)) return(0)

  # Pre-fill with NA so missing texts never reach the spell checker; their
  # result would be discarded anyway.
  counts <- rep(NA_integer_, length(x))
  counts[!missing] <- purrr::map_int(
    x[!missing],
    ~ nrow(spelling::spell_check_text(.x))
  )

  # Carry over any names from the input (NULL when there are none).
  names(counts) <- names(x)
  counts
}

# Estimate the number of emojis in each element of a character vector.
#
# x: character vector of texts; may contain NA.
#
# Returns 0 when every element of `x` is NA (this includes zero-length input).
# Otherwise returns a double vector the same length as `x`: half the number
# of characters outside the \x01-\x7F range, and NA for missing elements.
# NOTE(review): the division by 2 is kept from the original; presumably it
# assumes each emoji is matched as two units -- confirm against real data,
# since any non-ASCII character (e.g. accented letters) is counted too.
n_emojis <- function(x) {
  na <- is.na(x)
  if (all(na)) return(0)

  m <- gregexpr("[^\x01-\x7F]", x)

  # gregexpr() reports "no match" as a single -1, so length() would count one
  # phantom hit (yielding 0.5). Count only genuine, positive match positions.
  x <- vapply(m, function(pos) sum(pos > 0L) / 2, numeric(1))

  x[na] <- NA_real_
  x
}

n_urls <- function(x) {
na <- is.na(x)
if (all(na)) return(0)
Expand Down Expand Up @@ -222,6 +239,7 @@ to_be <- function(x) {
purrr::map_int(x, ~ sum(fp %in% .x, na.rm = TRUE))
}


prepositions <- function(x) {
fp <- c("about", "below", "excepting", "off", "toward", "above", "beneath",
"on", "under", "across", "from", "onto", "underneath", "after", "between",
Expand Down Expand Up @@ -271,6 +289,8 @@ prepositions <- function(x) {
#' \item{\code{n_extraspaces}}{Number of times more than 1 consecutive space has been used.}
#' \item{\code{n_caps}}{Number of upper case characters.}
#' \item{\code{n_lowers}}{Number of lower case characters.}
#' \item{\code{n_misspelled}}{Number of misspelled words.}
#' \item{\code{n_emojis}}{Number of emojis.}
#' \item{\code{n_urls}}{Number of urls.}
#' \item{\code{n_uq_urls}}{Number of unique urls.}
#' \item{\code{n_nonasciis}}{Number of non ascii characters.}
Expand Down Expand Up @@ -301,6 +321,8 @@ count_functions <- list(
n_extraspaces = n_extraspaces,
n_caps = n_caps,
n_lowers = n_lowers,
n_misspelled = n_misspelled,
n_emojis = n_emojis,
n_urls = n_urls,
n_uq_urls = n_uq_urls,
n_nonasciis = n_nonasciis,
Expand Down
2 changes: 2 additions & 0 deletions R/tweet-features.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,7 @@ tweet_features <- function(text) {
o$n_puncts <- n_puncts(text)
o$n_capsp <- (o$n_caps + 1L) / (o$n_chars + 1L)
o$n_charsperword <- (o$n_chars + 1L) / (o$n_words + 1L)
o$n_misspelled <- n_misspelled(text)
o$n_emojis <- n_emojis(text)
o
}
8 changes: 6 additions & 2 deletions man/count_functions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 14 additions & 8 deletions man/textfeatures.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/word_dims.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.