From bdd0d25e4f1f35d7181a3ec2eaef6afb577c9774 Mon Sep 17 00:00:00 2001
From: mcnakhaee <mcnakhaee@gmail.org>
Date: Mon, 12 Oct 2020 21:54:53 +0200
Subject: [PATCH 1/4] update DESCRIPTION (add spelling to Imports)

---
 DESCRIPTION | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index ec29b6e..6c5eda1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -27,7 +27,8 @@ Imports:
     tibble,
     tokenizers,
     utils,
-    stats
+    stats,
+    spelling
 Suggests: 
     knitr,
     roxygen2,
@@ -35,4 +36,4 @@ Suggests:
     covr
 Encoding: UTF-8
 LazyData: yes
-RoxygenNote: 6.1.1
+RoxygenNote: 7.1.0

From cd89ed10459960809f596a6ee3203d36755fe534 Mon Sep 17 00:00:00 2001
From: mcnakhaee <mcnakhaee@gmail.org>
Date: Mon, 12 Oct 2020 21:55:32 +0200
Subject: [PATCH 2/4] add n_misspelled and n_emojis

---
 R/count.R | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/R/count.R b/R/count.R
index c7ca522..17b74ca 100644
--- a/R/count.R
+++ b/R/count.R
@@ -145,6 +145,23 @@ n_lowers <- function(x) {
   x
 }
 
+n_misspelled <- function(x){  # spell_check_text() row count per element
+  missing <- is.na(x)
+  if (all(missing)) return(0)
+  n <- purrr::map_int(x, function(s) nrow(spelling::spell_check_text(s)))
+  n[missing] <- NA_integer_
+  n
+}
+
+n_emojis <- function(x){  # half the count of matches outside \x01-\x7F
+  na <- is.na(x)
+  if (all(na)) return(0)
+  m <- gregexpr("[^\x01-\x7F]",x)  # NOTE(review): confirm the /2 heuristic
+  x <- purrr::map_dbl(m, ~ sum(.x > 0)/2)  # no match is -1: count hits > 0
+  x[na] <- NA_real_
+  x
+}
+
 n_urls <- function(x) {
   na <- is.na(x)
   if (all(na)) return(0)
@@ -222,6 +239,7 @@ to_be <- function(x) {
   purrr::map_int(x, ~ sum(fp %in% .x, na.rm = TRUE))
 }
 
+
 prepositions <- function(x) {
   fp <- c("about", "below", "excepting", "off", "toward", "above", "beneath",
     "on", "under", "across", "from", "onto", "underneath", "after", "between",
@@ -271,6 +289,8 @@ prepositions <- function(x) {
 #' \item{\code{n_extraspaces}}{Number of times more then 1 consecutive space have been used.}
 #' \item{\code{n_caps}}{Number of upper case characters.}
 #' \item{\code{n_lowers}}{Number of lower case characters.}
+#' \item{\code{n_misspelled}}{Number of misspelled words.}
+#' \item{\code{n_emojis}}{Number of emojis.}
 #' \item{\code{n_urls}}{Number of urls.}
 #' \item{\code{n_uq_urls}}{Number of unique urls.}
 #' \item{\code{n_nonasciis}}{Number of non ascii characters.}
@@ -301,6 +321,8 @@ count_functions <- list(
   n_extraspaces = n_extraspaces,
   n_caps = n_caps,
   n_lowers = n_lowers,
+  n_misspelled = n_misspelled,
+  n_emojis = n_emojis,
   n_urls = n_urls,
   n_uq_urls = n_uq_urls,
   n_nonasciis = n_nonasciis,

From 8078a062212de5702be611e5021e2a85d978022e Mon Sep 17 00:00:00 2001
From: mcnakhaee <mcnakhaee@gmail.org>
Date: Mon, 12 Oct 2020 21:55:59 +0200
Subject: [PATCH 3/4] add n_misspelled and n_emojis

---
 R/tweet-features.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/tweet-features.R b/R/tweet-features.R
index a627bd0..a731e8d 100644
--- a/R/tweet-features.R
+++ b/R/tweet-features.R
@@ -30,5 +30,7 @@ tweet_features <- function(text) {
   o$n_puncts <- n_puncts(text)
   o$n_capsp <- (o$n_caps + 1L) / (o$n_chars + 1L)
   o$n_charsperword <- (o$n_chars + 1L) / (o$n_words + 1L)
+  o$n_misspelled <- n_misspelled(text)
+  o$n_emojis <- n_emojis(text)
   o
 }

From 0f5c5cc598735fc05f63580e2e245715e0cb880e Mon Sep 17 00:00:00 2001
From: mcnakhaee <mcnakhaee@gmail.org>
Date: Mon, 12 Oct 2020 21:56:17 +0200
Subject: [PATCH 4/4] add n_misspelled and n_emojis

---
 man/count_functions.Rd |  8 ++++++--
 man/textfeatures.Rd    | 22 ++++++++++++++--------
 man/word_dims.Rd       |  8 ++++----
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/man/count_functions.Rd b/man/count_functions.Rd
index 5ac3b06..9c895bc 100644
--- a/man/count_functions.Rd
+++ b/man/count_functions.Rd
@@ -4,7 +4,8 @@
 \name{count_functions}
 \alias{count_functions}
 \title{List of all feature counting functions}
-\format{Named list of all ferature counting functions
+\format{
+Named list of all ferature counting functions
 \describe{
 \item{\code{n_words}}{Number of words.}
 \item{\code{n_uq_words}}{Number of unique words.}
@@ -21,6 +22,8 @@
 \item{\code{n_extraspaces}}{Number of times more then 1 consecutive space have been used.}
 \item{\code{n_caps}}{Number of upper case characters.}
 \item{\code{n_lowers}}{Number of lower case characters.}
+\item{\code{n_misspelled}}{Number of misspelled words.}
+\item{\code{n_emojis}}{Number of emojis.}
 \item{\code{n_urls}}{Number of urls.}
 \item{\code{n_uq_urls}}{Number of unique urls.}
 \item{\code{n_nonasciis}}{Number of non ascii characters.}
@@ -33,7 +36,8 @@
 \item{\code{third_person}}{Number of "third person" words.}
 \item{\code{to_be}}{Number of "to be" words.}
 \item{\code{prepositions}}{Number of preposition words.}
-}}
+}
+}
 \usage{
 count_functions
 }
diff --git a/man/textfeatures.Rd b/man/textfeatures.Rd
index 0cf2389..f7ef51b 100644
--- a/man/textfeatures.Rd
+++ b/man/textfeatures.Rd
@@ -4,8 +4,14 @@
 \alias{textfeatures}
 \title{textfeatures}
 \usage{
-textfeatures(text, sentiment = TRUE, word_dims = NULL,
-  normalize = TRUE, newdata = NULL, verbose = TRUE)
+textfeatures(
+  text,
+  sentiment = TRUE,
+  word_dims = NULL,
+  normalize = TRUE,
+  newdata = NULL,
+  verbose = TRUE
+)
 }
 \arguments{
 \item{text}{Input data. Should be character vector or data frame with character
@@ -42,13 +48,13 @@ Extracts features from text vector.
 trump_tweets <- c(
   "#FraudNewsCNN #FNN https://t.co/WYUnHjjUjg",
   "TODAY WE MAKE AMERICA GREAT AGAIN!",
-  paste("Why would Kim Jong-un insult me by calling me \\"old,\\" when I would",
-    "NEVER call him \\"short and fat?\\" Oh well, I try so hard to be his",
+  paste("Why would Kim Jong-un insult me by calling me \"old,\" when I would",
+    "NEVER call him \"short and fat?\" Oh well, I try so hard to be his",
     "friend - and maybe someday that will happen!"),
   paste("Such a beautiful and important evening! The forgotten man and woman",
     "will never be forgotten again. We will all come together as never before"),
-  paste("North Korean Leader Kim Jong Un just stated that the \\"Nuclear",
-    "Button is on his desk at all times.\\" Will someone from his depleted and",
+  paste("North Korean Leader Kim Jong Un just stated that the \"Nuclear",
+    "Button is on his desk at all times.\" Will someone from his depleted and",
     "food starved regime please inform him that I too have a Nuclear Button,",
     "but it is a much bigger &amp; more powerful one than his, and my Button",
     "works!")
@@ -60,9 +66,9 @@ textfeatures(trump_tweets)
 ## data frame with a character vector named "text"
 df <- data.frame(
   id = c(1, 2, 3),
-  text = c("this is A!\\t sEntence https://github.com about #rstats @github",
+  text = c("this is A!\t sEntence https://github.com about #rstats @github",
     "and another sentence here",
-    "The following list:\\n- one\\n- two\\n- three\\nOkay!?!"),
+    "The following list:\n- one\n- two\n- three\nOkay!?!"),
   stringsAsFactors = FALSE
 )
 
diff --git a/man/word_dims.Rd b/man/word_dims.Rd
index 1ea1b87..48425cf 100644
--- a/man/word_dims.Rd
+++ b/man/word_dims.Rd
@@ -29,13 +29,13 @@ Calculates word2vec dimension estimates
 trump_tweets <- c(
 "#FraudNewsCNN #FNN https://t.co/WYUnHjjUjg",
 "TODAY WE MAKE AMERICA GREAT AGAIN!",
-paste("Why would Kim Jong-un insult me by calling me \\"old,\\" when I would",
-      "NEVER call him \\"short and fat?\\" Oh well, I try so hard to be his",
+paste("Why would Kim Jong-un insult me by calling me \"old,\" when I would",
+      "NEVER call him \"short and fat?\" Oh well, I try so hard to be his",
       "friend - and maybe someday that will happen!"),
 paste("Such a beautiful and important evening! The forgotten man and woman",
       "will never be forgotten again. We will all come together as never before"),
-paste("North Korean Leader Kim Jong Un just stated that the \\"Nuclear",
-      "Button is on his desk at all times.\\" Will someone from his depleted and",
+paste("North Korean Leader Kim Jong Un just stated that the \"Nuclear",
+      "Button is on his desk at all times.\" Will someone from his depleted and",
       "food starved regime please inform him that I too have a Nuclear Button,",
       "but it is a much bigger &amp; more powerful one than his, and my Button",
       "works!")