Skip to content

Commit 074c38c

Browse files
author
Youzhi Yu
committed
added emoji extraction and emoji categorize
1 parent 9722505 commit 074c38c

File tree

9 files changed

+176
-5
lines changed

9 files changed

+176
-5
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,5 @@ Imports:
1717
purrr,
1818
stringr,
1919
tibble,
20+
tidyr,
2021
utils

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export(emoji_categorize)
4+
export(emoji_extract_nest)
5+
export(emoji_extract_unnest)
36
export(emoji_summary)
47
export(emoji_tweets)
58
export(top_n_emojis)
@@ -8,4 +11,5 @@ import(emoji)
811
import(purrr)
912
import(stringr)
1013
import(tibble)
14+
import(tidyr)
1115
import(utils)

R/emoji-categorize.R

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Internal helper (not exported).
#
# Keeps the rows of `tweet_tbl` whose `tweet_text` column matches the regular
# expression `emoji_unicodes`, then tags every kept row with the label
# `emoji_category` in a new `.emoji_category` column.
emoji_category_add <- function(emoji_unicodes,
                               emoji_category,
                               tweet_tbl,
                               tweet_text) {
  # Rows where the pattern is not detected (or the text is NA) are dropped.
  matching_rows <- dplyr::filter(
    tweet_tbl,
    stringr::str_detect({{ tweet_text }}, emoji_unicodes)
  )

  dplyr::mutate(matching_rows, .emoji_category = emoji_category)
}
8+
9+
10+
11+
#' Categorize Emoji Tweets/text based on Emoji category
#'
#' Users can use \code{emoji_categorize} to see all the categories each
#' Emoji Tweet has. The function preserves the input data structure, and the
#' only change is it adds an extra column with information about Emoji
#' category separated by \code{|} if there is more than one category.
#'
#' @inheritParams emoji_summary
#' @import purrr
#' @import tidyr
#' @import dplyr
#' @return A filtered dataframe with the presence of Emoji only, and with an
#' extra column \code{.emoji_category}.
#' @export
#'
emoji_categorize <- function(tweet_tbl, tweet_text) {

  crosswalk <- tidyEmoji::category_unicode_crosswalk

  purrr::map2_dfr(crosswalk$unicodes,
                  crosswalk$category,
                  emoji_category_add,
                  tweet_tbl,
                  {{ tweet_text }}) %>%
    # One column per category that actually occurs in the data; a cell holds
    # the category name when the Tweet matched it and NA otherwise.
    tidyr::pivot_wider(names_from = .emoji_category,
                       values_from = .emoji_category) %>%
    # Select the category columns by name instead of by a hard-coded
    # "first:last" range: a range fails as soon as either endpoint category
    # is missing from the data, whereas any_of() ignores absent categories.
    tidyr::unite(".emoji_category",
                 dplyr::any_of(as.character(unique(crosswalk$category))),
                 sep = "|",
                 na.rm = TRUE)

}
41+
42+
43+

R/emoji-extraction.R

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#' Emoji extraction unnested summary
#'
#' If users would like to know how many Emojis and what kinds of Emojis each
#' Tweet has, \code{emoji_extract_unnest} is a useful function to output a
#' global summary with the row number of each Tweet containing Emoji and the
#' Unicodes associated with each Tweet.
#'
#' @inheritParams emoji_summary
#'
#' @import dplyr
#' @import stringr
#' @import tidyr
#' @return A summary tibble with one row per Tweet/Emoji combination and the
#' columns \code{row_number} (row position of the Tweet in the input),
#' \code{.emoji_unicode} (the Emoji) and \code{emoji_count} (how many times
#' that Emoji occurs in that Tweet).
#' @export
#'
emoji_extract_unnest <- function(tweet_tbl, tweet_text){
  tweet_tbl %>%
    tidyEmoji::emoji_extract_nest({{ tweet_text }}) %>%
    dplyr::select({{ tweet_text }}, .emoji_unicode) %>%
    # Record the row position before unnesting, while there is still exactly
    # one row per input Tweet.
    dplyr::mutate(row_number = dplyr::row_number()) %>%
    tidyr::unnest(.emoji_unicode) %>%
    # count() returns an ungrouped tibble and, unlike group_by() followed by
    # summarize(), does not emit a message about the grouping of the result.
    dplyr::count(row_number, .emoji_unicode, name = "emoji_count")

}
27+
28+
29+
30+
31+
32+
33+
34+
#' Emoji extraction nested summary
#'
#' This function adds an extra list column called \code{.emoji_unicode} to the
#' original data, with all Emojis included. When several Emojis could match at
#' the same position, the longest one is extracted.
#'
#' @inheritParams emoji_summary
#'
#' @import dplyr
#' @import stringr
#' @import emoji
#' @return The original dataframe/tibble with an extra list column called
#' \code{.emoji_unicode}.
#' @export
#'
emoji_extract_nest <- function(tweet_tbl, tweet_text){

  # "keycap: *" is excluded because its Emoji starts with a literal `*`, which
  # is a regex quantifier and would break the alternation built below.
  emoji_chars <- emoji::emojis %>%
    dplyr::filter(!stringr::str_detect(name, "keycap: \\*")) %>%
    dplyr::pull(emoji)

  # Regex alternation tries alternatives in order and stops at the first one
  # that matches, so longer sequences must come before the shorter Emojis they
  # start with; otherwise the sequence is split into its components.
  emoji_chars <- emoji_chars[order(-nchar(emoji_chars))]
  emoji_pattern <- paste(emoji_chars, collapse = "|")

  # `!!` inlines the pattern so a column of `tweet_tbl` that happens to be
  # named `emoji_pattern` cannot shadow it inside mutate().
  tweet_tbl %>%
    dplyr::mutate(
      .emoji_unicode = stringr::str_extract_all({{ tweet_text }},
                                                !!emoji_pattern)
    )

}

R/tidyEmoji.R

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,6 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c(".",
1313
"emoji_name",
1414
"unicode",
1515
"emoji_category",
16-
"emoji_unicode_crosswalk"))
16+
".emoji_category",
17+
".emoji_unicode",
18+
".emoji_unicode"))

R/top-n-emojis.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,16 @@
2424

2525
top_n_emojis <- function(tweet_tbl, tweet_text, n = 20, duplicated_unicode = "no"){
2626

27-
emoji_tbl <- emoji_tweets(tweet_tbl, {{ tweet_text }})
27+
emoji_tbl <- tidyEmoji::emoji_tweets(tweet_tbl, {{ tweet_text }})
2828

29-
emoji_count_list <- purrr::map(emoji_unicode_crosswalk$unicode,
29+
emoji_count_list <- purrr::map(tidyEmoji::emoji_unicode_crosswalk$unicode,
3030
.f = count_each_emoji,
3131
emoji_tbl,
3232
{{ tweet_text }})
3333

34-
tbl <- tibble::tibble(unicode = emoji_unicode_crosswalk$unicode,
34+
tbl <- tibble::tibble(unicode = tidyEmoji::emoji_unicode_crosswalk$unicode,
3535
emoji_count = unlist(emoji_count_list)) %>%
36-
dplyr::inner_join(emoji_unicode_crosswalk, by = "unicode") %>%
36+
dplyr::inner_join(tidyEmoji::emoji_unicode_crosswalk, by = "unicode") %>%
3737
dplyr::distinct() %>%
3838
dplyr::count(emoji_name, unicode, emoji_category, wt = emoji_count, sort = T)
3939

man/emoji_categorize.Rd

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/emoji_extract_nest.Rd

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/emoji_extract_unnest.Rd

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)