-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoptionA.Rmd
More file actions
172 lines (129 loc) · 4.08 KB
/
optionA.Rmd
File metadata and controls
172 lines (129 loc) · 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
---
title: "Assignment B-4"
output: github_document
---
## Exercise 1
```{r}
# libraries
library(janeaustenr)
library(tidyverse)
library(stringr)
library(stopwords)
library(dplyr)
library(purrr)
# Exercise 1 pipeline: tokenise Emma, drop English stop words, count the
# remaining words and plot the 20 most frequent ones.

# emma text as one lower-case string
emma_text <- janeaustenr::emma %>%
  # combine lines in emma
  str_c(collapse = " ") %>%
  # convert to lower case
  str_to_lower() %>%
  # a run of hyphens used as a dash (e.g. "came--a") separates two words;
  # turn it into a space so that stripping punctuation does not fuse them
  str_replace_all("-{2,}", " ") %>%
  # remove punctuation, but keep apostrophes for now so that contractions
  # such as "don't" can still match the stop word list below
  str_replace_all("(?!')[[:punct:]]", "")

# split into words
emma_words <- emma_text %>%
  str_split("\\s+") %>%
  unlist() %>%
  # apostrophes at the edge of a token are quotation marks, not contractions
  str_replace_all("^'+|'+$", "")

# english stop words in stopwords package (https://cran.r-project.org/web/packages/stopwords/readme/README.html)
stop_words <- stopwords("en")

# filter out stop words with a vectorised mask (one `%in%` call instead of
# one function call per word), then drop the remaining apostrophes
filtered_words <- emma_words[!emma_words %in% stop_words] %>%
  str_replace_all("'", "")
# splitting or trimming can leave empty tokens; they are not words
filtered_words <- filtered_words[filtered_words != ""]

# count frequency of words in emma, most frequent first
word_counts <- tibble(value = filtered_words) %>%
  count(value, sort = TRUE)

# top 20 words (ties at the cut-off are kept by slice_max)
top_words <- word_counts %>%
  slice_max(order_by = n, n = 20)

# plot words frequency
ggplot(top_words, aes(x = reorder(value, n), y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 20 Most Common Words in Emma",
    x = "Words",
    y = "Frequency"
  ) +
  theme_minimal()
```
## Exercise 2
```{r}
library(testthat)
#' Convert Words to My Pig Latin
#'
#' @description
#' Converts English words to a custom variant of Pig Latin. Each word is
#' rearranged around its first vowel (`a`, `e`, `i`, `o` or `u`, in either
#' case) and then prefixed with "ez".
#'
#' * Rearrangement:
#'   1. Words that begin with one or more consonants: the letters after the
#'      first vowel are moved to the front, followed by the leading
#'      consonant(s) and that vowel.
#'   2. Words that begin with a vowel: the first vowel is removed.
#'   3. Words without any vowel are left unchanged.
#' * Addition: "ez" is added to the beginning of the rearranged word.
#'
#' Empty strings are returned as empty strings (no prefix is added) and
#' missing values are returned as `NA`.
#'
#' @param words A character vector of words to convert.
#' @return A character vector, the same length as `words`, holding the
#'   converted words. An error is raised if an element is not character.
#' @examples
#' my_pig_latin("hello") # "ezllohe"
#' my_pig_latin("friends") # "ezendsfri"
#' my_pig_latin("eat") # "ezat"
#' my_pig_latin(c("apple", "go")) # "ezpple", "ezgo"
#' @export
my_pig_latin <- function(words) {
  # Convert a single word
  convert_word <- function(word) {
    if (!is.character(word)) {
      stop("Input contains non-character")
    }
    # `NA == ""` evaluates to NA, which `if` cannot handle; propagate the
    # missing value instead of failing
    if (is.na(word)) {
      return(NA_character_)
    }
    # empty word
    if (word == "") {
      return("")
    }

    # Position of the first vowel (NA when the word has none)
    first_vowel_position <- stringr::str_locate(word, "[aeiouAEIOU]")[1, 1]

    if (is.na(first_vowel_position)) {
      # no vowels in word
      rearranged <- word
    } else if (first_vowel_position == 1) {
      # remove the vowel when the word starts with a vowel
      rearranged <- stringr::str_sub(word, 2)
    } else {
      # word starts with consonants: letters after the first vowel go first,
      # followed by everything up to and including that vowel
      rearranged <- stringr::str_c(
        stringr::str_sub(word, first_vowel_position + 1),
        stringr::str_sub(word, 1, first_vowel_position)
      )
    }

    # Add "ez" to the beginning
    stringr::str_c("ez", rearranged)
  }

  # map conversion to every word in the vector
  purrr::map_chr(words, convert_word)
}
```
### Examples
```{r}
# empty string is returned unchanged: ""
my_pig_latin("")
# starts with a single consonant: "ezllohe"
my_pig_latin("hello")
# starts with a consonant cluster: "ezendsfri"
my_pig_latin("friends")
# starts with a vowel, which is dropped: "ezat"
my_pig_latin("eat")
# vector input is converted element by element: "ezpple" "ezgo"
my_pig_latin(c("apple", "go"))
```
### Testing
```{r}
test_that("Check input", {
  # invalid input
  expect_error(my_pig_latin(123), "Input contains non-character")
  # empty string
  expect_equal(my_pig_latin(""), "")
  # zero-length vector gives a zero-length result
  expect_equal(my_pig_latin(character(0)), character(0))
})

test_that("Check single word", {
  # start with single consonant
  expect_equal(my_pig_latin("hello"), "ezllohe")
  # start with consonant cluster
  expect_equal(my_pig_latin("friends"), "ezendsfri")
  # start with vowel
  expect_equal(my_pig_latin("apple"), "ezpple")
})

test_that("Check words without vowels and upper-case vowels", {
  # no vowel: the word is kept as is and only prefixed
  expect_equal(my_pig_latin("rhythm"), "ezrhythm")
  # upper-case vowels are recognised as vowels
  expect_equal(my_pig_latin("Apple"), "ezpple")
  expect_equal(my_pig_latin("HELLO"), "ezLLOHE")
})

test_that("Check multiple words in a vector", {
  expect_equal(my_pig_latin(c("go", "alpha")), c("ezgo", "ezlpha"))
  expect_equal(my_pig_latin(c("a", "bed")), c("ez", "ezdbe"))
  expect_equal(my_pig_latin(c("", "cat")), c("", "eztca"))
})
```