assignment-b4-Chendong-Luo/optionA.Rmd at main · stat545ubc-2024/assignment-b4-Chendong-Luo · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
---
title: "Assignment B-4"
output: github_document
---

## Exercise 1

```{r}
# libraries
library(janeaustenr)
library(tidyverse)
library(stringr)
library(stopwords)
library(dplyr)
library(purrr)


# emma text
# Collapse the book into one lower-case string. Apostrophes are kept at this
# stage: the stop word list contains contractions such as "don't", which
# would never match if the apostrophe had already been stripped.
emma_text <- janeaustenr::emma %>%
  # combine lines in emma
  str_c(collapse = " ") %>%
  # convert to lower case
  str_to_lower() %>%
  # use a single apostrophe style so contractions match the stop word list
  str_replace_all("[\u2018\u2019]", "'") %>%
  # remove punctuation, except apostrophes
  str_replace_all("(?!')[[:punct:]]", "")


# split into words
emma_words <- emma_text %>%
  str_split("\\s+") %>%
  unlist() %>%
  # apostrophes used as quotation marks around a word are not part of it
  str_replace_all("^'+|'+$", "")


# english stop words in stopwords package (https://cran.r-project.org/web/packages/stopwords/readme/README.html)
stop_words <- stopwords("en")


# filter out stop words in emma with a vectorised %in% lookup, then strip the
# remaining apostrophes so that e.g. "emma's" is counted as "emmas"
filtered_words <- emma_words[!emma_words %in% stop_words] %>%
  str_replace_all("'", "")

# drop empty tokens (leading/trailing whitespace, or tokens that consisted
# only of apostrophes) so they are not counted as words
filtered_words <- filtered_words[filtered_words != ""]


# count frequency of words in emma, most frequent first
word_counts <- tibble(value = filtered_words) %>%
  count(value, sort = TRUE)

# top 20 words; with_ties = FALSE guarantees exactly 20 rows even when
# several words share the 20th-highest count
top_words <- word_counts %>%
  slice_max(order_by = n, n = 20, with_ties = FALSE)

# plot words frequency
ggplot(top_words, aes(x = reorder(value, n), y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 20 Most Common Words in Emma",
    x = "Words",
    y = "Frequency"
  ) +
  theme_minimal()
```

## Exercise 2

```{r}
library(testthat)

#' Convert Words to My Pig Latin
#'
#' @description
#' The function converts English words to my version of Pig Latin.
#'
#' * Rearrangement (vowels are the letters a, e, i, o, u in either case):
#'   1. For words that begin with a single consonant, all letters after the
#'      first vowel are moved to the beginning of the word, in front of the
#'      leading consonant and that vowel.
#'   2. Words that begin with a consonant cluster (several consonants before
#'      the first vowel) are treated the same way: everything after the first
#'      vowel is moved in front of the cluster and the vowel.
#'   3. For words that begin with a vowel, that first vowel is removed.
#'   4. Words that contain no vowel are left unchanged.
#' * Addition: Add "ez" to the beginning of the rearranged word.
#'
#' Empty strings are returned unchanged (no "ez" is added) and missing words
#' (`NA_character_`) are returned as `NA_character_`.
#'
#' @param words A character vector of words to convert.
#' @return A character vector of words converted to my Pig Latin, with one
#'   element per element of `words`. An error with the message
#'   "Input contains non-character" is raised when an element is not a
#'   character value.
#' @examples
#' my_pig_latin("hello")    # "ezllohe"
#' my_pig_latin("friends")    # "ezendsfri"
#' my_pig_latin("eat")    # "ezat"
#' my_pig_latin(c("apple", "go")) # "ezpple", "ezgo"
#' my_pig_latin("rhythm")    # "ezrhythm"
#' my_pig_latin(NA_character_)    # NA
#' @export
my_pig_latin <- function(words) {

  # Function to convert a single word
  convert_word <- function(word) {

    if (!is.character(word)) {
      stop("Input contains non-character")
    }

    # A missing word stays missing. Without this guard the comparison
    # `word == ""` below yields NA and `if` aborts with an unrelated error.
    if (is.na(word)) {
      return(NA_character_)
    }

    # empty word
    if (word == "") {
      return("")
    }

    # Find the position of the first vowel (NA when the word has none)
    first_vowel_position <- str_locate(word, "[aeiouAEIOU]")[1, 1]

    if (is.na(first_vowel_position)) {
      # no vowels in word
      rearranged <- word
    } else if (first_vowel_position == 1) {
      # remove vowel when word starts with a vowel
      rearranged <- str_sub(word, first_vowel_position + 1)
    } else {
      # Move all letters after the first vowel to the beginning when word
      # starts with consonants
      rearranged <- str_c(
        str_sub(word, first_vowel_position + 1),
        str_sub(word, 1, first_vowel_position)
      )
    }

    # Add "ez" to the beginning
    str_c("ez", rearranged)
  }

  # map conversion to every word in the vector
  map_chr(words, convert_word)
}
```
### Examples
```{r}
# The empty string is returned as-is; no "ez" prefix is added
my_pig_latin("")
# Single leading consonant: "llo" moves in front of "he" -> "ezllohe"
my_pig_latin("hello")
# Leading consonant cluster: "ends" moves in front of "fri" -> "ezendsfri"
my_pig_latin("friends")
# Leading vowel is dropped -> "ezat"
my_pig_latin("eat")
# Each element of a vector is converted separately -> "ezpple" "ezgo"
my_pig_latin(c("apple", "go"))

```


### Testing
```{r}
test_that("Check input", {
  # a numeric argument must be rejected with the function's error message
  numeric_input <- 123
  expect_error(
    my_pig_latin(numeric_input),
    regexp = "Input contains non-character"
  )

  # the empty string comes back untouched
  empty_word <- ""
  expect_equal(my_pig_latin(empty_word), empty_word)
})


test_that("Check single word", {
  # start with single consonant
  expect_equal(my_pig_latin("hello"), "ezllohe")

  # start with consonant cluster
  expect_equal(my_pig_latin("friends"), "ezendsfri")

  # start with vowel
  expect_equal(my_pig_latin("apple"), "ezpple")

  # start with upper-case vowel (the vowel class covers both cases)
  expect_equal(my_pig_latin("Apple"), "ezpple")

  # upper-case vowel after a consonant
  expect_equal(my_pig_latin("hEllo"), "ezllohE")

  # no vowel at all: the word is kept and only "ez" is added
  expect_equal(my_pig_latin("rhythm"), "ezrhythm")
})

test_that("Check multiple words in a vector", {
  # each case pairs an input vector with the conversion expected for it
  cases <- list(
    list(input = c("go", "alpha"), expected = c("ezgo", "ezlpha")),
    list(input = c("a", "bed"), expected = c("ez", "ezdbe")),
    list(input = c("", "cat"), expected = c("", "eztca"))
  )

  for (case in cases) {
    expect_equal(my_pig_latin(case[["input"]]), case[["expected"]])
  }
})
```