Skip to content

Commit d6fa122

Browse files
authored
Merge pull request #8 from Ben-Sacks/main
Add 'remove_backchannels' param to prep_dyads
2 parents 805d9a4 + e5cf7e1 commit d6fa122

File tree

3 files changed

+24
-1
lines changed

3 files changed

+24
-1
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ importFrom(DescTools,AUC)
1010
importFrom(dplyr,across)
1111
importFrom(dplyr,bind_cols)
1212
importFrom(dplyr,bind_rows)
13+
importFrom(dplyr,consecutive_id)
1314
importFrom(dplyr,distinct)
1415
importFrom(dplyr,everything)
1516
importFrom(dplyr,filter)

R/prep_dyads.R

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#' @param lemmatize logical, should words be lemmatized (switched to base morphological form), default is TRUE
1010
#' @param which_stoplist user-specified stopword removal method with options including "none", "SMART", "MIT_stops", "CA_OriginalStops", or "Temple_Stopwords25".
1111
#' "Temple_Stopwords25" is the default list
12+
#' @param remove_backchannel logical, should turns that consist entirely of stopwords (e.g., "Uhm yeah") be removed (TRUE) or preserved as NAs (FALSE)? Removal will 'squish' the turns before and after together into one. If NAs are preserved they are later interpolated. Default is FALSE.
1213
#' @param verbose display detailed output such as error messages and progress (default is TRUE)
1314
#' @returns
1415
#' dataframe with text cleaned and vectorized to a one word per-row format.
@@ -20,6 +21,7 @@
2021
#' @importFrom dplyr mutate
2122
#' @importFrom dplyr na_if
2223
#' @importFrom dplyr ungroup
24+
#' @importFrom dplyr consecutive_id
2325
#' @importFrom magrittr %>%
2426
#' @importFrom stringi stri_replace_all_fixed
2527
#' @importFrom stringi stri_replace_all_regex
@@ -35,7 +37,8 @@
3537
#' @export
3638

3739
prep_dyads <- function(dat_read, lemmatize = TRUE, omit_stops = TRUE,
38-
which_stoplist = "Temple_stops25", verbose = TRUE) {
40+
which_stoplist = "Temple_stops25",
41+
remove_backchannel = FALSE, verbose = TRUE) {
3942
# Verification steps
4043
if (nrow(dat_read) == 0) {
4144
stop("Input dataframe is empty.")
@@ -46,6 +49,10 @@ prep_dyads <- function(dat_read, lemmatize = TRUE, omit_stops = TRUE,
4649
if (!"Participant_ID" %in% names(dat_read)) {
4750
stop("Column 'Participant_ID' not found.")
4851
}
52+
# verify backchanneling argument
53+
if (!is.logical(remove_backchannel)) {
54+
stop("Argument 'remove_backchannel' must be logical")
55+
}
4956

5057
# Define stopword lists
5158
stopwords_lists <- list(MIT_stops = MIT_stops,
@@ -157,5 +164,17 @@ prep_dyads <- function(dat_read, lemmatize = TRUE, omit_stops = TRUE,
157164
df_prep <- df_prep %>% select(Event_ID, Participant_ID, Exchange_Count, Turn_Count,
158165
Text_Prep, Text_Clean, all_of(myvars), everything())
159166

167+
# If backchannels are set to be removed, drop them and squish the surrounding turns together
168+
if (remove_backchannel == TRUE) {
169+
df_prep <- df_prep[!is.na(df_prep$Text_Clean), ]
170+
df_prep <- df_prep %>%
171+
dplyr::group_by(Event_ID) %>%
172+
dplyr::mutate(
173+
Turn_Count = dplyr::consecutive_id(Participant_ID),
174+
Exchange_Count = ceiling(Turn_Count / 2)
175+
) %>%
176+
dplyr::ungroup()
177+
}
178+
160179
return(df_prep)
161180
}

man/prep_dyads.Rd

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)