Merge pull request #7 from Ben-Sacks/main

Ben-Sacks · web-flow · commit 805d9a4298a8 · 2025-10-04T10:15:21.000-04:00
add generate_shams() and documentation
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -50,6 +50,7 @@ Collate:
     'compute_lagcorr.R'
     'corpus_analytics.R'
     'data.R'
+    'generate_shams.R'
     'globals.R'
     'prep_dyads.R'
     'read_1file.R'
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export(corpus_analytics)
+export(generate_shams)
 export(prep_dyads)
 export(read_1file)
 export(read_dyads)
@@ -18,6 +19,7 @@ importFrom(dplyr,lag)
 importFrom(dplyr,left_join)
 importFrom(dplyr,matches)
 importFrom(dplyr,mutate)
+importFrom(dplyr,n)
 importFrom(dplyr,n_distinct)
 importFrom(dplyr,na_if)
 importFrom(dplyr,rename)
diff --git a/R/generate_shams.R b/R/generate_shams.R
@@ -0,0 +1,57 @@
+#' generate_shams
+#'
+#' Generates a permutation of each individual dyad. Rows are first averaged to
+#' one per participant per exchange; each participant's turns are then
+#' reordered within each Event_ID by a single random permutation, so
+#' all columns of a turn move together. Shuffled dyads may act as controls.
+#'
+#' @name generate_shams
+#' @param df_prep Output dataframe of prep_dyads().
+#' @param seed (Optional) single number passed to set.seed() once before
+#'   shuffling. If NULL (default) the session's current RNG state is used.
+#' @returns
+#' An ungrouped dataframe with one row per participant per exchange, each
+#' participant's measures and text shuffled, and Turn_Count numbered per event.
+#' @importFrom magrittr %>%
+#' @importFrom dplyr group_by
+#' @importFrom dplyr summarize
+#' @importFrom dplyr across
+#' @importFrom dplyr mutate
+#' @importFrom dplyr n
+#' @export
+
+generate_shams <- function(df_prep, seed = NULL) {
+  # seed the RNG once, and only on request; set.seed() alters global RNG state
+  if (!is.null(seed)) {
+    set.seed(seed)
+  }
+  measure_cols <- "^(emo_|lex_|phon_|sem_|df_)"
+
+  # summarize down to turn means
+  turn_mean_df <- df_prep %>%
+    dplyr::group_by(Event_ID, Exchange_Count, Participant_ID) %>%
+    dplyr::summarize(
+      dplyr::across(dplyr::matches(measure_cols), ~ mean(.x, na.rm = TRUE)),
+      # these can be included as a sanity check
+      Text_Prep = paste(Text_Prep, collapse = " "),
+      Text_Clean = paste(Text_Clean, collapse = " "),
+      .groups = "drop"
+    )
+
+  # shuffle each participant's time series, independently of their partner's
+  turn_mean_df %>%
+    dplyr::group_by(Event_ID, Participant_ID) %>%
+    dplyr::mutate(
+      # one permutation per participant keeps every column of a turn together;
+      # indexing (not sample(x)) is safe when a group holds a single number
+      sham_order = sample.int(dplyr::n()),
+      dplyr::across(
+        c(dplyr::matches(measure_cols), Text_Prep, Text_Clean),
+        ~ .x[sham_order]
+      )
+    ) %>%
+    dplyr::group_by(Event_ID) %>%
+    dplyr::mutate(Turn_Count = seq_len(dplyr::n()), .after = Event_ID) %>%
+    dplyr::ungroup() %>%
+    dplyr::select(-dplyr::all_of("sham_order"))
+}
diff --git a/man/generate_shams.Rd b/man/generate_shams.Rd