detect_rpc_clinician_scale_measures/visualizations.r at main · brad-cannell/detect_rpc_clinician_scale_measures · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
library(dplyr)
library(ggalt)
library(ggplot2)
library(readr)
library(tidyr)

source("data_maps.r")

# Generate summary plots for ABEM survey responses using harmonized data and
# shared factor orderings defined in `data_maps.r`.

# Import harmonized survey data and encode likert/socdem fields as ordered
# factors so plots display in the intended order.
data <- read_csv(
  "data/clinican_scale_measures_harmonized_analysis_data.csv",
  col_types = cols(
    socdem_hispanic = col_factor(levels = socdem_hispanic_factor_levels),
    socdem_identity = col_factor(levels = socdem_identity_factor_levels),
    socdem_marital = col_factor(levels = socdem_marital_factor_levels),
    socdem_military = col_factor(levels = socdem_military_factor_levels),
    socdem_care = col_factor(levels = socdem_care_factor_levels),
    socdem_experience = col_factor(levels = socdem_experience_factor_levels)
  )
) |>
  mutate(
    across(
      starts_with("abem_"),
      ~ factor(as.character(.x), levels = abem_factor_levels, ordered = TRUE)
    )
  )

# Load numeric Likert summary data for ABEM items to show mean differences; this
# dataset is prepared upstream so plotting code can stay lightweight.
abem_mean_diff <- read_csv(
  "data/csm_likert_numeric_data.csv",
  col_types = cols(
    item = col_factor(levels = abem_descriptive_names)
  )
)

# Bar plot comparing Canvas pre vs post mean scores by item
p1 <- abem_mean_diff |>
  ggplot(aes(x = mean_canvas_pre, xend = mean_canvas_post, y = item)) +
  geom_dumbbell(
    colour = "#a3c4dc",
    size = 3,
    colour_xend = "#0e668b",
    dot_guide = TRUE,
    dot_guide_size = 0.25,
    show.legend = TRUE
  ) +
  labs(x = "Mean Likert Value", y = NULL)

ggsave(
  filename = "visualizations/likert_answer_mean_changes.png",
  plot = p1,
  width = 4500,
  height = 3500,
  units = "px",
  dpi = "retina"
)


# Prep long-format data to compare pre/post response distributions
abem_long <- data |>
  filter(data_source %in% c("canvas_pre", "canvas_post")) |>
  pivot_longer(
    cols = starts_with("abem_"),
    names_to = "item",
    values_to = "response"
  ) |>
  drop_na(response) |>
  mutate(
    # Make scores numeric for plotting and attach descriptive item labels.
    score_num = as.integer(response) - 3,
    item = factor(
      abem_descriptive_names[item],
      levels = abem_descriptive_names
    ),
    data_source = recode(
      data_source,
      canvas_pre = "Canvas-pre",
      canvas_post = "Canvas-post"
    ),
    data_source = factor(data_source, levels = c("Canvas-pre", "Canvas-post"))
  )

# Distribution plot comparing pre/post ABEM item responses. Uses the descriptive
# item labels and numeric scores computed above to keep facets ordered.
p2 <- abem_long |>
  select(item, response, score_num, data_source) |>
  ggplot(aes(y = response, fill = data_source)) +
  geom_histogram(stat = "count", position = "dodge", width = 0.5) +
  scale_fill_manual(values = c("#a3c4dc", "#0e668b")) +
  facet_wrap(vars(item), labeller = label_wrap_gen(26)) +
  labs(x = NULL, y = "Likert Response", fill = "Survey")

# Persist the distribution plot for use in reports/manuscripts.
ggsave(
  filename = "visualizations/likert_answer_distributions.png",
  plot = p2,
  width = 4500,
  height = 3500,
  units = "px",
  dpi = "retina"
)

p3 <- abem_long |>
  select(id, item, response, score_num, data_source) |>
  ggplot(aes(y = response, fill = data_source)) +
  geom_histogram(stat = "count", position = "dodge", width = 0.5) +
  facet_wrap(vars(id, item), labeller = label_wrap_gen(26)) +
  labs(x = NULL, y = "Likert Response", fill = "Survey")