
Commit 28a2f9e

Merge pull request #33 from pythonhealthdatascience/dev
Dev
2 parents fb02ffe + 8fd6d2c commit 28a2f9e

39 files changed (+1795 additions, -742 deletions)

.github/workflows/lint.yaml

Lines changed: 5 additions & 1 deletion
@@ -24,13 +24,17 @@ jobs:
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::lintr, local::.
+          extra-packages: any::lintr, any::cyclocomp, local::.
           needs: lint
 
       - name: Lint package
         run: lintr::lint_package()
         shell: Rscript {0}
+        env:
+          LINTR_ERROR_ON_LINT: true
 
       - name: Lint rmarkdown
         run: lintr::lint_dir("rmarkdown")
         shell: Rscript {0}
+        env:
+          LINTR_ERROR_ON_LINT: true
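The new env blocks make the lint jobs strict: with LINTR_ERROR_ON_LINT set, lintr raises an error when lints are found (rather than only printing them), so the workflow step fails; any::cyclocomp is installed because the cyclomatic-complexity linter relies on the cyclocomp package. A minimal sketch (illustrative only, not part of this commit) of reproducing the same strict check locally:

# Run the strict lint checks locally; with LINTR_ERROR_ON_LINT set,
# lintr errors when lints are found instead of only printing them.
Sys.setenv(LINTR_ERROR_ON_LINT = "true")
lintr::lint_package()          # lint the package, error on any lint
lintr::lint_dir("rmarkdown")   # lint the rmarkdown folder, error on any lint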

DESCRIPTION

Lines changed: 2 additions & 1 deletion
@@ -26,7 +26,8 @@ Imports:
     tidyselect,
     future,
     future.apply,
-    ggplot2
+    ggplot2,
+    tibble
 Suggests:
     testthat (>= 3.0.0),
     patrick,

NAMESPACE

Lines changed: 3 additions & 14 deletions
@@ -7,28 +7,18 @@ export(parameters)
 export(run_scenarios)
 export(runner)
 export(valid_inputs)
-importFrom(dplyr,filter)
+importFrom(dplyr,bind_cols)
+importFrom(dplyr,bind_rows)
 importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
 importFrom(dplyr,lead)
 importFrom(dplyr,mutate)
 importFrom(dplyr,n_distinct)
-importFrom(dplyr,pull)
-importFrom(dplyr,select)
-importFrom(dplyr,slice_head)
 importFrom(dplyr,summarise)
 importFrom(future,multisession)
 importFrom(future,plan)
 importFrom(future,sequential)
 importFrom(future.apply,future_lapply)
-importFrom(ggplot2,aes)
-importFrom(ggplot2,geom_line)
-importFrom(ggplot2,geom_ribbon)
-importFrom(ggplot2,geom_vline)
-importFrom(ggplot2,ggplot)
-importFrom(ggplot2,ggsave)
-importFrom(ggplot2,labs)
-importFrom(ggplot2,theme_minimal)
 importFrom(magrittr,"%>%")
 importFrom(purrr,reduce)
 importFrom(rlang,.data)
@@ -45,8 +35,7 @@ importFrom(simmer,timeout)
 importFrom(simmer,trajectory)
 importFrom(simmer,wrap)
 importFrom(stats,rexp)
-importFrom(stats,sd)
-importFrom(stats,t.test)
+importFrom(tibble,tibble)
 importFrom(tidyr,drop_na)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyselect,any_of)

R/choose_replications.R

Lines changed: 31 additions & 31 deletions
@@ -7,21 +7,14 @@
 #' @param path Path inc. filename to save figure to.
 #' @param min_rep A suggested minimum number of replications (default=NULL).
 #'
-#' @importFrom stats sd t.test
-#' @importFrom dplyr filter slice_head select pull
-#' @importFrom ggplot2 ggplot aes geom_line geom_ribbon geom_vline labs
-#' theme_minimal ggsave
-#' @importFrom rlang .data
-#'
 #' @return Dataframe with results from each replication.
 #' @export
 
 confidence_interval_method <- function(replications, desired_precision, metric,
                                        yaxis_title, path, min_rep = NULL) {
   # Run model for specified number of replications
   param <- parameters(number_of_runs = replications)
-  raw_results <- runner(param)
-  results <- get_run_results(raw_results)
+  results <- runner(param)[["run_results"]]
 
   # If mean of metric is less than 1, multiply by 100
   if (mean(results[[metric]]) < 1L) {
@@ -37,13 +30,13 @@ confidence_interval_method <- function(replications, desired_precision, metric,
   for (i in 1L:replications) {
 
     # Filter rows up to the i-th replication
-    subset <- results[[metric]][1L:i]
+    subset_data <- results[[metric]][1L:i]
 
     # Calculate mean
-    mean <- mean(subset)
+    mean_value <- mean(subset_data)
 
-    # Some calculations require more than 1 observation else will error...
-    if (i == 1L) {
+    # Some calculations require a few observations else will error...
+    if (i < 3L) {
       # When only one observation, set to NA
       std_dev <- NA
       ci_lower <- NA
@@ -52,17 +45,17 @@ confidence_interval_method <- function(replications, desired_precision, metric,
     } else {
       # Else, calculate standard deviation, 95% confidence interval, and
       # percentage deviation
-      std_dev <- sd(subset)
-      ci <- t.test(subset)[["conf.int"]]
+      std_dev <- stats::sd(subset_data)
+      ci <- stats::t.test(subset_data)[["conf.int"]]
       ci_lower <- ci[[1L]]
       ci_upper <- ci[[2L]]
-      deviation <- ((ci_upper - mean) / mean) * 100L
+      deviation <- ((ci_upper - mean_value) / mean_value) * 100L
     }
 
     # Append to the cumulative list
     cumulative_list[[i]] <- data.frame(
       replications = i,
-      cumulative_mean = mean,
+      cumulative_mean = mean_value,
       cumulative_std = std_dev,
       ci_lower = ci_lower,
       ci_upper = ci_upper,
@@ -74,40 +67,47 @@ confidence_interval_method <- function(replications, desired_precision, metric,
   cumulative <- do.call(rbind, cumulative_list)
 
   # Get the minimum number of replications where deviation is less than target
-  compare <- cumulative %>%
-    filter(.data[["perc_deviation"]] <= desired_precision * 100L)
+  compare <- dplyr::filter(
+    cumulative, .data[["perc_deviation"]] <= desired_precision * 100L
+  )
   if (nrow(compare) > 0L) {
     # Get minimum number
     n_reps <- compare %>%
-      slice_head() %>%
+      dplyr::slice_head() %>%
       dplyr::select(replications) %>%
-      pull()
-    print(paste0("Reached desired precision (", desired_precision, ") in ",
-                 n_reps, " replications."))
+      dplyr::pull()
+    message("Reached desired precision (", desired_precision, ") in ",
+            n_reps, " replications.")
   } else {
     warning("Running ", replications, " replications did not reach ",
-            "desired precision (", desired_precision, ").")
+            "desired precision (", desired_precision, ").", call. = FALSE)
   }
 
   # Plot the cumulative mean and confidence interval
-  p <- ggplot(cumulative, aes(x = .data[["replications"]],
-                              y = .data[["cumulative_mean"]])) +
-    geom_line() +
-    geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), alpha = 0.2)
+  p <- ggplot2::ggplot(cumulative,
+                       ggplot2::aes(x = .data[["replications"]],
                                    y = .data[["cumulative_mean"]])) +
+    ggplot2::geom_line() +
+    ggplot2::geom_ribbon(
+      ggplot2::aes(ymin = ci_lower, ymax = ci_upper),
+      alpha = 0.2
+    )
 
   # If specified, plot the minimum suggested number of replications
   if (!is.null(min_rep)) {
     p <- p +
-      geom_vline(xintercept = min_rep, linetype = "dashed", color = "red")
+      ggplot2::geom_vline(
+        xintercept = min_rep, linetype = "dashed", color = "red"
      )
   }
 
   # Modify labels and style
   p <- p +
-    labs(x = "Replications", y = yaxis_title) +
-    theme_minimal()
+    ggplot2::labs(x = "Replications", y = yaxis_title) +
+    ggplot2::theme_minimal()
 
   # Save the plot
-  ggsave(filename = path, width = 6.5, height = 4L, bg = "white")
+  ggplot2::ggsave(filename = path, width = 6.5, height = 4L, bg = "white")
 
   return(cumulative)
 }
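For context, the loop above implements the confidence interval method for choosing a number of replications: after each additional replication it recomputes the cumulative mean and its 95% confidence interval, then reports the smallest number of replications at which the interval's upper bound deviates from the mean by less than the desired precision. A standalone sketch of that core calculation, using made-up data rather than the package's runner() output:

# Illustrative sketch only (hypothetical data, not part of this commit).
set.seed(42L)
metric <- stats::rexp(50L, rate = 1L / 5L)  # stand-in for per-replication results
desired_precision <- 0.1                    # CI must sit within 10% of the mean

precision_reached <- function(x, target) {
  mean_value <- mean(x)
  ci_upper <- stats::t.test(x)[["conf.int"]][[2L]]
  ((ci_upper - mean_value) / mean_value) <= target
}

# Needs at least 3 observations, mirroring the i < 3L guard above
ok <- vapply(
  seq_along(metric),
  function(i) i >= 3L && precision_reached(metric[1L:i], desired_precision),
  logical(1L)
)
if (any(ok)) {
  message("Reached desired precision in ", which(ok)[[1L]], " replications.")
} else {
  warning("Did not reach desired precision in ", length(metric),
          " replications.", call. = FALSE)
}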

R/get_run_results.R

Lines changed: 70 additions & 51 deletions
@@ -1,8 +1,10 @@
-#' Get results from each replication.
+#' Process the raw monitored arrivals and resources.
 #'
-#' For each replication (there can be one or many), calculate the: (1) number
-#' of arrivals, (2) mean wait time for each resource, (3) mean activity time
-#' for each resource, and (4) mean resource utilisation.
+#' For the provided replication, calculate the:
+#' (1) number of arrivals
+#' (2) mean wait time for each resource
+#' (3) mean activity time for each resource
+#' (4) mean resource utilisation.
 #'
 #' Credit: The utilisation calculation is taken from the
 #' `plot.resources.utilization()` function in simmer.plot 0.1.18, which is
@@ -18,71 +20,88 @@
 #' @param results Named list with `arrivals` containing output from
 #'   `get_mon_arrivals()` and `resources` containing output from
 #'   `get_mon_resources()` (`per_resource = TRUE` and `ongoing = TRUE`).
+#' @param run_number Integer representing index of current simulation run.
 #'
 #' @importFrom dplyr group_by summarise n_distinct mutate lead full_join
+#' @importFrom dplyr bind_cols
 #' @importFrom purrr reduce
 #' @importFrom rlang .data
 #' @importFrom simmer get_mon_resources get_mon_arrivals now
 #' @importFrom tidyr pivot_wider drop_na
 #' @importFrom tidyselect any_of
+#' @importFrom tibble tibble
 #'
-#' @return Tibble with results from each replication.
+#' @return Tibble with processed results from replication.
 #' @export
 
-get_run_results <- function(results) {
+get_run_results <- function(results, run_number) {
 
   # Remove patients who were still waiting and had not completed
   results[["arrivals"]] <- results[["arrivals"]] %>%
     drop_na(any_of("end_time"))
 
-  # Calculate the number of arrivals
-  calc_arr <- results[["arrivals"]] %>%
-    group_by(.data[["replication"]]) %>%
-    summarise(arrivals = n_distinct(.data[["name"]]))
+  # If there are any arrivals...
+  if (nrow(results[["arrivals"]]) > 0L) {
 
-  # Calculate the mean wait time for each resource
-  calc_wait <- results[["arrivals"]] %>%
-    mutate(
-      waiting_time = round(
-        .data[["end_time"]] - (
-          .data[["start_time"]] + .data[["activity_time"]]
-        ), 10L
-      )
-    ) %>%
-    group_by(.data[["resource"]], .data[["replication"]]) %>%
-    summarise(mean_waiting_time = mean(.data[["waiting_time"]])) %>%
-    pivot_wider(names_from = "resource",
-                values_from = "mean_waiting_time",
-                names_glue = "mean_waiting_time_{resource}")
+    # Calculate the number of arrivals
+    calc_arr <- results[["arrivals"]] %>%
+      summarise(arrivals = n_distinct(.data[["name"]]))
 
-  # Calculate the mean time spent with each resource
-  calc_act <- results[["arrivals"]] %>%
-    group_by(.data[["resource"]], .data[["replication"]]) %>%
-    summarise(mean_activity_time = mean(.data[["activity_time"]])) %>%
-    pivot_wider(names_from = "resource",
-                values_from = "mean_activity_time",
-                names_glue = "mean_activity_time_{resource}")
+    # Calculate the mean wait time for each resource
+    calc_wait <- results[["arrivals"]] %>%
+      mutate(
+        waiting_time = round(
+          .data[["end_time"]] - (
+            .data[["start_time"]] + .data[["activity_time"]]
+          ), 10L
+        )
+      ) %>%
+      group_by(.data[["resource"]]) %>%
+      summarise(mean_waiting_time = mean(.data[["waiting_time"]])) %>%
+      pivot_wider(names_from = "resource",
+                  values_from = "mean_waiting_time",
+                  names_glue = "mean_waiting_time_{resource}")
 
-  # Calculate the mean resource utilisation
-  # Utilisation is given by the total effective usage time (`in_use`) over the
-  # total time intervals considered (`dt`).
-  calc_util <- results[["resources"]] %>%
-    group_by(.data[["resource"]], .data[["replication"]]) %>%
-    mutate(dt = lead(.data[["time"]]) - .data[["time"]]) %>%
-    mutate(capacity = pmax(.data[["capacity"]], .data[["server"]])) %>%
-    mutate(dt = ifelse(.data[["capacity"]] > 0L, .data[["dt"]], 0L)) %>%
-    mutate(in_use = .data[["dt"]] * .data[["server"]] / .data[["capacity"]]) %>%
-    summarise(
-      utilisation = sum(.data[["in_use"]], na.rm = TRUE) /
-        sum(.data[["dt"]], na.rm = TRUE)
-    ) %>%
-    pivot_wider(names_from = "resource",
-                values_from = "utilisation",
-                names_glue = "utilisation_{resource}")
+    # Calculate the mean time spent with each resource
+    calc_act <- results[["arrivals"]] %>%
+      group_by(.data[["resource"]]) %>%
+      summarise(mean_activity_time = mean(.data[["activity_time"]])) %>%
+      pivot_wider(names_from = "resource",
+                  values_from = "mean_activity_time",
+                  names_glue = "mean_activity_time_{resource}")
 
-  # Combine all calculated metrics into a single dataframe
-  processed_result <- list(calc_arr, calc_wait, calc_act, calc_util) %>%
-    reduce(full_join, by = "replication")
+    # Calculate the mean resource utilisation
+    # Utilisation is given by the total effective usage time (`in_use`) over the
+    # total time intervals considered (`dt`).
+    calc_util <- results[["resources"]] %>%
+      group_by(.data[["resource"]]) %>%
+      # nolint start
+      mutate(dt = lead(.data[["time"]]) - .data[["time"]]) %>%
+      mutate(capacity = pmax(.data[["capacity"]], .data[["server"]])) %>%
+      mutate(dt = ifelse(.data[["capacity"]] > 0L, .data[["dt"]], 0L)) %>%
+      mutate(in_use = (.data[["dt"]] * .data[["server"]] /
+                         .data[["capacity"]])) %>%
+      # nolint end
+      summarise(
+        utilisation = sum(.data[["in_use"]], na.rm = TRUE) /
+          sum(.data[["dt"]], na.rm = TRUE)
+      ) %>%
+      pivot_wider(names_from = "resource",
+                  values_from = "utilisation",
+                  names_glue = "utilisation_{resource}")
 
-  return(processed_result)
+    # Combine all calculated metrics into a single dataframe, and along with
+    # the replication number
+    processed_result <- dplyr::bind_cols(
+      tibble(replication = run_number),
+      calc_arr, calc_wait, calc_act, calc_util
+    )
+  } else {
+    # If there were no arrivals, return dataframe row with just the replication
+    # number and arrivals column set to 0
+    processed_result <- tibble(replication = run_number,
+                               arrivals = nrow(results[["arrivals"]]))
+  }
+
+  return(processed_result) # nolint
 }
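The calc_util pipeline above estimates utilisation per resource as the busy-time-weighted fraction of the monitored period: each interval dt between consecutive monitoring events is weighted by server / capacity, then summed and divided by the total observed time. A toy illustration with made-up monitoring data, using the same column names as the monitored resources (resource, time, server, capacity) but omitting the zero-capacity and over-capacity guards the function applies:

# Toy illustration only (made-up monitoring data, not simmer output).
library(dplyr)

resources <- tibble::tibble(
  resource = "nurse",
  time     = c(0, 2, 5, 9),   # event times from the monitor
  server   = c(1, 2, 1, 0),   # servers busy after each event
  capacity = c(2, 2, 2, 2)    # servers available
)

resources %>%
  group_by(resource) %>%
  mutate(dt = lead(time) - time) %>%            # length of each interval
  mutate(in_use = dt * server / capacity) %>%   # busy-weighted interval
  summarise(utilisation = sum(in_use, na.rm = TRUE) / sum(dt, na.rm = TRUE))
# nurse: (2 * 0.5 + 3 * 1 + 4 * 0.5) / 9 = 6 / 9 ~= 0.667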
