
Commit 28a2f9e

Merge pull request #33 from pythonhealthdatascience/dev
Dev
2 parents fb02ffe + 8fd6d2c commit 28a2f9e

39 files changed (+1795 additions, -742 deletions)

.github/workflows/lint.yaml

Lines changed: 5 additions & 1 deletion
@@ -24,13 +24,17 @@ jobs:
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::lintr, local::.
+          extra-packages: any::lintr, any::cyclocomp, local::.
           needs: lint
 
       - name: Lint package
         run: lintr::lint_package()
         shell: Rscript {0}
+        env:
+          LINTR_ERROR_ON_LINT: true
 
       - name: Lint rmarkdown
         run: lintr::lint_dir("rmarkdown")
         shell: Rscript {0}
+        env:
+          LINTR_ERROR_ON_LINT: true
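The new env blocks make the lint jobs strict: with LINTR_ERROR_ON_LINT set, lintr raises an error when lints are found (rather than only printing them), so the workflow step fails; any::cyclocomp is installed because the cyclomatic-complexity linter relies on the cyclocomp package. A minimal sketch (illustrative only, not part of this commit) of reproducing the same strict check locally:

# Run the strict lint checks locally; with LINTR_ERROR_ON_LINT set,
# lintr errors when lints are found instead of only printing them.
Sys.setenv(LINTR_ERROR_ON_LINT = "true")
lintr::lint_package()          # lint the package, error on any lint
lintr::lint_dir("rmarkdown")   # lint the rmarkdown folder, error on any lint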

DESCRIPTION

Lines changed: 2 additions & 1 deletion
@@ -26,7 +26,8 @@ Imports:
     tidyselect,
     future,
     future.apply,
-    ggplot2
+    ggplot2,
+    tibble
 Suggests:
     testthat (>= 3.0.0),
     patrick,

NAMESPACE

Lines changed: 3 additions & 14 deletions
@@ -7,28 +7,18 @@ export(parameters)
 export(run_scenarios)
 export(runner)
 export(valid_inputs)
-importFrom(dplyr,filter)
+importFrom(dplyr,bind_cols)
+importFrom(dplyr,bind_rows)
 importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
 importFrom(dplyr,lead)
 importFrom(dplyr,mutate)
 importFrom(dplyr,n_distinct)
-importFrom(dplyr,pull)
-importFrom(dplyr,select)
-importFrom(dplyr,slice_head)
 importFrom(dplyr,summarise)
 importFrom(future,multisession)
 importFrom(future,plan)
 importFrom(future,sequential)
 importFrom(future.apply,future_lapply)
-importFrom(ggplot2,aes)
-importFrom(ggplot2,geom_line)
-importFrom(ggplot2,geom_ribbon)
-importFrom(ggplot2,geom_vline)
-importFrom(ggplot2,ggplot)
-importFrom(ggplot2,ggsave)
-importFrom(ggplot2,labs)
-importFrom(ggplot2,theme_minimal)
 importFrom(magrittr,"%>%")
 importFrom(purrr,reduce)
 importFrom(rlang,.data)
@@ -45,8 +35,7 @@ importFrom(simmer,timeout)
 importFrom(simmer,trajectory)
 importFrom(simmer,wrap)
 importFrom(stats,rexp)
-importFrom(stats,sd)
-importFrom(stats,t.test)
+importFrom(tibble,tibble)
 importFrom(tidyr,drop_na)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyselect,any_of)

R/choose_replications.R

Lines changed: 31 additions & 31 deletions
@@ -7,21 +7,14 @@
 #' @param path Path inc. filename to save figure to.
 #' @param min_rep A suggested minimum number of replications (default=NULL).
 #'
-#' @importFrom stats sd t.test
-#' @importFrom dplyr filter slice_head select pull
-#' @importFrom ggplot2 ggplot aes geom_line geom_ribbon geom_vline labs
-#' theme_minimal ggsave
-#' @importFrom rlang .data
-#'
 #' @return Dataframe with results from each replication.
 #' @export
 
 confidence_interval_method <- function(replications, desired_precision, metric,
                                        yaxis_title, path, min_rep = NULL) {
   # Run model for specified number of replications
   param <- parameters(number_of_runs = replications)
-  raw_results <- runner(param)
-  results <- get_run_results(raw_results)
+  results <- runner(param)[["run_results"]]
 
   # If mean of metric is less than 1, multiply by 100
   if (mean(results[[metric]]) < 1L) {
@@ -37,13 +30,13 @@ confidence_interval_method <- function(replications, desired_precision, metric,
   for (i in 1L:replications) {
 
     # Filter rows up to the i-th replication
-    subset <- results[[metric]][1L:i]
+    subset_data <- results[[metric]][1L:i]
 
     # Calculate mean
-    mean <- mean(subset)
+    mean_value <- mean(subset_data)
 
-    # Some calculations require more than 1 observation else will error...
-    if (i == 1L) {
+    # Some calculations require a few observations else will error...
+    if (i < 3L) {
       # When only one observation, set to NA
       std_dev <- NA
       ci_lower <- NA
@@ -52,17 +45,17 @@ confidence_interval_method <- function(replications, desired_precision, metric,
     } else {
       # Else, calculate standard deviation, 95% confidence interval, and
       # percentage deviation
-      std_dev <- sd(subset)
-      ci <- t.test(subset)[["conf.int"]]
+      std_dev <- stats::sd(subset_data)
+      ci <- stats::t.test(subset_data)[["conf.int"]]
       ci_lower <- ci[[1L]]
       ci_upper <- ci[[2L]]
-      deviation <- ((ci_upper - mean) / mean) * 100L
+      deviation <- ((ci_upper - mean_value) / mean_value) * 100L
     }
 
     # Append to the cumulative list
     cumulative_list[[i]] <- data.frame(
       replications = i,
-      cumulative_mean = mean,
+      cumulative_mean = mean_value,
       cumulative_std = std_dev,
       ci_lower = ci_lower,
       ci_upper = ci_upper,
@@ -74,40 +67,47 @@ confidence_interval_method <- function(replications, desired_precision, metric,
   cumulative <- do.call(rbind, cumulative_list)
 
   # Get the minimum number of replications where deviation is less than target
-  compare <- cumulative %>%
-    filter(.data[["perc_deviation"]] <= desired_precision * 100L)
+  compare <- dplyr::filter(
+    cumulative, .data[["perc_deviation"]] <= desired_precision * 100L
+  )
   if (nrow(compare) > 0L) {
     # Get minimum number
     n_reps <- compare %>%
-      slice_head() %>%
+      dplyr::slice_head() %>%
       dplyr::select(replications) %>%
-      pull()
-    print(paste0("Reached desired precision (", desired_precision, ") in ",
-                 n_reps, " replications."))
+      dplyr::pull()
+    message("Reached desired precision (", desired_precision, ") in ",
+            n_reps, " replications.")
   } else {
     warning("Running ", replications, " replications did not reach ",
-            "desired precision (", desired_precision, ").")
+            "desired precision (", desired_precision, ").", call. = FALSE)
   }
 
   # Plot the cumulative mean and confidence interval
-  p <- ggplot(cumulative, aes(x = .data[["replications"]],
-                              y = .data[["cumulative_mean"]])) +
-    geom_line() +
-    geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), alpha = 0.2)
+  p <- ggplot2::ggplot(cumulative,
+                       ggplot2::aes(x = .data[["replications"]],
                                    y = .data[["cumulative_mean"]])) +
+    ggplot2::geom_line() +
+    ggplot2::geom_ribbon(
+      ggplot2::aes(ymin = ci_lower, ymax = ci_upper),
+      alpha = 0.2
+    )
 
   # If specified, plot the minimum suggested number of replications
   if (!is.null(min_rep)) {
     p <- p +
-      geom_vline(xintercept = min_rep, linetype = "dashed", color = "red")
+      ggplot2::geom_vline(
+        xintercept = min_rep, linetype = "dashed", color = "red"
      )
   }
 
   # Modify labels and style
   p <- p +
-    labs(x = "Replications", y = yaxis_title) +
-    theme_minimal()
+    ggplot2::labs(x = "Replications", y = yaxis_title) +
+    ggplot2::theme_minimal()
 
   # Save the plot
-  ggsave(filename = path, width = 6.5, height = 4L, bg = "white")
+  ggplot2::ggsave(filename = path, width = 6.5, height = 4L, bg = "white")
 
   return(cumulative)
 }
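For context, the loop above implements the confidence interval method for choosing a number of replications: after each additional replication it recomputes the cumulative mean and its 95% confidence interval, then reports the smallest number of replications at which the interval's upper bound deviates from the mean by less than the desired precision. A standalone sketch of that core calculation, using made-up data rather than the package's runner() output:

# Illustrative sketch only (hypothetical data, not part of this commit).
set.seed(42L)
metric <- stats::rexp(50L, rate = 1L / 5L)  # stand-in for per-replication results
desired_precision <- 0.1                    # CI must sit within 10% of the mean

precision_reached <- function(x, target) {
  mean_value <- mean(x)
  ci_upper <- stats::t.test(x)[["conf.int"]][[2L]]
  ((ci_upper - mean_value) / mean_value) <= target
}

# Needs at least 3 observations, mirroring the i < 3L guard above
ok <- vapply(
  seq_along(metric),
  function(i) i >= 3L && precision_reached(metric[1L:i], desired_precision),
  logical(1L)
)
if (any(ok)) {
  message("Reached desired precision in ", which(ok)[[1L]], " replications.")
} else {
  warning("Did not reach desired precision in ", length(metric),
          " replications.", call. = FALSE)
}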

R/get_run_results.R

Lines changed: 70 additions & 51 deletions
@@ -1,8 +1,10 @@
-#' Get results from each replication.
+#' Process the raw monitored arrivals and resources.
 #'
-#' For each replication (there can be one or many), calculate the: (1) number
-#' of arrivals, (2) mean wait time for each resource, (3) mean activity time
-#' for each resource, and (4) mean resource utilisation.
+#' For the provided replication, calculate the:
+#' (1) number of arrivals
+#' (2) mean wait time for each resource
+#' (3) mean activity time for each resource
+#' (4) mean resource utilisation.
 #'
 #' Credit: The utilisation calculation is taken from the
 #' `plot.resources.utilization()` function in simmer.plot 0.1.18, which is
@@ -18,71 +20,88 @@
 #' @param results Named list with `arrivals` containing output from
 #'   `get_mon_arrivals()` and `resources` containing output from
 #'   `get_mon_resources()` (`per_resource = TRUE` and `ongoing = TRUE`).
+#' @param run_number Integer representing index of current simulation run.
 #'
 #' @importFrom dplyr group_by summarise n_distinct mutate lead full_join
+#' @importFrom dplyr bind_cols
 #' @importFrom purrr reduce
 #' @importFrom rlang .data
 #' @importFrom simmer get_mon_resources get_mon_arrivals now
 #' @importFrom tidyr pivot_wider drop_na
 #' @importFrom tidyselect any_of
+#' @importFrom tibble tibble
 #'
-#' @return Tibble with results from each replication.
+#' @return Tibble with processed results from replication.
 #' @export
 
-get_run_results <- function(results) {
+get_run_results <- function(results, run_number) {
 
   # Remove patients who were still waiting and had not completed
   results[["arrivals"]] <- results[["arrivals"]] %>%
     drop_na(any_of("end_time"))
 
-  # Calculate the number of arrivals
-  calc_arr <- results[["arrivals"]] %>%
-    group_by(.data[["replication"]]) %>%
-    summarise(arrivals = n_distinct(.data[["name"]]))
+  # If there are any arrivals...
+  if (nrow(results[["arrivals"]]) > 0L) {
 
-  # Calculate the mean wait time for each resource
-  calc_wait <- results[["arrivals"]] %>%
-    mutate(
-      waiting_time = round(
-        .data[["end_time"]] - (
-          .data[["start_time"]] + .data[["activity_time"]]
-        ), 10L
-      )
-    ) %>%
-    group_by(.data[["resource"]], .data[["replication"]]) %>%
-    summarise(mean_waiting_time = mean(.data[["waiting_time"]])) %>%
-    pivot_wider(names_from = "resource",
-                values_from = "mean_waiting_time",
-                names_glue = "mean_waiting_time_{resource}")
+    # Calculate the number of arrivals
+    calc_arr <- results[["arrivals"]] %>%
+      summarise(arrivals = n_distinct(.data[["name"]]))
 
-  # Calculate the mean time spent with each resource
-  calc_act <- results[["arrivals"]] %>%
-    group_by(.data[["resource"]], .data[["replication"]]) %>%
-    summarise(mean_activity_time = mean(.data[["activity_time"]])) %>%
-    pivot_wider(names_from = "resource",
-                values_from = "mean_activity_time",
-                names_glue = "mean_activity_time_{resource}")
+    # Calculate the mean wait time for each resource
+    calc_wait <- results[["arrivals"]] %>%
+      mutate(
+        waiting_time = round(
+          .data[["end_time"]] - (
+            .data[["start_time"]] + .data[["activity_time"]]
+          ), 10L
+        )
+      ) %>%
+      group_by(.data[["resource"]]) %>%
+      summarise(mean_waiting_time = mean(.data[["waiting_time"]])) %>%
+      pivot_wider(names_from = "resource",
+                  values_from = "mean_waiting_time",
+                  names_glue = "mean_waiting_time_{resource}")
 
-  # Calculate the mean resource utilisation
-  # Utilisation is given by the total effective usage time (`in_use`) over the
-  # total time intervals considered (`dt`).
-  calc_util <- results[["resources"]] %>%
-    group_by(.data[["resource"]], .data[["replication"]]) %>%
-    mutate(dt = lead(.data[["time"]]) - .data[["time"]]) %>%
-    mutate(capacity = pmax(.data[["capacity"]], .data[["server"]])) %>%
-    mutate(dt = ifelse(.data[["capacity"]] > 0L, .data[["dt"]], 0L)) %>%
-    mutate(in_use = .data[["dt"]] * .data[["server"]] / .data[["capacity"]]) %>%
-    summarise(
-      utilisation = sum(.data[["in_use"]], na.rm = TRUE) /
-        sum(.data[["dt"]], na.rm = TRUE)
-    ) %>%
-    pivot_wider(names_from = "resource",
-                values_from = "utilisation",
-                names_glue = "utilisation_{resource}")
+    # Calculate the mean time spent with each resource
+    calc_act <- results[["arrivals"]] %>%
+      group_by(.data[["resource"]]) %>%
+      summarise(mean_activity_time = mean(.data[["activity_time"]])) %>%
+      pivot_wider(names_from = "resource",
+                  values_from = "mean_activity_time",
+                  names_glue = "mean_activity_time_{resource}")
 
-  # Combine all calculated metrics into a single dataframe
-  processed_result <- list(calc_arr, calc_wait, calc_act, calc_util) %>%
-    reduce(full_join, by = "replication")
+    # Calculate the mean resource utilisation
+    # Utilisation is given by the total effective usage time (`in_use`) over the
+    # total time intervals considered (`dt`).
+    calc_util <- results[["resources"]] %>%
+      group_by(.data[["resource"]]) %>%
+      # nolint start
+      mutate(dt = lead(.data[["time"]]) - .data[["time"]]) %>%
+      mutate(capacity = pmax(.data[["capacity"]], .data[["server"]])) %>%
+      mutate(dt = ifelse(.data[["capacity"]] > 0L, .data[["dt"]], 0L)) %>%
+      mutate(in_use = (.data[["dt"]] * .data[["server"]] /
+                         .data[["capacity"]])) %>%
+      # nolint end
+      summarise(
+        utilisation = sum(.data[["in_use"]], na.rm = TRUE) /
+          sum(.data[["dt"]], na.rm = TRUE)
+      ) %>%
+      pivot_wider(names_from = "resource",
+                  values_from = "utilisation",
+                  names_glue = "utilisation_{resource}")
 
-  return(processed_result)
+    # Combine all calculated metrics into a single dataframe, and along with
+    # the replication number
+    processed_result <- dplyr::bind_cols(
+      tibble(replication = run_number),
+      calc_arr, calc_wait, calc_act, calc_util
+    )
+  } else {
+    # If there were no arrivals, return dataframe row with just the replication
+    # number and arrivals column set to 0
+    processed_result <- tibble(replication = run_number,
+                               arrivals = nrow(results[["arrivals"]]))
+  }
+
+  return(processed_result) # nolint
 }
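The calc_util pipeline above estimates utilisation per resource as the busy-time-weighted fraction of the monitored period: each interval dt between consecutive monitoring events is weighted by server / capacity, then summed and divided by the total observed time. A toy illustration with made-up monitoring data, using the same column names as the monitored resources (resource, time, server, capacity) but omitting the zero-capacity and over-capacity guards the function applies:

# Toy illustration only (made-up monitoring data, not simmer output).
library(dplyr)

resources <- tibble::tibble(
  resource = "nurse",
  time     = c(0, 2, 5, 9),   # event times from the monitor
  server   = c(1, 2, 1, 0),   # servers busy after each event
  capacity = c(2, 2, 2, 2)    # servers available
)

resources %>%
  group_by(resource) %>%
  mutate(dt = lead(time) - time) %>%            # length of each interval
  mutate(in_use = dt * server / capacity) %>%   # busy-weighted interval
  summarise(utilisation = sum(in_use, na.rm = TRUE) / sum(dt, na.rm = TRUE))
# nurse: (2 * 0.5 + 3 * 1 + 4 * 0.5) / 9 = 6 / 9 ~= 0.667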
