diff --git a/.Rbuildignore b/.Rbuildignore index e85c2d0..759b09e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,4 +10,5 @@ ^\.lintr$ ^outputs$ ^rmarkdown$ -^CITATION\.cff$ \ No newline at end of file +^CITATION\.cff$ +^run_rmarkdown\.sh$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a9e0628..e4a15b4 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,8 +2,7 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] - pull_request: + branches: [main] workflow_dispatch: name: R-CMD-check.yaml diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index b5c5333..852f7d7 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -4,7 +4,6 @@ on: push: branches: [main] - pull_request: workflow_dispatch: name: lint diff --git a/.gitignore b/.gitignore index 5b6a065..e75435c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,49 @@ -.Rproj.user +# History files .Rhistory +.Rapp.history + +# Session Data files .RData +.RDataTmp + +# User-specific files .Ruserdata + +# Example code in package build process +*-Ex.R + +# Output files from R CMD build +/*.tar.gz + +# Output files from R CMD check +/*.Rcheck/ + +# RStudio files +.Rproj.user/ + +# produced vignettes +vignettes/*.html +vignettes/*.pdf + +# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 +.httr-oauth + +# knitr and R markdown default cache directories +*_cache/ +/cache/ + +# Temporary files created by R markdown +*.utf8.md +*.knit.md + +# R Environment Variables +.Renviron + +# pkgdown site +docs/ + +# translation temp files +po/*~ + +# RStudio Connect folder +rsconnect/ diff --git a/DESCRIPTION b/DESCRIPTION index 1f35eda..cb4672d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,15 +23,15 @@ Imports: purrr, rlang, tidyr, - R6, tidyselect, future, - future.apply + future.apply, + ggplot2 
Suggests: testthat (>= 3.0.0), + patrick, lintr, devtools, - ggplot2, xtable, data.table Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 9065ed9..d3560b9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,22 +1,34 @@ # Generated by roxygen2: do not edit by hand -export(Defaults) -export(defaults) +export(confidence_interval_method) +export(get_run_results) export(model) -export(process_replications) -export(trial) +export(parameters) +export(run_scenarios) +export(runner) export(valid_inputs) -importFrom(R6,R6Class) +importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) importFrom(dplyr,lead) importFrom(dplyr,mutate) importFrom(dplyr,n_distinct) +importFrom(dplyr,pull) +importFrom(dplyr,select) +importFrom(dplyr,slice_head) importFrom(dplyr,summarise) importFrom(future,multisession) importFrom(future,plan) importFrom(future,sequential) importFrom(future.apply,future_lapply) +importFrom(ggplot2,aes) +importFrom(ggplot2,geom_line) +importFrom(ggplot2,geom_ribbon) +importFrom(ggplot2,geom_vline) +importFrom(ggplot2,ggplot) +importFrom(ggplot2,ggsave) +importFrom(ggplot2,labs) +importFrom(ggplot2,theme_minimal) importFrom(magrittr,"%>%") importFrom(purrr,reduce) importFrom(rlang,.data) @@ -24,6 +36,7 @@ importFrom(simmer,add_generator) importFrom(simmer,add_resource) importFrom(simmer,get_mon_arrivals) importFrom(simmer,get_mon_resources) +importFrom(simmer,now) importFrom(simmer,release) importFrom(simmer,run) importFrom(simmer,seize) @@ -32,6 +45,8 @@ importFrom(simmer,timeout) importFrom(simmer,trajectory) importFrom(simmer,wrap) importFrom(stats,rexp) +importFrom(stats,sd) +importFrom(stats,t.test) importFrom(tidyr,drop_na) importFrom(tidyr,pivot_wider) importFrom(tidyselect,any_of) diff --git a/R/choose_replications.R b/R/choose_replications.R new file mode 100644 index 0000000..793bfb0 --- /dev/null +++ b/R/choose_replications.R @@ -0,0 +1,113 @@ +#' Use the confidence interval method to select the number of replications. 
+#' +#' @param replications Number of times to run the model. +#' @param desired_precision Desired mean deviation from confidence interval. +#' @param metric Name of performance metric to assess. +#' @param yaxis_title Label for y axis. +#' @param path Path inc. filename to save figure to. +#' @param min_rep A suggested minimum number of replications (default=NULL). +#' +#' @importFrom stats sd t.test +#' @importFrom dplyr filter slice_head select pull +#' @importFrom ggplot2 ggplot aes geom_line geom_ribbon geom_vline labs +#' theme_minimal ggsave +#' @importFrom rlang .data +#' +#' @return Dataframe with results from each replication. +#' @export + +confidence_interval_method <- function(replications, desired_precision, metric, + yaxis_title, path, min_rep = NULL) { + # Run model for specified number of replications + param <- parameters(number_of_runs = replications) + raw_results <- runner(param) + results <- get_run_results(raw_results) + + # If mean of metric is less than 1, multiply by 100 + if (mean(results[[metric]]) < 1L) { + results[[paste0("adj_", metric)]] <- results[[metric]] * 100L + metric <- paste0("adj_", metric) + } + + # Initialise list to store the results + cumulative_list <- list() + + # For each row in the dataframe, filter to rows up to the i-th replication + # then perform calculations + for (i in 1L:replications) { + + # Filter rows up to the i-th replication + subset <- results[[metric]][1L:i] + + # Calculate mean + mean <- mean(subset) + + # Some calculations require more than 1 observation else will error... 
+ if (i == 1L) { + # When only one observation, set to NA + std_dev <- NA + ci_lower <- NA + ci_upper <- NA + deviation <- NA + } else { + # Else, calculate standard deviation, 95% confidence interval, and + # percentage deviation + std_dev <- sd(subset) + ci <- t.test(subset)[["conf.int"]] + ci_lower <- ci[[1L]] + ci_upper <- ci[[2L]] + deviation <- ((ci_upper - mean) / mean) * 100L + } + + # Append to the cumulative list + cumulative_list[[i]] <- data.frame( + replications = i, + cumulative_mean = mean, + cumulative_std = std_dev, + ci_lower = ci_lower, + ci_upper = ci_upper, + perc_deviation = deviation + ) + } + + # Combine the list into a single data frame + cumulative <- do.call(rbind, cumulative_list) + + # Get the minimum number of replications where deviation is less than target + compare <- cumulative %>% + filter(.data[["perc_deviation"]] <= desired_precision * 100L) + if (nrow(compare) > 0L) { + # Get minimum number + n_reps <- compare %>% + slice_head() %>% + dplyr::select(replications) %>% + pull() + print(paste0("Reached desired precision (", desired_precision, ") in ", + n_reps, " replications.")) + } else { + warning("Running ", replications, " replications did not reach ", + "desired precision (", desired_precision, ").") + } + + # Plot the cumulative mean and confidence interval + p <- ggplot(cumulative, aes(x = .data[["replications"]], + y = .data[["cumulative_mean"]])) + + geom_line() + + geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), alpha = 0.2) + + # If specified, plot the minimum suggested number of replications + if (!is.null(min_rep)) { + p <- p + + geom_vline(xintercept = min_rep, linetype = "dashed", color = "red") + } + + # Modify labels and style + p <- p + + labs(x = "Replications", y = yaxis_title) + + theme_minimal() + + # Save the plot + ggsave(filename = path, width = 6.5, height = 4L, bg = "white") + + return(cumulative) +} diff --git a/R/defaults.R b/R/defaults.R deleted file mode 100644 index 1a217e9..0000000 --- 
a/R/defaults.R +++ /dev/null @@ -1,102 +0,0 @@ -#' @title R6 class returning list of default model parameters -#' -#' @description -#' `Defaults` is an R6 class instead of a function because this allows us to -#' return a list whilst also having functionality which only allows -#' modification of existing attributes, and not the addition of new attributes. -#' This helps avoid an error where a parameter appears to have been changed, -#' but remains the same as the attribute name used was incorrect. -#' -#' The returned list contains the following parameters: -#' -#' \itemize{ -#' \item \code{patient_inter}: Mean inter-arrival time between patients -#' in minutes. -#' \item \code{mean_n_consult_time}: Mean nurse consultation time in -#' minutes. -#' \item \code{number_of_nurses}: Number of available nurses (integer). -#' \item \code{data_collection_period}: Duration of data collection -#' period in minutes. -#' \item \code{number_of_runs}: Number of simulation runs (integer). -#' \item \code{scenario_name}: Label for the scenario (int|float|string). -#' \item \code{cores}: Number of cores to use for parallel execution -#' (integer). -#' \item \code{log_to_console}: Whether to print activity log to console. -#' \item \code{log_to_file}: Whether to save activity log to file. -#' \item \code{file_path}: Path to save log to file. -#' } -#' -#' @docType class -#' @importFrom R6 R6Class -#' @export -#' @rdname defaults - -Defaults <- R6Class( # nolint - - classname = "Defaults", - private = list( - defaults = list( - patient_inter = 4L, - mean_n_consult_time = 10L, - number_of_nurses = 5L, - data_collection_period = 80L, - number_of_runs = 100L, - scenario_name = NULL, - cores = 1L, - log_to_console = FALSE, - log_to_file = FALSE, - file_path = NULL - ), - allowed_keys = NULL - ), - - public = list( - - #' @description - #' Initialises the R6 object, setting the allowed keys based on the - #' defaults. 
- initialize = function() { - private[["allowed_keys"]] <- names(private[["defaults"]]) - }, - - #' @description - #' Retrieves the current list of default parameters. - get = function() { - private[["defaults"]] - }, - - #' @description - #' Update the defaults list with new values. - #' @param new_values A named list containing the parameters to be updated. - update = function(new_values) { - new_keys <- names(new_values) - invalid_keys <- new_keys[!new_keys %in% private[["allowed_keys"]]] - if (length(invalid_keys) > 0L) { - stop( - "Error: Attempted to add the following invalid keys: ", - toString(invalid_keys), - "\nYou can only have keys in the defaults list:\n", - paste(private[["allowed_keys"]], collapse = ",\n") - ) - } - for (key in new_keys) { - private[["defaults"]][[key]] <- new_values[[key]] - } - } - ) -) - - -#' Wrapper for Defaults() R6 class, to create a new instance. -#' -#' `defaults()` is a wrapper which enables us to create a new instance of -#' the class without needing to run `Defaults[["new"]]()` every time. -#' -#' @return Instance of the Defaults class. -#' -#' @export -#' @rdname defaults - -defaults <- function() { - return(Defaults[["new"]]()) -} diff --git a/R/process_replications.R b/R/get_run_results.R similarity index 96% rename from R/process_replications.R rename to R/get_run_results.R index 8cad404..56558e9 100644 --- a/R/process_replications.R +++ b/R/get_run_results.R @@ -1,4 +1,4 @@ -#' Process results from each replication. +#' Get results from each replication. 
#' #' For each replication (there can be one or many), calculate the: (1) number #' of arrivals, (2) mean wait time for each resource, (3) mean activity time @@ -22,14 +22,14 @@ #' @importFrom dplyr group_by summarise n_distinct mutate lead full_join #' @importFrom purrr reduce #' @importFrom rlang .data -#' @importFrom simmer get_mon_resources get_mon_arrivals +#' @importFrom simmer get_mon_resources get_mon_arrivals now #' @importFrom tidyr pivot_wider drop_na #' @importFrom tidyselect any_of #' #' @return Tibble with results from each replication. #' @export -process_replications <- function(results) { +get_run_results <- function(results) { # Remove patients who were still waiting and had not completed results[["arrivals"]] <- results[["arrivals"]] %>% diff --git a/R/model.R b/R/model.R index 79a17ba..d301032 100644 --- a/R/model.R +++ b/R/model.R @@ -1,9 +1,9 @@ -#' Run simulation +#' Run simulation. #' #' @param run_number Integer representing index of current simulation run. -#' @param param_class Instance of Defaults containing model parameters. +#' @param param Named list of model parameters. #' @param set_seed Whether to set seed within the model function (which we -#' may not wish to do if being set elsewhere - such as done in trial()). +#' may not wish to do if being set elsewhere - such as done in runner()). #' Default is TRUE. #' #' @importFrom simmer trajectory seize timeout release simmer add_resource @@ -15,14 +15,7 @@ #' @return Named list with two tables: monitored arrivals and resources #' @export -model <- function(run_number, param_class, set_seed = TRUE) { - - # Extract parameter list from the parameter class - # It is important to do this within the model function (rather than - # beforehand), to ensure any updates to the parameter list undergo - # checks from the Defaults R6 class (i.e. 
ensuring they are replacing - # existing keys in the list) - param <- param_class[["get"]]() +model <- function(run_number, param, set_seed = TRUE) { # Check all inputs are valid valid_inputs(run_number, param) @@ -76,36 +69,102 @@ model <- function(run_number, param_class, set_seed = TRUE) { arrivals = get_mon_arrivals(env, per_resource = TRUE, ongoing = TRUE), resources = get_mon_resources(env) ) + # Replace replication with appropriate run number (as these functions # assume, if not supplied with list of envs, that there was one replication) result[["arrivals"]][["replication"]] <- run_number result[["resources"]][["replication"]] <- run_number + # Add a column with the wait time of patients who remained unseen at the end + # of the simulation + result[["arrivals"]] <- result[["arrivals"]] %>% + mutate(q_time_unseen = ifelse(is.na(.data[["activity_time"]]), + now(env) - .data[["start_time"]], + NA)) return(result) } - -#' Check validity of input parameters +#' Validate input parameters for the simulation. #' #' @param run_number Integer representing index of current simulation run. #' @param param List containing parameters for the simulation. #' +#' @return Throws an error if any parameter is invalid. #' @export valid_inputs <- function(run_number, param) { + check_run_number(run_number) + check_param_names(param) + check_param_values(param) +} + +#' Checks if the run number is a non-negative integer. +#' +#' @param run_number Integer representing index of current simulation run. +#' +#' @return Throws an error if the run number is invalid. - # Check that the run number is an non-negative integer +check_run_number <- function(run_number) { if (run_number < 0L || run_number %% 1L != 0L) { stop("The run number must be a non-negative integer. Provided: ", run_number) } +} + +#' Validate parameter names. +#' +#' Ensure that all required parameters are present, and no extra parameters are +#' provided. 
+#' +#' @param param List containing parameters for the simulation. +#' +#' @return Throws an error if there are missing or extra parameters. + +check_param_names <- function(param) { + # Get valid argument names from the function + valid_names <- names(formals(parameters)) + + # Get names from input parameter list + input_names <- names(param) + + # Find missing keys (i.e. are there things in valid_names not in input) + # and extra keys (i.e. are there things in input not in valid_names) + missing_keys <- setdiff(valid_names, input_names) + extra_keys <- setdiff(input_names, valid_names) + + # If there are any missing or extra keys, throw an error + if (length(missing_keys) > 0L || length(extra_keys) > 0L) { + error_message <- "" + if (length(missing_keys) > 0L) { + error_message <- paste0( + error_message, "Missing keys: ", toString(missing_keys), ". " + ) + } + if (length(extra_keys) > 0L) { + error_message <- paste0( + error_message, "Extra keys: ", toString(extra_keys), ". " + ) + } + stop(error_message) + } +} + +#' Validate parameter values. +#' +#' Ensure that specific parameters are positive numbers, or non-negative +#' integers. +#' +#' @param param List containing parameters for the simulation. +#' +#' @return Throws an error if any specified parameter value is invalid. + +check_param_values <- function(param) { # Check that listed parameters are always positive p_list <- c("patient_inter", "mean_n_consult_time", "number_of_runs") for (p in p_list) { if (param[[p]] <= 0L) { - stop("The parameter '", p, - "' must be positive. Provided: ", param[[p]]) + stop('The parameter "', p, '" must be greater than 0.') } } @@ -113,8 +172,8 @@ valid_inputs <- function(run_number, param) { n_list <- c("data_collection_period", "number_of_nurses") for (n in n_list) { if (param[[n]] < 0L || param[[n]] %% 1L != 0L) { - stop("The parameter '", n, - "' must not be a non-negative integer. 
Provided: ", param[[n]]) + stop('The parameter "', n, + '" must be an integer greater than or equal to 0.') } } } diff --git a/R/parameters.R b/R/parameters.R new file mode 100644 index 0000000..507088f --- /dev/null +++ b/R/parameters.R @@ -0,0 +1,35 @@ +#' Create a named list of default model parameters (which can be altered). +#' +#' When input to model(), valid_inputs() will fetch the inputs to this +#' function and compare them against the provided list, to ensure no +#' new keys have been add to the list. +#' +#' @param patient_inter Mean inter-arrival time between patients in minutes. +#' @param mean_n_consult_time Mean nurse consultation time in minutes. +#' @param number_of_nurses Number of available nurses (integer). +#' @param data_collection_period Duration of data collection period in +#' minutes. +#' @param number_of_runs Number of simulation runs (integer). +#' @param scenario_name Label for the scenario (int|float|string). +#' @param cores Number of cores to use for parallel execution (integer). +#' @param log_to_console Whether to print activity log to console. +#' @param log_to_file Whether to save activity log to file. +#' @param file_path Path to save log to file. +#' +#' @return A named list containing the parameters for the model. +#' @export + +parameters <- function( + patient_inter = 4L, + mean_n_consult_time = 10L, + number_of_nurses = 5L, + data_collection_period = 80L, + number_of_runs = 100L, + scenario_name = NULL, + cores = 1L, + log_to_console = FALSE, + log_to_file = FALSE, + file_path = NULL +) { + return(as.list(environment())) +} diff --git a/R/trial.R b/R/runner.R similarity index 75% rename from R/trial.R rename to R/runner.R index 9775a45..38e168c 100644 --- a/R/trial.R +++ b/R/runner.R @@ -1,6 +1,6 @@ #' Run simulation for multiple replications, sequentially or in parallel. #' -#' @param param_class Instance of Defaults containing model parameters. +#' @param param Named list of model parameters. 
#' #' @importFrom future plan multisession sequential #' @importFrom future.apply future_lapply @@ -8,19 +8,15 @@ #' @return Named list with two tables: monitored arrivals and resources. #' @export -trial <- function(param_class) { - # Get specified number of cores - n_cores <- param_class[["get"]]()[["cores"]] - n_runs <- param_class[["get"]]()[["number_of_runs"]] - +runner <- function(param) { # Determine the parallel execution plan - if (n_cores == 1L) { + if (param[["cores"]] == 1L) { plan(sequential) # Sequential execution } else { - if (n_cores == -1L) { + if (param[["cores"]] == -1L) { cores <- future::availableCores() - 1L } else { - cores <- n_cores + cores <- param[["cores"]] } plan(multisession, workers = cores) # Parallel execution } @@ -29,17 +25,19 @@ trial <- function(param_class) { # Mark set_seed as FALSE as we handle this using future.seed(), rather than # within the function, and we don't want to override future.seed results <- future_lapply( - 1L:n_runs, + 1L:param[["number_of_runs"]], function(i) { simulation::model(run_number = i, - param_class = param_class, + param = param, set_seed = FALSE) }, future.seed = 123456L ) # Combine the results from multiple replications into just two dataframes - if (n_runs > 1L) { + if (param[["number_of_runs"]] == 1L) { + results <- results[[1L]] + } else { all_arrivals <- do.call( rbind, lapply(results, function(x) x[["arrivals"]]) ) diff --git a/R/scenarios.R b/R/scenarios.R new file mode 100644 index 0000000..b25e746 --- /dev/null +++ b/R/scenarios.R @@ -0,0 +1,57 @@ +#' Run a set of scenarios +#' +#' @param scenarios List where key is name of parameter and value is a list of +#' different values to run in scenarios +#' @param base_list List of parameters to use as base for scenarios, which can +#' be partial (as will input to parameters() function). +#' +#' @return Tibble with results from each replication for each scenario. 
+#' @export + +run_scenarios <- function(scenarios, base_list) { + # Generate all permutations of the scenarios + all_scenarios <- expand.grid(scenarios) + + # Preview the number of scenarios + print(sprintf("There are %d scenarios. Running:", nrow(all_scenarios))) + + results <- list() + + # Iterate through each scenario + for (index in seq_len(nrow(all_scenarios))) { + + # Filter to one of the scenarios + scenario_to_run <- all_scenarios[index, , drop = FALSE] + + # Print the scenario parameters + formatted_scenario <- toString( + paste0(names(scenario_to_run), " = ", scenario_to_run) + ) + print(paste0("Scenario: ", formatted_scenario)) + + # Create parameter list with scenario-specific values + args <- c(scenario_to_run, list(scenario_name = index)) + + # Create instance of parameter class with specified base parameters + param <- do.call(parameters, base_list) + + # Update parameter list with the scenario parameters + for (name in names(args)) { + param[[name]] <- args[[name]] + } + + # Run replications for the current scenario and process results + raw_results <- runner(param) + scenario_result <- get_run_results(raw_results) + + # Append scenario parameters to the results + scenario_result[["scenario"]] <- index + for (key in names(scenario_to_run)) { + scenario_result[[key]] <- scenario_to_run[[key]] + } + + # Append to results list + results[[index]] <- scenario_result + } + return(do.call(rbind, results)) +} diff --git a/README.md b/README.md index b10c3be..7928546 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@
-# Simple Reproducible R
Discrete-Event Simulation (DES) Template +# R DES RAP Template ![R 4.4.1](https://img.shields.io/badge/-R_4.4.1-276DC2?logo=r&logoColor=white) @@ -10,7 +10,7 @@ -A simple template for creating DES models in R, within a **reproducible analytical pipeline (RAP)**
+A template for creating **discrete-event simulation (DES)** models in R within a **reproducible analytical pipeline (RAP)**.

Click on Use this template to initialise a new repository.
A `README` template is provided at the **end of this file**. @@ -170,6 +170,14 @@ sudo apt install libglpk-dev libxml2-dev πŸ”Ž Complete the Strengthening The Reporting of Empirical Simulation Studies (STRESS) checklist (`stress_des.md`) and use this to support writing publication/report, and attach as an appendice to report. +πŸ“” **RMarkdown** + +Several Rmarkdown files are provided which give examples running the models, choosing parameters, and explaining how things work. If you wish to knit all rmarkdown files from the command line, you can use the provided bash script by running: + +``` +bash run_rmarkdown.sh +``` + πŸ”Ž **Tests** To run tests: @@ -239,7 +247,8 @@ repo/ The overall run time will vary depending on how the template model is used. A few example implementations are provided in `rmarkdown/` and the run times for these were: * `analysis.Rmd`: 42s -* `choosing_parameters.Rmd`: 56s +* `choosing_cores.Rmd`: 1m 14s +* `choosing_replications.Rmd`: 2s * `generate_exp_results.Rmd`: 0s @@ -250,7 +259,7 @@ These times were obtained on an Intel Core i7-12700H with 32GB RAM running Ubunt ## πŸ“ Citation - + > Heather, A. (2025). Simple Reproducible R Discrete-Event Simulation (DES) Template. GitHub. https://github.com/pythonhealthdatascience/rap_template_r_des. diff --git a/man/check_param_names.Rd b/man/check_param_names.Rd new file mode 100644 index 0000000..ee31d54 --- /dev/null +++ b/man/check_param_names.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model.R +\name{check_param_names} +\alias{check_param_names} +\title{Validate parameter names.} +\usage{ +check_param_names(param) +} +\arguments{ +\item{param}{List containing parameters for the simulation.} +} +\value{ +Throws an error if there are missing or extra parameters. +} +\description{ +Ensure that all required parameters are present, and no extra parameters are +provided. 
+} diff --git a/man/check_param_values.Rd b/man/check_param_values.Rd new file mode 100644 index 0000000..c113a43 --- /dev/null +++ b/man/check_param_values.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model.R +\name{check_param_values} +\alias{check_param_values} +\title{Validate parameter values.} +\usage{ +check_param_values(param) +} +\arguments{ +\item{param}{List containing parameters for the simulation.} +} +\value{ +Throws an error if any specified parameter value is invalid. +} +\description{ +Ensure that specific parameters are positive numbers, or non-negative +integers. +} diff --git a/man/check_run_number.Rd b/man/check_run_number.Rd new file mode 100644 index 0000000..924e9b8 --- /dev/null +++ b/man/check_run_number.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model.R +\name{check_run_number} +\alias{check_run_number} +\title{Checks if the run number is a non-negative integer.} +\usage{ +check_run_number(run_number) +} +\arguments{ +\item{run_number}{Integer representing index of current simulation run.} +} +\value{ +Throws an error if the run number is invalid. +} +\description{ +Checks if the run number is a non-negative integer. 
+} diff --git a/man/confidence_interval_method.Rd b/man/confidence_interval_method.Rd new file mode 100644 index 0000000..4fd329f --- /dev/null +++ b/man/confidence_interval_method.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/choose_replications.R +\name{confidence_interval_method} +\alias{confidence_interval_method} +\title{Use the confidence interval method to select the number of replications.} +\usage{ +confidence_interval_method( + replications, + desired_precision, + metric, + yaxis_title, + path, + min_rep = NULL +) +} +\arguments{ +\item{replications}{Number of times to run the model.} + +\item{desired_precision}{Desired mean deviation from confidence interval.} + +\item{metric}{Name of performance metric to assess.} + +\item{yaxis_title}{Label for y axis.} + +\item{path}{Path inc. filename to save figure to.} + +\item{min_rep}{A suggested minimum number of replications (default=NULL).} +} +\value{ +Dataframe with results from each replication. +} +\description{ +Use the confidence interval method to select the number of replications. +} diff --git a/man/defaults.Rd b/man/defaults.Rd deleted file mode 100644 index c42df92..0000000 --- a/man/defaults.Rd +++ /dev/null @@ -1,107 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/defaults.R -\docType{class} -\name{Defaults} -\alias{Defaults} -\alias{defaults} -\title{R6 class returning list of default model parameters} -\usage{ -defaults() -} -\value{ -Instance of the Defaults class. -} -\description{ -`Defaults` is an R6 class instead of a function because this allows us to -return a list whilst also having functionality which only allows -modification of existing attributes, and not the addition of new attributes. -This helps avoid an error where a parameter appears to have been changed, -but remains the same as the attribute name used was incorrect. 
- -The returned list contains the following parameters: - -\itemize{ - \item \code{patient_inter}: Mean inter-arrival time between patients - in minutes. - \item \code{mean_n_consult_time}: Mean nurse consultation time in - minutes. - \item \code{number_of_nurses}: Number of available nurses (integer). - \item \code{data_collection_period}: Duration of data collection - period in minutes. - \item \code{number_of_runs}: Number of simulation runs (integer). - \item \code{scenario_name}: Label for the scenario (int|float|string). - \item \code{cores}: Number of cores to use for parallel execution - (integer). - \item \code{log_to_console}: Whether to print activity log to console. - \item \code{log_to_file}: Whether to save activity log to file. - \item \code{file_path}: Path to save log to file. -} - -`defaults()` is a wrapper which enables us to create a new instance of -the class without needing to run `Defaults[["new"]]()` every time. -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-Defaults-new}{\code{Defaults$new()}} -\item \href{#method-Defaults-get}{\code{Defaults$get()}} -\item \href{#method-Defaults-update}{\code{Defaults$update()}} -\item \href{#method-Defaults-clone}{\code{Defaults$clone()}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Defaults-new}{}}} -\subsection{Method \code{new()}}{ -Initialises the R6 object, setting the allowed keys based on the -defaults. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Defaults$new()}\if{html}{\out{
}} -} - -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Defaults-get}{}}} -\subsection{Method \code{get()}}{ -Retrieves the current list of default parameters. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Defaults$get()}\if{html}{\out{
}} -} - -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Defaults-update}{}}} -\subsection{Method \code{update()}}{ -Update the defaults list with new values. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Defaults$update(new_values)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{new_values}}{A named list containing the parameters to be updated.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Defaults-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Defaults$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/process_replications.Rd b/man/get_run_results.Rd similarity index 85% rename from man/process_replications.Rd rename to man/get_run_results.Rd index 3e254fe..679da04 100644 --- a/man/process_replications.Rd +++ b/man/get_run_results.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/process_replications.R -\name{process_replications} -\alias{process_replications} -\title{Process results from each replication.} +% Please edit documentation in R/get_run_results.R +\name{get_run_results} +\alias{get_run_results} +\title{Get results from each replication.} \usage{ -process_replications(results) +get_run_results(results) } \arguments{ \item{results}{Named list with `arrivals` containing output from diff --git a/man/model.Rd b/man/model.Rd index 387ef06..19ee847 100644 --- a/man/model.Rd +++ b/man/model.Rd @@ -2,22 +2,22 @@ % Please edit documentation in R/model.R \name{model} \alias{model} -\title{Run simulation} +\title{Run simulation.} \usage{ -model(run_number, param_class, set_seed = TRUE) +model(run_number, param, set_seed = TRUE) } \arguments{ \item{run_number}{Integer representing index of current simulation run.} -\item{param_class}{Instance of Defaults containing model parameters.} +\item{param}{Named list of model parameters.} \item{set_seed}{Whether to set seed within the model function (which we -may not wish to do if being set elsewhere - such as done in trial()). +may not wish to do if being set elsewhere - such as done in runner()). Default is TRUE.} } \value{ Named list with two tables: monitored arrivals and resources } \description{ -Run simulation +Run simulation. 
} diff --git a/man/parameters.Rd b/man/parameters.Rd new file mode 100644 index 0000000..73a62e1 --- /dev/null +++ b/man/parameters.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parameters.R +\name{parameters} +\alias{parameters} +\title{Create a named list of default model parameters (which can be altered).} +\usage{ +parameters( + patient_inter = 4L, + mean_n_consult_time = 10L, + number_of_nurses = 5L, + data_collection_period = 80L, + number_of_runs = 100L, + scenario_name = NULL, + cores = 1L, + log_to_console = FALSE, + log_to_file = FALSE, + file_path = NULL +) +} +\arguments{ +\item{patient_inter}{Mean inter-arrival time between patients in minutes.} + +\item{mean_n_consult_time}{Mean nurse consultation time in minutes.} + +\item{number_of_nurses}{Number of available nurses (integer).} + +\item{data_collection_period}{Duration of data collection period in +minutes.} + +\item{number_of_runs}{Number of simulation runs (integer).} + +\item{scenario_name}{Label for the scenario (int|float|string).} + +\item{cores}{Number of cores to use for parallel execution (integer).} + +\item{log_to_console}{Whether to print activity log to console.} + +\item{log_to_file}{Whether to save activity log to file.} + +\item{file_path}{Path to save log to file.} +} +\value{ +A named list containing the parameters for the model. +} +\description{ +When input to model(), valid_inputs() will fetch the inputs to this +function and compare them against the provided list, to ensure no +new keys have been add to the list. 
+} diff --git a/man/run_scenarios.Rd b/man/run_scenarios.Rd new file mode 100644 index 0000000..92ba13e --- /dev/null +++ b/man/run_scenarios.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/scenarios.R +\name{run_scenarios} +\alias{run_scenarios} +\title{Run a set of scenarios} +\usage{ +run_scenarios(scenarios, base_list) +} +\arguments{ +\item{scenarios}{List where key is name of parameter and value is a list of +different values to run in scenarios} + +\item{base_list}{List of parameters to use as base for scenarios, which can +be partial (as will input to parameters() function).} +} +\value{ +Tibble with results from each replication for each scenario. +} +\description{ +Run a set of scenarios +} diff --git a/man/trial.Rd b/man/runner.Rd similarity index 66% rename from man/trial.Rd rename to man/runner.Rd index a7538a2..e351cc0 100644 --- a/man/trial.Rd +++ b/man/runner.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/trial.R -\name{trial} -\alias{trial} +% Please edit documentation in R/runner.R +\name{runner} +\alias{runner} \title{Run simulation for multiple replications, sequentially or in parallel.} \usage{ -trial(param_class) +runner(param) } \arguments{ -\item{param_class}{Instance of Defaults containing model parameters.} +\item{param}{Named list of model parameters.} } \value{ Named list with two tables: monitored arrivals and resources. 
diff --git a/man/valid_inputs.Rd b/man/valid_inputs.Rd index b17f432..0c02e7e 100644 --- a/man/valid_inputs.Rd +++ b/man/valid_inputs.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/model.R \name{valid_inputs} \alias{valid_inputs} -\title{Check validity of input parameters} +\title{Validate input parameters for the simulation.} \usage{ valid_inputs(run_number, param) } @@ -11,6 +11,9 @@ valid_inputs(run_number, param) \item{param}{List containing parameters for the simulation.} } +\value{ +Throws an error if any parameter is invalid. +} \description{ -Check validity of input parameters +Validate input parameters for the simulation. } diff --git a/outputs/base_trial.csv b/outputs/base_run_results.csv similarity index 100% rename from outputs/base_trial.csv rename to outputs/base_run_results.csv diff --git a/outputs/cores1.png b/outputs/cores1.png index ff130e7..12b1982 100644 Binary files a/outputs/cores1.png and b/outputs/cores1.png differ diff --git a/outputs/cores2.png b/outputs/cores2.png index e60ffa5..b866488 100644 Binary files a/outputs/cores2.png and b/outputs/cores2.png differ diff --git a/outputs/scenario_nurse_util_compare_templates.png b/outputs/scenario_nurse_util_compare_templates.png new file mode 100644 index 0000000..51eadfd Binary files /dev/null and b/outputs/scenario_nurse_util_compare_templates.png differ diff --git a/outputs/scenario_nurse_wait_compare_templates.png b/outputs/scenario_nurse_wait_compare_templates.png new file mode 100644 index 0000000..129eaf8 Binary files /dev/null and b/outputs/scenario_nurse_wait_compare_templates.png differ diff --git a/renv.lock b/renv.lock index 826f1b3..bb4f913 100644 --- a/renv.lock +++ b/renv.lock @@ -1104,7 +1104,7 @@ }, "parallelly": { "Package": "parallelly", - "Version": "1.41.0", + "Version": "1.42.0", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1112,7 +1112,23 @@ "tools", "utils" ], - "Hash": "f5ae11e8e15ee84950752e7b01c55295" + "Hash": 
"78f830734a4b488f2c72bf00cde6381e" + }, + "patrick": { + "Package": "patrick", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "dplyr", + "glue", + "purrr", + "rlang", + "testthat", + "tibble" + ], + "Hash": "a1ab7291b1a3321ebacd438ebb73c650" }, "pillar": { "Package": "pillar", @@ -1293,7 +1309,7 @@ }, "purrr": { "Package": "purrr", - "Version": "1.0.2", + "Version": "1.0.4", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1304,7 +1320,7 @@ "rlang", "vctrs" ], - "Hash": "1cba04a4e9414bdefc9dcaa99649a8dc" + "Hash": "cc8b5d43f90551fa6df0a6be5d640a4f" }, "ragg": { "Package": "ragg", diff --git a/rmarkdown/analysis.Rmd b/rmarkdown/analysis.Rmd index 4e1916c..c44bc40 100644 --- a/rmarkdown/analysis.Rmd +++ b/rmarkdown/analysis.Rmd @@ -5,6 +5,7 @@ date: "`r Sys.Date()`" output: github_document: toc: true + html_preview: false --- This notebook presents execution and results from: @@ -22,7 +23,7 @@ The run time is provided at the end of the notebook. Install the latest version of the local simulation package. ```{r} -devtools::install() +devtools::load_all() ``` Import required packages. @@ -58,16 +59,16 @@ output_dir <- file.path("..", "outputs") Run with default parameters. ```{r} -envs <- trial(param = defaults()) +raw_results <- runner(param = parameters()) ``` Process results and save to `.csv`. 
```{r} -trial_results <- process_replications(envs) -head(trial_results) +run_results <- get_run_results(raw_results) +head(run_results) -write.csv(trial_results, file.path(output_dir, "base_trial.csv")) +write.csv(run_results, file.path(output_dir, "base_run_results.csv")) ``` ## View spread of results across replication @@ -84,7 +85,7 @@ write.csv(trial_results, file.path(output_dir, "base_trial.csv")) plot_results_spread <- function(column, x_label, file) { # Generate plot - p <- ggplot(trial_results, aes(.data[[column]])) + + p <- ggplot(run_results, aes(.data[[column]])) + geom_histogram(bins = 10L) + labs(x = x_label, y = "Frequency") + theme_minimal() @@ -119,59 +120,6 @@ plot_results_spread(column = "utilisation_nurse", ## Scenario analysis -```{r} -#' Run a set of scenarios -#' -#' @param scenarios List where key is name of parameter and value is a list of -#' different values to run in scenarios -#' -#' @return Tibble with results from each replication for each scenario. - -run_scenarios <- function(scenarios) { - # Generate all permutations of the scenarios - all_scenarios <- expand.grid(scenarios) - - # Preview the number of scenarios - print(sprintf("There are %d scenarios. 
Running:", nrow(all_scenarios))) - - results <- list() - - # Iterate through each scenario - for (index in seq_len(nrow(all_scenarios))) { - - # Filter to one of the scenarios - scenario_to_run <- all_scenarios[index, , drop = FALSE] - - # Print the scenario parameters - formatted_scenario <- toString( - paste0(names(scenario_to_run), " = ", scenario_to_run) - ) - print(paste0("Scenario: ", formatted_scenario)) - - # Overwrite defaults with scenario-specific values - param_class <- defaults() - param_class[["update"]](list(scenario_name = index)) - param_class[["update"]](scenario_to_run) - - # Run the trial for the current scenario - envs <- trial(param_class) - - # Extract results - scenario_result <- process_replications(envs) - - # Append scenario parameters to the results - scenario_result[["scenario"]] <- index - for (key in names(scenario_to_run)) { - scenario_result[[key]] <- scenario_to_run[[key]] - } - - # Append to results list - results[[index]] <- scenario_result - } - return(do.call(rbind, results)) -} -``` - ```{r} # Run scenario analysis scenarios <- list( @@ -179,7 +127,7 @@ scenarios <- list( number_of_nurses = c(5L, 6L, 7L, 8L) ) -scenario_results <- run_scenarios(scenarios) +scenario_results <- run_scenarios(scenarios, base_list = parameters()) ``` ```{r} @@ -315,6 +263,76 @@ print(table_latex, file = file.path(output_dir, "scenario_nurse_util.tex")) ``` +### Running a basic example (which can compare to Python template) + +To enable comparison between the templates, this section runs the model with a simple set of base case parameters (matched to Python), and then running some scenarios on top of that base case. 
+ +```{r} +# Define the base param for this altered run +new_base <- parameters( + patient_inter = 4L, + mean_n_consult_time = 10L, + number_of_nurses = 5L, + # No warm-up (not possible in R, but set to 0 in Python) + data_collection_period = 1440L, + number_of_runs = 10L, + cores = 1L +) + +# Define scenarios +scenarios <- list( + patient_inter = c(3L, 4L, 5L, 6L, 7L), + number_of_nurses = c(5L, 6L, 7L, 8L) +) + +# Run scenarios +compare_template_results <- run_scenarios(scenarios, new_base) +``` + +```{r} +# Preview scenario results dataframe +print(dim(compare_template_results)) +head(compare_template_results) +``` + +```{r} +# Define path +path <- file.path(output_dir, "scenario_nurse_wait_compare_templates.png") + +# Calculate results and generate plot +result <- plot_scenario( + results = compare_template_results, + x_var = "patient_inter", + result_var = "mean_waiting_time_nurse", + colour_var = "number_of_nurses", + xaxis_title = "Patient inter-arrival time", + yaxis_title = "Mean wait time for nurse (minutes)", + legend_title = "Nurses", + path = path +) + +# View plot +include_graphics(path) + +# Define path +path <- file.path(output_dir, "scenario_nurse_util_compare_templates.png") + +# Calculate results and generate plot +result <- plot_scenario( + results = compare_template_results, + x_var = "patient_inter", + result_var = "utilisation_nurse", + colour_var = "number_of_nurses", + xaxis_title = "Patient inter-arrival time", + yaxis_title = "Mean nurse utilisation", + legend_title = "Nurses", + path = path +) + +# View plot +include_graphics(path) +``` + ## Sensitivity analysis Can use similar code to perform sensitivity analyses. @@ -327,7 +345,7 @@ Can use similar code to perform sensitivity analyses. 
```{r} # Run sensitivity analysis consult <- list(mean_n_consult_time = c(8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L)) -sensitivity_consult <- run_scenarios(consult) +sensitivity_consult <- run_scenarios(consult, base_list = parameters()) ``` ```{r} @@ -380,9 +398,8 @@ Note: In this model, if patients are still waiting to be seen at the end of the ```{r} -param_class <- defaults() -param_class[["update"]](list(patient_inter = 0.5)) -result <- model(run_number = 0L, param_class = param_class) +param <- parameters(patient_inter = 0.5) +result <- model(run_number = 0L, param = param) tail(result[["arrivals"]]) ``` @@ -393,12 +410,13 @@ Simmer has a `verbose` setting which will output activity information if set to In this example, the log prints to screen as we have set `log_to_console` to TRUE. This could also be saved to a file by setting `log_to_file` to TRUE and providing a `file_path` in `param`. ```{r} -param_class <- defaults() -param_class[["update"]](list(data_collection_period = 100L, - number_of_runs = 1L, - cores = 1L, - log_to_console = TRUE)) -verbose_run <- model(run_number = 0L, param_class = param_class) +param <- parameters( + data_collection_period = 100L, + number_of_runs = 1L, + cores = 1L, + log_to_console = TRUE +) +verbose_run <- model(run_number = 0L, param = param) ``` This will align with the recorded results of each patient. 
diff --git a/rmarkdown/analysis.md b/rmarkdown/analysis.md index bd6418c..f32c9bf 100644 --- a/rmarkdown/analysis.md +++ b/rmarkdown/analysis.md @@ -1,13 +1,15 @@ Analysis ================ Amy Heather -2025-01-27 +2025-03-04 - [Set up](#set-up) - [Default run](#default-run) - [View spread of results across replication](#view-spread-of-results-across-replication) - [Scenario analysis](#scenario-analysis) + - [Running a basic example (which can compare to Python + template)](#running-a-basic-example-which-can-compare-to-python-template) - [Sensitivity analysis](#sensitivity-analysis) - [NaN results](#nan-results) - [Example run with logs](#example-run-with-logs) @@ -35,34 +37,10 @@ The run time is provided at the end of the notebook. Install the latest version of the local simulation package. ``` r -devtools::install() -``` - - ## - ## ── R CMD build ───────────────────────────────────────────────────────────────── - ## checking for file β€˜/home/amy/Documents/stars/rap_template_r_des/DESCRIPTION’ ... βœ” checking for file β€˜/home/amy/Documents/stars/rap_template_r_des/DESCRIPTION’ - ## ─ preparing β€˜simulation’: - ## checking DESCRIPTION meta-information ... βœ” checking DESCRIPTION meta-information - ## ─ checking for LF line-endings in source and make files and shell scripts - ## ─ checking for empty or unneeded directories - ## Omitted β€˜LazyData’ from DESCRIPTION - ## ─ building β€˜simulation_0.1.0.tar.gz’ - ## - ## Running /opt/R/4.4.1/lib/R/bin/R CMD INSTALL \ - ## /tmp/Rtmpbqk15J/simulation_0.1.0.tar.gz --install-tests - ## * installing to library β€˜/home/amy/.cache/R/renv/library/rap_template_r_des-cd7d6844/linux-ubuntu-noble/R-4.4/x86_64-pc-linux-gnu’ - ## * installing *source* package β€˜simulation’ ... 
- ## ** using staged installation - ## ** R - ## ** tests - ## ** byte-compile and prepare package for lazy loading - ## ** help - ## *** installing help indices - ## ** building package indices - ## ** testing if installed package can be loaded from temporary location - ## ** testing if installed package can be loaded from final location - ## ** testing if installed package keeps a record of temporary installation path - ## * DONE (simulation) +devtools::load_all() +``` + + ## β„Ή Loading simulation Import required packages. @@ -97,14 +75,14 @@ output_dir <- file.path("..", "outputs") Run with default parameters. ``` r -envs <- trial(param = defaults()) +raw_results <- runner(param = parameters()) ``` Process results and save to `.csv`. ``` r -trial_results <- process_replications(envs) -head(trial_results) +run_results <- get_run_results(raw_results) +head(run_results) ``` ## # A tibble: 6 Γ— 5 @@ -119,7 +97,7 @@ head(trial_results) ## # β„Ή 1 more variable: utilisation_nurse ``` r -write.csv(trial_results, file.path(output_dir, "base_trial.csv")) +write.csv(run_results, file.path(output_dir, "base_run_results.csv")) ``` ## View spread of results across replication @@ -136,7 +114,7 @@ write.csv(trial_results, file.path(output_dir, "base_trial.csv")) plot_results_spread <- function(column, x_label, file) { # Generate plot - p <- ggplot(trial_results, aes(.data[[column]])) + + p <- ggplot(run_results, aes(.data[[column]])) + geom_histogram(bins = 10L) + labs(x = x_label, y = "Frequency") + theme_minimal() @@ -185,59 +163,6 @@ plot_results_spread(column = "utilisation_nurse", ## Scenario analysis -``` r -#' Run a set of scenarios -#' -#' @param scenarios List where key is name of parameter and value is a list of -#' different values to run in scenarios -#' -#' @return Tibble with results from each replication for each scenario. 
- -run_scenarios <- function(scenarios) { - # Generate all permutations of the scenarios - all_scenarios <- expand.grid(scenarios) - - # Preview the number of scenarios - print(sprintf("There are %d scenarios. Running:", nrow(all_scenarios))) - - results <- list() - - # Iterate through each scenario - for (index in seq_len(nrow(all_scenarios))) { - - # Filter to one of the scenarios - scenario_to_run <- all_scenarios[index, , drop = FALSE] - - # Print the scenario parameters - formatted_scenario <- toString( - paste0(names(scenario_to_run), " = ", scenario_to_run) - ) - print(paste0("Scenario: ", formatted_scenario)) - - # Overwrite defaults with scenario-specific values - param_class <- defaults() - param_class[["update"]](list(scenario_name = index)) - param_class[["update"]](scenario_to_run) - - # Run the trial for the current scenario - envs <- trial(param_class) - - # Extract results - scenario_result <- process_replications(envs) - - # Append scenario parameters to the results - scenario_result[["scenario"]] <- index - for (key in names(scenario_to_run)) { - scenario_result[[key]] <- scenario_to_run[[key]] - } - - # Append to results list - results[[index]] <- scenario_result - } - return(do.call(rbind, results)) -} -``` - ``` r # Run scenario analysis scenarios <- list( @@ -245,7 +170,7 @@ scenarios <- list( number_of_nurses = c(5L, 6L, 7L, 8L) ) -scenario_results <- run_scenarios(scenarios) +scenario_results <- run_scenarios(scenarios, base_list = parameters()) ``` ## [1] "There are 20 scenarios. 
Running:" @@ -422,7 +347,7 @@ print(table_latex) ``` ## % latex table generated in R 4.4.1 by xtable 1.8-4 package - ## % Mon Jan 27 16:03:02 2025 + ## % Tue Mar 4 15:50:08 2025 ## \begin{table}[ht] ## \centering ## \begin{tabular}{rrllll} @@ -444,6 +369,123 @@ print(table_latex, file = file.path(output_dir, "scenario_nurse_util.tex")) ``` +### Running a basic example (which can compare to Python template) + +To enable comparison between the templates, this section runs the model +with a simple set of base case parameters (matched to Python), and then +running some scenarios on top of that base case. + +``` r +# Define the base param for this altered run +new_base <- parameters( + patient_inter = 4L, + mean_n_consult_time = 10L, + number_of_nurses = 5L, + # No warm-up (not possible in R, but set to 0 in Python) + data_collection_period = 1440L, + number_of_runs = 10L, + cores = 1L +) + +# Define scenarios +scenarios <- list( + patient_inter = c(3L, 4L, 5L, 6L, 7L), + number_of_nurses = c(5L, 6L, 7L, 8L) +) + +# Run scenarios +compare_template_results <- run_scenarios(scenarios, new_base) +``` + + ## [1] "There are 20 scenarios. 
Running:" + ## [1] "Scenario: patient_inter = 3, number_of_nurses = 5" + ## [1] "Scenario: patient_inter = 4, number_of_nurses = 5" + ## [1] "Scenario: patient_inter = 5, number_of_nurses = 5" + ## [1] "Scenario: patient_inter = 6, number_of_nurses = 5" + ## [1] "Scenario: patient_inter = 7, number_of_nurses = 5" + ## [1] "Scenario: patient_inter = 3, number_of_nurses = 6" + ## [1] "Scenario: patient_inter = 4, number_of_nurses = 6" + ## [1] "Scenario: patient_inter = 5, number_of_nurses = 6" + ## [1] "Scenario: patient_inter = 6, number_of_nurses = 6" + ## [1] "Scenario: patient_inter = 7, number_of_nurses = 6" + ## [1] "Scenario: patient_inter = 3, number_of_nurses = 7" + ## [1] "Scenario: patient_inter = 4, number_of_nurses = 7" + ## [1] "Scenario: patient_inter = 5, number_of_nurses = 7" + ## [1] "Scenario: patient_inter = 6, number_of_nurses = 7" + ## [1] "Scenario: patient_inter = 7, number_of_nurses = 7" + ## [1] "Scenario: patient_inter = 3, number_of_nurses = 8" + ## [1] "Scenario: patient_inter = 4, number_of_nurses = 8" + ## [1] "Scenario: patient_inter = 5, number_of_nurses = 8" + ## [1] "Scenario: patient_inter = 6, number_of_nurses = 8" + ## [1] "Scenario: patient_inter = 7, number_of_nurses = 8" + +``` r +# Preview scenario results dataframe +print(dim(compare_template_results)) +``` + + ## [1] 200 8 + +``` r +head(compare_template_results) +``` + + ## # A tibble: 6 Γ— 8 + ## replication arrivals mean_waiting_time_nurse mean_activity_time_nurse + ## + ## 1 1 471 1.69 10.3 + ## 2 2 502 3.36 10.3 + ## 3 3 483 1.86 10.1 + ## 4 4 461 2.73 10.6 + ## 5 5 466 1.25 9.71 + ## 6 6 466 2.13 10.3 + ## # β„Ή 4 more variables: utilisation_nurse , scenario , + ## # patient_inter , number_of_nurses + +``` r +# Define path +path <- file.path(output_dir, "scenario_nurse_wait_compare_templates.png") + +# Calculate results and generate plot +result <- plot_scenario( + results = compare_template_results, + x_var = "patient_inter", + result_var = 
"mean_waiting_time_nurse", + colour_var = "number_of_nurses", + xaxis_title = "Patient inter-arrival time", + yaxis_title = "Mean wait time for nurse (minutes)", + legend_title = "Nurses", + path = path +) + +# View plot +include_graphics(path) +``` + +![](../outputs/scenario_nurse_wait_compare_templates.png) + +``` r +# Define path +path <- file.path(output_dir, "scenario_nurse_util_compare_templates.png") + +# Calculate results and generate plot +result <- plot_scenario( + results = compare_template_results, + x_var = "patient_inter", + result_var = "utilisation_nurse", + colour_var = "number_of_nurses", + xaxis_title = "Patient inter-arrival time", + yaxis_title = "Mean nurse utilisation", + legend_title = "Nurses", + path = path +) + +# View plot +include_graphics(path) +``` + +![](../outputs/scenario_nurse_util_compare_templates.png) + ## Sensitivity analysis Can use similar code to perform sensitivity analyses. @@ -463,7 +505,7 @@ Can use similar code to perform sensitivity analyses. ``` r # Run sensitivity analysis consult <- list(mean_n_consult_time = c(8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L)) -sensitivity_consult <- run_scenarios(consult) +sensitivity_consult <- run_scenarios(consult, base_list = parameters()) ``` ## [1] "There are 8 scenarios. 
Running:" @@ -531,7 +573,7 @@ print(sensitivity_table_latex) ``` ## % latex table generated in R 4.4.1 by xtable 1.8-4 package - ## % Mon Jan 27 16:03:07 2025 + ## % Tue Mar 4 15:50:24 2025 ## \begin{table}[ht] ## \centering ## \begin{tabular}{rrl} @@ -566,19 +608,25 @@ included in the results as we set `ongoing = TRUE` for ``` r -param_class <- defaults() -param_class[["update"]](list(patient_inter = 0.5)) -result <- model(run_number = 0L, param_class = param_class) +param <- parameters(patient_inter = 0.5) +result <- model(run_number = 0L, param = param) tail(result[["arrivals"]]) ``` ## name start_time end_time activity_time resource replication - ## 160 patient97 47.84534 NA NA nurse 0 - ## 161 patient155 77.41953 NA NA nurse 0 - ## 162 patient66 35.72669 NA NA nurse 0 - ## 163 patient52 27.21852 NA NA nurse 0 - ## 164 patient156 77.59569 NA NA nurse 0 + ## 160 patient148 74.24750 NA NA nurse 0 + ## 161 patient151 75.76834 NA NA nurse 0 + ## 162 patient153 76.74319 NA NA nurse 0 + ## 163 patient159 78.37804 NA NA nurse 0 + ## 164 patient43 20.56980 NA NA nurse 0 ## 165 patient160 78.41585 NA NA nurse 0 + ## q_time_unseen + ## 160 5.752504 + ## 161 4.231663 + ## 162 3.256809 + ## 163 1.621961 + ## 164 59.430201 + ## 165 1.584149 ## Example run with logs @@ -592,12 +640,13 @@ In this example, the log prints to screen as we have set `log_to_file` to TRUE and providing a `file_path` in `param`. 
``` r -param_class <- defaults() -param_class[["update"]](list(data_collection_period = 100L, - number_of_runs = 1L, - cores = 1L, - log_to_console = TRUE)) -verbose_run <- model(run_number = 0L, param_class = param_class) +param <- parameters( + data_collection_period = 100L, + number_of_runs = 1L, + cores = 1L, + log_to_console = TRUE +) +verbose_run <- model(run_number = 0L, param = param) ``` ## [1] "Parameters:" @@ -820,6 +869,34 @@ verbose_run[["arrivals"]] ## 25 patient26 99.0189155 NA NA nurse 0 ## 26 patient24 84.4587897 NA NA nurse 0 ## 27 patient23 82.4020925 NA NA nurse 0 + ## q_time_unseen + ## 1 NA + ## 2 NA + ## 3 NA + ## 4 NA + ## 5 NA + ## 6 NA + ## 7 NA + ## 8 NA + ## 9 NA + ## 10 NA + ## 11 NA + ## 12 NA + ## 13 NA + ## 14 NA + ## 15 NA + ## 16 NA + ## 17 NA + ## 18 NA + ## 19 NA + ## 20 NA + ## 21 NA + ## 22 NA + ## 23 NA + ## 24 NA + ## 25 0.9810845 + ## 26 15.5412103 + ## 27 17.5979075 ## Calculate run time @@ -834,4 +911,4 @@ seconds <- as.integer(runtime %% 60L) print(sprintf("Notebook run time: %dm %ds", minutes, seconds)) ``` - ## [1] "Notebook run time: 0m 17s" + ## [1] "Notebook run time: 0m 43s" diff --git a/rmarkdown/choosing_cores.Rmd b/rmarkdown/choosing_cores.Rmd new file mode 100644 index 0000000..232c574 --- /dev/null +++ b/rmarkdown/choosing_cores.Rmd @@ -0,0 +1,131 @@ +--- +title: "Choosing cores" +author: "Amy Heather" +date: "`r Sys.Date()`" +output: + github_document: + toc: true + html_preview: false +--- + +This notebook documents the choice of the number of CPU cores. + +The generated images are saved and then loaded, so that we view the image as saved (i.e. with the dimensions set in `ggsave()`). This also avoids the creation of a `_files/` directory when knitting the document (which would save all previewed images into that folder also, so they can be rendered and displayed within the output `.md` file, even if we had not specifically saved them). 
These are viewed using `include_graphics()`, which must be the last command in the cell (or last in the plotting function). + +The run time is provided at the end of the notebook. + +## Set up + +Install the latest version of the local simulation package. + +```{r} +devtools::load_all() +``` + +Load required packages. + +```{r} +# nolint start: undesirable_function_linter. +library(data.table) +library(dplyr) +library(ggplot2) +library(knitr) +library(simulation) + +options(data.table.summarise.inform = FALSE) +options(dplyr.summarise.inform = FALSE) +# nolint end +``` + +Start timer. + +```{r} +start_time <- Sys.time() +``` + +Define path to outputs folder. + +```{r} +output_dir <- file.path("..", "outputs") +``` + +## Run time with varying number of CPU cores + +```{r} +#' Run model with varying number of CPU cores and examine run times +#' +#' @param n_cores Number of cores to test up to +#' @param file Filename to save figure to. +#' @param model_param List of parameters for the model. 
+ +run_cores <- function(n_cores, file, model_param = NULL) { + # Run model with 1 to 8 cores + speed <- list() + for (i in 1L:n_cores){ + print(paste("Running with cores:", i)) + cores_start <- Sys.time() + + # Run model with specified number of cores + # If specified, also set to provided model parameters + if (!is.null(model_param)) { + param <- do.call(parameters, model_param) + } else { + param <- parameters() + } + param[["cores"]] <- i + invisible(runner(param)) + + # Record time taken, rounded to nearest .5 dp by running round(x*2)/2 + cores_time <- round( + as.numeric(Sys.time() - cores_start, units = "secs") * 2L + ) / 2L + speed[[i]] <- list(cores = i, run_time = round(cores_time, 3L)) + } + + # Convert to dataframe + speed_df <- rbindlist(speed) + + # Generate plot + p <- ggplot(speed_df, aes(x = .data[["cores"]], y = .data[["run_time"]])) + + geom_line() + + labs(x = "Cores", y = "Run time (rounded to nearest .5 seconds)") + + theme_minimal() + + # Save plot + full_path <- file.path(output_dir, file) + ggsave(filename = full_path, plot = p, + width = 6.5, height = 4L, bg = "white") + + # View the plot + include_graphics(full_path) +} +``` + +Setting up and managing a parallel cluster takes extra time. For small tasks or few iterations, this extra time can be more than the time saved by running in parallel. + +```{r} +run_cores(5L, "cores1.png") +``` + +Having increased the simulation length, we now see that parallelisation is decreasing the model run time. + +However, when you use more cores, the data needs to be divided and sent to more workers. For small tasks, this extra work is small, but as the number of workers increases, the time spent managing and communicating with them can grow too much. At some point, this overhead becomes larger than the time saved by using more cores. + +The optimal number of cores will vary depending on your model parameters and machine. 
+ +```{r} +run_cores(5L, "cores2.png", list(data_collection_period = 100000L)) +``` + +## Run time + +```{r end_timer} +# Get run time in seconds +end_time <- Sys.time() +runtime <- as.numeric(end_time - start_time, units = "secs") + +# Display converted to minutes and seconds +minutes <- as.integer(runtime / 60L) +seconds <- as.integer(runtime %% 60L) +print(sprintf("Notebook run time: %dm %ds", minutes, seconds)) +``` diff --git a/rmarkdown/choosing_cores.md b/rmarkdown/choosing_cores.md new file mode 100644 index 0000000..05e935a --- /dev/null +++ b/rmarkdown/choosing_cores.md @@ -0,0 +1,188 @@ +Choosing cores +================ +Amy Heather +2025-03-04 + +- [Set up](#set-up) +- [Run time with varying number of CPU + cores](#run-time-with-varying-number-of-cpu-cores) +- [Run time](#run-time) + +This notebook documents the choice of the number of CPU cores. + +The generated images are saved and then loaded, so that we view the +image as saved (i.e.Β with the dimensions set in `ggsave()`). This also +avoids the creation of a `_files/` directory when knitting the document +(which would save all previewed images into that folder also, so they +can be rendered and displayed within the output `.md` file, even if we +had not specifically saved them). These are viewed using +`include_graphics()`, which must be the last command in the cell (or +last in the plotting function). + +The run time is provided at the end of the notebook. + +## Set up + +Install the latest version of the local simulation package. + +``` r +devtools::load_all() +``` + + ## β„Ή Loading simulation + +Load required packages. + +``` r +# nolint start: undesirable_function_linter. 
+library(data.table) +library(dplyr) +``` + + ## + ## Attaching package: 'dplyr' + + ## The following objects are masked from 'package:data.table': + ## + ## between, first, last + + ## The following object is masked from 'package:testthat': + ## + ## matches + + ## The following objects are masked from 'package:stats': + ## + ## filter, lag + + ## The following objects are masked from 'package:base': + ## + ## intersect, setdiff, setequal, union + +``` r +library(ggplot2) +library(knitr) +library(simulation) + +options(data.table.summarise.inform = FALSE) +options(dplyr.summarise.inform = FALSE) +# nolint end +``` + +Start timer. + +``` r +start_time <- Sys.time() +``` + +Define path to outputs folder. + +``` r +output_dir <- file.path("..", "outputs") +``` + +## Run time with varying number of CPU cores + +``` r +#' Run model with varying number of CPU cores and examine run times +#' +#' @param n_cores Number of cores to test up to +#' @param file Filename to save figure to. +#' @param model_param List of parameters for the model. 
+ +run_cores <- function(n_cores, file, model_param = NULL) { + # Run model with 1 to 8 cores + speed <- list() + for (i in 1L:n_cores){ + print(paste("Running with cores:", i)) + cores_start <- Sys.time() + + # Run model with specified number of cores + # If specified, also set to provided model parameters + if (!is.null(model_param)) { + param <- do.call(parameters, model_param) + } else { + param <- parameters() + } + param[["cores"]] <- i + invisible(runner(param)) + + # Record time taken, rounded to nearest .5 dp by running round(x*2)/2 + cores_time <- round( + as.numeric(Sys.time() - cores_start, units = "secs") * 2L + ) / 2L + speed[[i]] <- list(cores = i, run_time = round(cores_time, 3L)) + } + + # Convert to dataframe + speed_df <- rbindlist(speed) + + # Generate plot + p <- ggplot(speed_df, aes(x = .data[["cores"]], y = .data[["run_time"]])) + + geom_line() + + labs(x = "Cores", y = "Run time (rounded to nearest .5 seconds)") + + theme_minimal() + + # Save plot + full_path <- file.path(output_dir, file) + ggsave(filename = full_path, plot = p, + width = 6.5, height = 4L, bg = "white") + + # View the plot + include_graphics(full_path) +} +``` + +Setting up and managing a parallel cluster takes extra time. For small +tasks or few iterations, this extra time can be more than the time saved +by running in parallel. + +``` r +run_cores(5L, "cores1.png") +``` + + ## [1] "Running with cores: 1" + ## [1] "Running with cores: 2" + ## [1] "Running with cores: 3" + ## [1] "Running with cores: 4" + ## [1] "Running with cores: 5" + +![](../outputs/cores1.png) + +Having increased the simulation length, we now see that parallelisation +is decreasing the model run time. + +However, when you use more cores, the data needs to be divided and sent +to more workers. For small tasks, this extra work is small, but as the +number of workers increases, the time spent managing and communicating +with them can grow too much. 
At some point, this overhead becomes larger +than the time saved by using more cores. + +The optimal number of cores will vary depending on your model parameters +and machine. + +``` r +run_cores(5L, "cores2.png", list(data_collection_period = 100000L)) +``` + + ## [1] "Running with cores: 1" + ## [1] "Running with cores: 2" + ## [1] "Running with cores: 3" + ## [1] "Running with cores: 4" + ## [1] "Running with cores: 5" + +![](../outputs/cores2.png) + +## Run time + +``` r +# Get run time in seconds +end_time <- Sys.time() +runtime <- as.numeric(end_time - start_time, units = "secs") + +# Display converted to minutes and seconds +minutes <- as.integer(runtime / 60L) +seconds <- as.integer(runtime %% 60L) +print(sprintf("Notebook run time: %dm %ds", minutes, seconds)) +``` + + ## [1] "Notebook run time: 2m 9s" diff --git a/rmarkdown/choosing_parameters.Rmd b/rmarkdown/choosing_parameters.Rmd deleted file mode 100644 index 68dee33..0000000 --- a/rmarkdown/choosing_parameters.Rmd +++ /dev/null @@ -1,298 +0,0 @@ ---- -title: "Choosing parameters" -author: "Amy Heather" -date: "`r Sys.Date()`" -output: - github_document: - toc: true ---- - -This notebook documents the choice of simulation parameters including: - -* Number of replications -* Number of CPU cores - -The generated images are saved and then loaded, so that we view the image as saved (i.e. with the dimensions set in `ggsave()`). This also avoids the creation of a `_files/` directory when knitting the document (which would save all previewed images into that folder also, so they can be rendered and displayed within the output `.md` file, even if we had not specifically saved them). These are viewed using `include_graphics()`, which must be the last command in the cell (or last in the plotting function). - -The run time is provided at the end of the notebook. - -## Set up - -Install the latest version of the local simulation package. - -```{r} -devtools::install() -``` - -Load required packages. 
- -```{r} -# nolint start: undesirable_function_linter. -library(data.table) -library(dplyr) -library(ggplot2) -library(knitr) -library(simulation) -library(tidyr) - -options(data.table.summarise.inform = FALSE) -options(dplyr.summarise.inform = FALSE) -# nolint end -``` - -Start timer. - -```{r} -start_time <- Sys.time() -``` - -Define path to outputs folder. - -```{r} -output_dir <- file.path("..", "outputs") -``` - -## Choosing the number of replications - -The **confidence interval method** can be used to select the number of replications to run. The more replications you run, the narrower your confidence interval becomes, leading to a more precise estimate of the model's mean performance. - -First, you select a desired confidence interval - for example, 95%. Then, run the model with an increasing number of replications, and identify the number required to achieve that precision in the estimate of a given metric - and also, to maintain that precision (as the intervals may converge or expand again later on). - -This method is less useful for values very close to zero - so, for example, when using utilisation (which ranges from 0 to 1) it is recommended to multiple values by 100. - -When selecting the number of replications you should repeat the analysis for all performance measures and select the highest value as your number of replications. - -```{r} -#' Use the confidence interval method to select the number of replications. -#' -#' @param replications Number of times to run the model. -#' @param desired_precision Desired mean deviation from confidence interval. -#' @param metric Name of performance metric to assess. -#' @param yaxis_title Label for y axis. -#' @param path Path inc. filename to save figure to. -#' @param min_rep A suggested minimum number of replications (default=NULL). 
- -confidence_interval_method <- function(replications, desired_precision, metric, - yaxis_title, path, min_rep = NULL) { - # Run model for specified number of replications - param_class <- defaults() - param_class[["update"]](list(number_of_runs = replications)) - envs <- trial(param_class) - results <- process_replications(envs) - - # If mean of metric is less than 1, multiply by 100 - if (mean(results[[metric]]) < 1L) { - results[[paste0("adj_", metric)]] <- results[[metric]] * 100L - metric <- paste0("adj_", metric) - } - - # Initialise list to store the results - cumulative_list <- list() - - # For each row in the dataframe, filter to rows up to the i-th replication - # then perform calculations - for (i in 1L:replications) { - - # Filter rows up to the i-th replication - subset <- results[[metric]][1L:i] - - # Calculate mean - mean <- mean(subset) - - # Some calculations require more than 1 observation else will error... - if (i == 1L) { - # When only one observation, set to NA - std_dev <- NA - ci_lower <- NA - ci_upper <- NA - deviation <- NA - } else { - # Else, calculate standard deviation, 95% confidence interval, and - # percentage deviation - std_dev <- sd(subset) - ci <- t.test(subset)[["conf.int"]] - ci_lower <- ci[[1L]] - ci_upper <- ci[[2L]] - deviation <- ((ci_upper - mean) / mean) * 100L - } - - # Append to the cumulative list - cumulative_list[[i]] <- data.frame( - replications = i, - cumulative_mean = mean, - cumulative_std = std_dev, - ci_lower = ci_lower, - ci_upper = ci_upper, - perc_deviation = deviation - ) - } - - # Combine the list into a single data frame - cumulative <- do.call(rbind, cumulative_list) - - # Get the minimum number of replications where deviation is less than target - compare <- cumulative %>% - filter(.data[["perc_deviation"]] <= desired_precision * 100L) - if (nrow(compare) > 0L) { - # Get minimum number - n_reps <- compare %>% - slice_head() %>% - dplyr::select(replications) %>% - pull() - print(paste0("Reached 
desired precision (", desired_precision, ") in ", - n_reps, " replications.")) - } else { - warning("Running ", replications, " replications did not reach ", - "desired precision (", desired_precision, ").") - } - - # Plot the cumulative mean and confidence interval - p <- ggplot(cumulative, aes(x = .data[["replications"]], - y = .data[["cumulative_mean"]])) + - geom_line() + - geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), alpha = 0.2) - - # If specified, plot the minimum suggested number of replications - if (!is.null(min_rep)) { - p <- p + - geom_vline(xintercept = min_rep, linetype = "dashed", color = "red") - } - - # Modify labels and style - p <- p + - labs(x = "Replications", y = yaxis_title) + - theme_minimal() - - # Save the plot - ggsave(filename = path, width = 6.5, height = 4L, bg = "white") - - return(cumulative) -} -``` - -It's important to check ahead, to check that the 5% precision is maintained - which is fine in this case - it doesn't go back up to future deviation. - -```{r} -path <- file.path(output_dir, "choose_param_conf_int_1.png") - -# Run calculations and produce plot -ci_df <- confidence_interval_method( - replications = 150L, - desired_precision = 0.05, - metric = "mean_activity_time_nurse", - yaxis_title = "Mean time with nurse", - path = path, - min_rep = 98L -) - -# View first ten rows were percentage deviation is below 5 -ci_df %>% - filter(perc_deviation < 5L) %>% - head(10L) - -# View plot -include_graphics(path) -``` - -It is also important to check across multiple metrics. 
- -```{r} -path <- file.path(output_dir, "choose_param_conf_int_3.png") - -# Run calculations and produce plot -ci_df <- confidence_interval_method( - replications = 200L, - desired_precision = 0.05, - metric = "utilisation_nurse", - yaxis_title = "Mean nurse utilisation", - path = path, - min_rep = 148L -) - -# View first ten rows were percentage deviation is below 5 -ci_df %>% - filter(perc_deviation < 5L) %>% - head(10L) - -# View plot -include_graphics(path) -``` - -## Run time with varying number of CPU cores - -```{r} -#' Run model with varying number of CPU cores and examine run times -#' -#' @param n_cores Number of cores to test up to -#' @param file Filename to save figure to. -#' @param model_param List of parameters for the model. - -run_cores <- function(n_cores, file, model_param = NULL) { - # Run model with 1 to 8 cores - speed <- list() - for (i in 1L:n_cores){ - print(paste("Running with cores:", i)) - cores_start <- Sys.time() - - # Run model with specified number of cores - # If specified, also set to provided model parameters - param_class <- defaults() - if (!is.null(model_param)) { - param_class[["update"]](model_param) - } - param_class[["update"]](list(cores = i)) - invisible(trial(param_class)) - - # Record time taken, rounded to nearest .5 dp by running round(x*2)/2 - cores_time <- round( - as.numeric(Sys.time() - cores_start, units = "secs")*2)/2 - speed[[i]] <- list(cores = i, run_time = round(cores_time, 3L)) - } - - # Convert to dataframe - speed_df <- rbindlist(speed) - - # Generate plot - p <- ggplot(speed_df, aes(x = .data[["cores"]], y = .data[["run_time"]])) + - geom_line() + - labs(x = "Cores", y = "Run time (rounded to nearest .5 seconds)") + - theme_minimal() - - # Save plot - full_path <- file.path(output_dir, file) - ggsave(filename = full_path, plot = p, - width = 6.5, height = 4L, bg = "white") - - # View the plot - include_graphics(full_path) -} -``` - -Setting up and managing a parallel cluster takes extra time. 
For small tasks or few iterations, this extra time can be more than the time saved by running in parallel. - -```{r} -run_cores(5L, "cores1.png") -``` - -Having increased the simulation length, we now see that parallelisation is decreasing the model run time. - -However, when you use more cores, the data needs to be divided and sent to more workers. For small tasks, this extra work is small, but as the number of workers increases, the time spent managing and communicating with them can grow too much. At some point, this overhead becomes larger than the time saved by using more cores. - -The optimal number of cores will vary depending on your model parameters and machine. - -```{r} -run_cores(5L, "cores2.png", list(data_collection_period = 100000L)) -``` - -## Run time - -```{r end_timer} -# Get run time in seconds -end_time <- Sys.time() -runtime <- as.numeric(end_time - start_time, units = "secs") - -# Display converted to minutes and seconds -minutes <- as.integer(runtime / 60L) -seconds <- as.integer(runtime %% 60L) -print(sprintf("Notebook run time: %dm %ds", minutes, seconds)) -``` diff --git a/rmarkdown/choosing_parameters.md b/rmarkdown/choosing_parameters.md deleted file mode 100644 index eb17b4e..0000000 --- a/rmarkdown/choosing_parameters.md +++ /dev/null @@ -1,431 +0,0 @@ -Choosing parameters -================ -Amy Heather -2025-01-27 - -- [Set up](#set-up) -- [Choosing the number of - replications](#choosing-the-number-of-replications) -- [Run time with varying number of CPU - cores](#run-time-with-varying-number-of-cpu-cores) -- [Run time](#run-time) - -This notebook documents the choice of simulation parameters including: - -- Number of replications -- Number of CPU cores - -The generated images are saved and then loaded, so that we view the -image as saved (i.e.Β with the dimensions set in `ggsave()`). 
This also -avoids the creation of a `_files/` directory when knitting the document -(which would save all previewed images into that folder also, so they -can be rendered and displayed within the output `.md` file, even if we -had not specifically saved them). These are viewed using -`include_graphics()`, which must be the last command in the cell (or -last in the plotting function). - -The run time is provided at the end of the notebook. - -## Set up - -Install the latest version of the local simulation package. - -``` r -devtools::install() -``` - - ## - ## ── R CMD build ───────────────────────────────────────────────────────────────── - ## checking for file β€˜/home/amy/Documents/stars/rap_template_r_des/DESCRIPTION’ ... βœ” checking for file β€˜/home/amy/Documents/stars/rap_template_r_des/DESCRIPTION’ - ## ─ preparing β€˜simulation’: - ## checking DESCRIPTION meta-information ... βœ” checking DESCRIPTION meta-information - ## ─ checking for LF line-endings in source and make files and shell scripts - ## ─ checking for empty or unneeded directories - ## Omitted β€˜LazyData’ from DESCRIPTION - ## ─ building β€˜simulation_0.1.0.tar.gz’ - ## - ## Running /opt/R/4.4.1/lib/R/bin/R CMD INSTALL \ - ## /tmp/RtmpaECLM8/simulation_0.1.0.tar.gz --install-tests - ## * installing to library β€˜/home/amy/.cache/R/renv/library/rap_template_r_des-cd7d6844/linux-ubuntu-noble/R-4.4/x86_64-pc-linux-gnu’ - ## * installing *source* package β€˜simulation’ ... - ## ** using staged installation - ## ** R - ## ** tests - ## ** byte-compile and prepare package for lazy loading - ## ** help - ## *** installing help indices - ## ** building package indices - ## ** testing if installed package can be loaded from temporary location - ## ** testing if installed package can be loaded from final location - ## ** testing if installed package keeps a record of temporary installation path - ## * DONE (simulation) - -Load required packages. - -``` r -# nolint start: undesirable_function_linter. 
-library(data.table) -library(dplyr) -``` - - ## - ## Attaching package: 'dplyr' - - ## The following objects are masked from 'package:data.table': - ## - ## between, first, last - - ## The following objects are masked from 'package:stats': - ## - ## filter, lag - - ## The following objects are masked from 'package:base': - ## - ## intersect, setdiff, setequal, union - -``` r -library(ggplot2) -library(knitr) -library(simulation) -library(tidyr) - -options(data.table.summarise.inform = FALSE) -options(dplyr.summarise.inform = FALSE) -# nolint end -``` - -Start timer. - -``` r -start_time <- Sys.time() -``` - -Define path to outputs folder. - -``` r -output_dir <- file.path("..", "outputs") -``` - -## Choosing the number of replications - -The **confidence interval method** can be used to select the number of -replications to run. The more replications you run, the narrower your -confidence interval becomes, leading to a more precise estimate of the -model’s mean performance. - -First, you select a desired confidence interval - for example, 95%. -Then, run the model with an increasing number of replications, and -identify the number required to achieve that precision in the estimate -of a given metric - and also, to maintain that precision (as the -intervals may converge or expand again later on). - -This method is less useful for values very close to zero - so, for -example, when using utilisation (which ranges from 0 to 1) it is -recommended to multiple values by 100. - -When selecting the number of replications you should repeat the analysis -for all performance measures and select the highest value as your number -of replications. - -``` r -#' Use the confidence interval method to select the number of replications. -#' -#' @param replications Number of times to run the model. -#' @param desired_precision Desired mean deviation from confidence interval. -#' @param metric Name of performance metric to assess. -#' @param yaxis_title Label for y axis. 
-#' @param path Path inc. filename to save figure to. -#' @param min_rep A suggested minimum number of replications (default=NULL). - -confidence_interval_method <- function(replications, desired_precision, metric, - yaxis_title, path, min_rep = NULL) { - # Run model for specified number of replications - param_class <- defaults() - param_class[["update"]](list(number_of_runs = replications)) - envs <- trial(param_class) - results <- process_replications(envs) - - # If mean of metric is less than 1, multiply by 100 - if (mean(results[[metric]]) < 1L) { - results[[paste0("adj_", metric)]] <- results[[metric]] * 100L - metric <- paste0("adj_", metric) - } - - # Initialise list to store the results - cumulative_list <- list() - - # For each row in the dataframe, filter to rows up to the i-th replication - # then perform calculations - for (i in 1L:replications) { - - # Filter rows up to the i-th replication - subset <- results[[metric]][1L:i] - - # Calculate mean - mean <- mean(subset) - - # Some calculations require more than 1 observation else will error... 
- if (i == 1L) { - # When only one observation, set to NA - std_dev <- NA - ci_lower <- NA - ci_upper <- NA - deviation <- NA - } else { - # Else, calculate standard deviation, 95% confidence interval, and - # percentage deviation - std_dev <- sd(subset) - ci <- t.test(subset)[["conf.int"]] - ci_lower <- ci[[1L]] - ci_upper <- ci[[2L]] - deviation <- ((ci_upper - mean) / mean) * 100L - } - - # Append to the cumulative list - cumulative_list[[i]] <- data.frame( - replications = i, - cumulative_mean = mean, - cumulative_std = std_dev, - ci_lower = ci_lower, - ci_upper = ci_upper, - perc_deviation = deviation - ) - } - - # Combine the list into a single data frame - cumulative <- do.call(rbind, cumulative_list) - - # Get the minimum number of replications where deviation is less than target - compare <- cumulative %>% - filter(.data[["perc_deviation"]] <= desired_precision * 100L) - if (nrow(compare) > 0L) { - # Get minimum number - n_reps <- compare %>% - slice_head() %>% - dplyr::select(replications) %>% - pull() - print(paste0("Reached desired precision (", desired_precision, ") in ", - n_reps, " replications.")) - } else { - warning("Running ", replications, " replications did not reach ", - "desired precision (", desired_precision, ").") - } - - # Plot the cumulative mean and confidence interval - p <- ggplot(cumulative, aes(x = .data[["replications"]], - y = .data[["cumulative_mean"]])) + - geom_line() + - geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), alpha = 0.2) - - # If specified, plot the minimum suggested number of replications - if (!is.null(min_rep)) { - p <- p + - geom_vline(xintercept = min_rep, linetype = "dashed", color = "red") - } - - # Modify labels and style - p <- p + - labs(x = "Replications", y = yaxis_title) + - theme_minimal() - - # Save the plot - ggsave(filename = path, width = 6.5, height = 4L, bg = "white") - - return(cumulative) -} -``` - -It’s important to check ahead, to check that the 5% precision is -maintained - which is fine 
in this case - it doesn’t go back up to -future deviation. - -``` r -path <- file.path(output_dir, "choose_param_conf_int_1.png") - -# Run calculations and produce plot -ci_df <- confidence_interval_method( - replications = 150L, - desired_precision = 0.05, - metric = "mean_activity_time_nurse", - yaxis_title = "Mean time with nurse", - path = path, - min_rep = 98L -) -``` - - ## [1] "Reached desired precision (0.05) in 98 replications." - -``` r -# View first ten rows were percentage deviation is below 5 -ci_df %>% - filter(perc_deviation < 5L) %>% - head(10L) -``` - - ## replications cumulative_mean cumulative_std ci_lower ci_upper perc_deviation - ## 1 98 8.461235 2.106669 8.038875 8.883596 4.991712 - ## 2 99 8.475054 2.100398 8.056137 8.893971 4.942943 - ## 3 100 8.468351 2.090838 8.053483 8.883219 4.899036 - ## 4 101 8.473309 2.080954 8.062503 8.884115 4.848241 - ## 5 102 8.478815 2.071373 8.071959 8.885671 4.798504 - ## 6 103 8.485316 2.062250 8.082270 8.888361 4.749915 - ## 7 104 8.490698 2.052949 8.091450 8.889945 4.702173 - ## 8 105 8.477837 2.047301 8.081634 8.874040 4.673399 - ## 9 106 8.456515 2.049320 8.061841 8.851190 4.667105 - ## 10 107 8.459470 2.039859 8.068501 8.850440 4.621677 - -``` r -# View plot -include_graphics(path) -``` - -![](../outputs/choose_param_conf_int_1.png) - -It is also important to check across multiple metrics. - -``` r -path <- file.path(output_dir, "choose_param_conf_int_3.png") - -# Run calculations and produce plot -ci_df <- confidence_interval_method( - replications = 200L, - desired_precision = 0.05, - metric = "utilisation_nurse", - yaxis_title = "Mean nurse utilisation", - path = path, - min_rep = 148L -) -``` - - ## [1] "Reached desired precision (0.05) in 148 replications." 
- -``` r -# View first ten rows were percentage deviation is below 5 -ci_df %>% - filter(perc_deviation < 5L) %>% - head(10L) -``` - - ## replications cumulative_mean cumulative_std ci_lower ci_upper perc_deviation - ## 1 148 45.73420 14.04814 43.45214 48.01625 4.989822 - ## 2 149 45.89021 14.12952 43.60278 48.17764 4.984574 - ## 3 150 45.90075 14.08261 43.62865 48.17286 4.950028 - ## 4 151 45.84563 14.05193 43.58612 48.10513 4.928512 - ## 5 152 45.92331 14.03803 43.67359 48.17302 4.898849 - ## 6 153 45.84086 14.02890 43.60008 48.08163 4.888154 - ## 7 154 45.71614 14.06836 43.47650 47.95579 4.899034 - ## 8 155 45.85137 14.12331 43.61035 48.09239 4.887567 - ## 9 156 45.87804 14.08162 43.65092 48.10515 4.854423 - ## 10 157 46.12548 14.37477 43.85937 48.39160 4.912928 - -``` r -# View plot -include_graphics(path) -``` - -![](../outputs/choose_param_conf_int_3.png) - -## Run time with varying number of CPU cores - -``` r -#' Run model with varying number of CPU cores and examine run times -#' -#' @param n_cores Number of cores to test up to -#' @param file Filename to save figure to. -#' @param model_param List of parameters for the model. 
- -run_cores <- function(n_cores, file, model_param = NULL) { - # Run model with 1 to 8 cores - speed <- list() - for (i in 1L:n_cores){ - print(paste("Running with cores:", i)) - cores_start <- Sys.time() - - # Run model with specified number of cores - # If specified, also set to provided model parameters - param_class <- defaults() - if (!is.null(model_param)) { - param_class[["update"]](model_param) - } - param_class[["update"]](list(cores = i)) - invisible(trial(param_class)) - - # Record time taken, rounded to nearest .5 dp by running round(x*2)/2 - cores_time <- round( - as.numeric(Sys.time() - cores_start, units = "secs")*2)/2 - speed[[i]] <- list(cores = i, run_time = round(cores_time, 3L)) - } - - # Convert to dataframe - speed_df <- rbindlist(speed) - - # Generate plot - p <- ggplot(speed_df, aes(x = .data[["cores"]], y = .data[["run_time"]])) + - geom_line() + - labs(x = "Cores", y = "Run time (rounded to nearest .5 seconds)") + - theme_minimal() - - # Save plot - full_path <- file.path(output_dir, file) - ggsave(filename = full_path, plot = p, - width = 6.5, height = 4L, bg = "white") - - # View the plot - include_graphics(full_path) -} -``` - -Setting up and managing a parallel cluster takes extra time. For small -tasks or few iterations, this extra time can be more than the time saved -by running in parallel. - -``` r -run_cores(5L, "cores1.png") -``` - - ## [1] "Running with cores: 1" - ## [1] "Running with cores: 2" - ## [1] "Running with cores: 3" - ## [1] "Running with cores: 4" - ## [1] "Running with cores: 5" - -![](../outputs/cores1.png) - -Having increased the simulation length, we now see that parallelisation -is decreasing the model run time. - -However, when you use more cores, the data needs to be divided and sent -to more workers. For small tasks, this extra work is small, but as the -number of workers increases, the time spent managing and communicating -with them can grow too much. 
At some point, this overhead becomes larger -than the time saved by using more cores. - -The optimal number of cores will vary depending on your model parameters -and machine. - -``` r -run_cores(5L, "cores2.png", list(data_collection_period = 100000L)) -``` - - ## [1] "Running with cores: 1" - ## [1] "Running with cores: 2" - ## [1] "Running with cores: 3" - ## [1] "Running with cores: 4" - ## [1] "Running with cores: 5" - -![](../outputs/cores2.png) - -## Run time - -``` r -# Get run time in seconds -end_time <- Sys.time() -runtime <- as.numeric(end_time - start_time, units = "secs") - -# Display converted to minutes and seconds -minutes <- as.integer(runtime / 60L) -seconds <- as.integer(runtime %% 60L) -print(sprintf("Notebook run time: %dm %ds", minutes, seconds)) -``` - - ## [1] "Notebook run time: 1m 19s" diff --git a/rmarkdown/choosing_replications.Rmd b/rmarkdown/choosing_replications.Rmd new file mode 100644 index 0000000..2eef497 --- /dev/null +++ b/rmarkdown/choosing_replications.Rmd @@ -0,0 +1,121 @@ +--- +title: "Choosing replications" +author: "Amy Heather" +date: "`r Sys.Date()`" +output: + github_document: + toc: true + html_preview: false +--- + +This notebook documents the choice of the number of replications. + +The generated images are saved and then loaded, so that we view the image as saved (i.e. with the dimensions set in `ggsave()`). This also avoids the creation of a `_files/` directory when knitting the document (which would save all previewed images into that folder also, so they can be rendered and displayed within the output `.md` file, even if we had not specifically saved them). These are viewed using `include_graphics()`, which must be the last command in the cell (or last in the plotting function). + +The run time is provided at the end of the notebook. + +## Set up + +Install the latest version of the local simulation package. + +```{r} +devtools::load_all() +``` + +Load required packages. 
+
+```{r}
+# nolint start: undesirable_function_linter.
+library(data.table)
+library(dplyr)
+library(knitr)
+library(simulation)
+library(tidyr)
+
+options(data.table.summarise.inform = FALSE)
+options(dplyr.summarise.inform = FALSE)
+# nolint end
+```
+
+Start timer.
+
+```{r}
+start_time <- Sys.time()
+```
+
+Define path to outputs folder.
+
+```{r}
+output_dir <- file.path("..", "outputs")
+```
+
+## Choosing the number of replications
+
+The **confidence interval method** can be used to select the number of replications to run. The more replications you run, the narrower your confidence interval becomes, leading to a more precise estimate of the model's mean performance.
+
+First, you select a desired confidence interval - for example, 95%. Then, run the model with an increasing number of replications, and identify the number required to achieve that precision in the estimate of a given metric - and also, to maintain that precision (as the intervals may converge or expand again later on).
+
+This method is less useful for values very close to zero - so, for example, when using utilisation (which ranges from 0 to 1) it is recommended to multiply values by 100.
+
+When selecting the number of replications you should repeat the analysis for all performance measures and select the highest value as your number of replications.
+
+It's important to check ahead, to confirm that the 5% precision is maintained - which is fine in this case - the deviation doesn't go back above the threshold at higher replication counts.
+ +```{r} +path <- file.path(output_dir, "choose_param_conf_int_1.png") + +# Run calculations and produce plot +ci_df <- confidence_interval_method( + replications = 150L, + desired_precision = 0.05, + metric = "mean_activity_time_nurse", + yaxis_title = "Mean time with nurse", + path = path, + min_rep = 98L +) + +# View first ten rows were percentage deviation is below 5 +ci_df %>% + filter(perc_deviation < 5L) %>% + head(10L) + +# View plot +include_graphics(path) +``` + +It is also important to check across multiple metrics. + +```{r} +path <- file.path(output_dir, "choose_param_conf_int_3.png") + +# Run calculations and produce plot +ci_df <- confidence_interval_method( + replications = 200L, + desired_precision = 0.05, + metric = "utilisation_nurse", + yaxis_title = "Mean nurse utilisation", + path = path, + min_rep = 148L +) + +# View first ten rows were percentage deviation is below 5 +ci_df %>% + filter(perc_deviation < 5L) %>% + head(10L) + +# View plot +include_graphics(path) +``` + +## Run time + +```{r end_timer} +# Get run time in seconds +end_time <- Sys.time() +runtime <- as.numeric(end_time - start_time, units = "secs") + +# Display converted to minutes and seconds +minutes <- as.integer(runtime / 60L) +seconds <- as.integer(runtime %% 60L) +print(sprintf("Notebook run time: %dm %ds", minutes, seconds)) +``` diff --git a/rmarkdown/choosing_replications.md b/rmarkdown/choosing_replications.md new file mode 100644 index 0000000..7079706 --- /dev/null +++ b/rmarkdown/choosing_replications.md @@ -0,0 +1,216 @@ +Choosing replications +================ +Amy Heather +2025-03-04 + +- [Set up](#set-up) +- [Choosing the number of + replications](#choosing-the-number-of-replications) +- [Run time](#run-time) + +This notebook documents the choice of the number of replications. + +The generated images are saved and then loaded, so that we view the +image as saved (i.e.Β with the dimensions set in `ggsave()`). 
This also +avoids the creation of a `_files/` directory when knitting the document +(which would save all previewed images into that folder also, so they +can be rendered and displayed within the output `.md` file, even if we +had not specifically saved them). These are viewed using +`include_graphics()`, which must be the last command in the cell (or +last in the plotting function). + +The run time is provided at the end of the notebook. + +## Set up + +Install the latest version of the local simulation package. + +``` r +devtools::load_all() +``` + + ## β„Ή Loading simulation + +Load required packages. + +``` r +# nolint start: undesirable_function_linter. +library(data.table) +library(dplyr) +``` + + ## + ## Attaching package: 'dplyr' + + ## The following objects are masked from 'package:data.table': + ## + ## between, first, last + + ## The following object is masked from 'package:testthat': + ## + ## matches + + ## The following objects are masked from 'package:stats': + ## + ## filter, lag + + ## The following objects are masked from 'package:base': + ## + ## intersect, setdiff, setequal, union + +``` r +library(knitr) +library(simulation) +library(tidyr) +``` + + ## + ## Attaching package: 'tidyr' + + ## The following object is masked from 'package:testthat': + ## + ## matches + +``` r +options(data.table.summarise.inform = FALSE) +options(dplyr.summarise.inform = FALSE) +# nolint end +``` + +Start timer. + +``` r +start_time <- Sys.time() +``` + +Define path to outputs folder. + +``` r +output_dir <- file.path("..", "outputs") +``` + +## Choosing the number of replications + +The **confidence interval method** can be used to select the number of +replications to run. The more replications you run, the narrower your +confidence interval becomes, leading to a more precise estimate of the +model’s mean performance. + +First, you select a desired confidence interval - for example, 95%. 
+Then, run the model with an increasing number of replications, and +identify the number required to achieve that precision in the estimate +of a given metric - and also, to maintain that precision (as the +intervals may converge or expand again later on). + +This method is less useful for values very close to zero - so, for +example, when using utilisation (which ranges from 0 to 1) it is +recommended to multiple values by 100. + +When selecting the number of replications you should repeat the analysis +for all performance measures and select the highest value as your number +of replications. + +It’s important to check ahead, to check that the 5% precision is +maintained - which is fine in this case - it doesn’t go back up to +future deviation. + +``` r +path <- file.path(output_dir, "choose_param_conf_int_1.png") + +# Run calculations and produce plot +ci_df <- confidence_interval_method( + replications = 150L, + desired_precision = 0.05, + metric = "mean_activity_time_nurse", + yaxis_title = "Mean time with nurse", + path = path, + min_rep = 98L +) +``` + + ## [1] "Reached desired precision (0.05) in 98 replications." 
+ +``` r +# View first ten rows were percentage deviation is below 5 +ci_df %>% + filter(perc_deviation < 5L) %>% + head(10L) +``` + + ## replications cumulative_mean cumulative_std ci_lower ci_upper perc_deviation + ## 1 98 8.461235 2.106669 8.038875 8.883596 4.991712 + ## 2 99 8.475054 2.100398 8.056137 8.893971 4.942943 + ## 3 100 8.468351 2.090838 8.053483 8.883219 4.899036 + ## 4 101 8.473309 2.080954 8.062503 8.884115 4.848241 + ## 5 102 8.478815 2.071373 8.071959 8.885671 4.798504 + ## 6 103 8.485316 2.062250 8.082270 8.888361 4.749915 + ## 7 104 8.490698 2.052949 8.091450 8.889945 4.702173 + ## 8 105 8.477837 2.047301 8.081634 8.874040 4.673399 + ## 9 106 8.456515 2.049320 8.061841 8.851190 4.667105 + ## 10 107 8.459470 2.039859 8.068501 8.850440 4.621677 + +``` r +# View plot +include_graphics(path) +``` + +![](../outputs/choose_param_conf_int_1.png) + +It is also important to check across multiple metrics. + +``` r +path <- file.path(output_dir, "choose_param_conf_int_3.png") + +# Run calculations and produce plot +ci_df <- confidence_interval_method( + replications = 200L, + desired_precision = 0.05, + metric = "utilisation_nurse", + yaxis_title = "Mean nurse utilisation", + path = path, + min_rep = 148L +) +``` + + ## [1] "Reached desired precision (0.05) in 148 replications." 
+ +``` r +# View first ten rows were percentage deviation is below 5 +ci_df %>% + filter(perc_deviation < 5L) %>% + head(10L) +``` + + ## replications cumulative_mean cumulative_std ci_lower ci_upper perc_deviation + ## 1 148 45.73420 14.04814 43.45214 48.01625 4.989822 + ## 2 149 45.89021 14.12952 43.60278 48.17764 4.984574 + ## 3 150 45.90075 14.08261 43.62865 48.17286 4.950028 + ## 4 151 45.84563 14.05193 43.58612 48.10513 4.928512 + ## 5 152 45.92331 14.03803 43.67359 48.17302 4.898849 + ## 6 153 45.84086 14.02890 43.60008 48.08163 4.888154 + ## 7 154 45.71614 14.06836 43.47650 47.95579 4.899034 + ## 8 155 45.85137 14.12331 43.61035 48.09239 4.887567 + ## 9 156 45.87804 14.08162 43.65092 48.10515 4.854423 + ## 10 157 46.12548 14.37477 43.85937 48.39160 4.912928 + +``` r +# View plot +include_graphics(path) +``` + +![](../outputs/choose_param_conf_int_3.png) + +## Run time + +``` r +# Get run time in seconds +end_time <- Sys.time() +runtime <- as.numeric(end_time - start_time, units = "secs") + +# Display converted to minutes and seconds +minutes <- as.integer(runtime / 60L) +seconds <- as.integer(runtime %% 60L) +print(sprintf("Notebook run time: %dm %ds", minutes, seconds)) +``` + + ## [1] "Notebook run time: 0m 5s" diff --git a/rmarkdown/generate_exp_results.Rmd b/rmarkdown/generate_exp_results.Rmd index ef45a43..b78b923 100644 --- a/rmarkdown/generate_exp_results.Rmd +++ b/rmarkdown/generate_exp_results.Rmd @@ -5,6 +5,7 @@ date: "`r Sys.Date()`" output: github_document: toc: true + html_preview: false --- This notebook is used to run a specific version of the model and save each results dataframe as a csv. These are used in `test-backtest.R` to verify that the model produces consistent results. @@ -18,7 +19,7 @@ The run time is provided at the end of this notebook. Install the latest version of the local simulation package. ```{r} -devtools::install() +devtools::load_all() ``` Load required packages. 
@@ -45,22 +46,22 @@ testdata_dir <- file.path("..", "tests", "testthat", "testdata") ```{r} # Define model parameters -param_class <- defaults() -param_class[["update"]](list()) -param_class[["update"]](list(patient_inter = 4L, - mean_n_consult_time = 10L, - number_of_nurses = 5L, - data_collection_period = 80L, - number_of_runs = 10L, - cores = 1L)) - -# Run the trial -raw_results <- trial(param_class) +param <- parameters( + patient_inter = 4L, + mean_n_consult_time = 10L, + number_of_nurses = 5L, + data_collection_period = 80L, + number_of_runs = 10L, + cores = 1L +) + +# Run the replications +raw_results <- runner(param) ``` ```{r} # Process results -results <- process_replications(raw_results) +results <- get_run_results(raw_results) # Preview head(results) diff --git a/rmarkdown/generate_exp_results.md b/rmarkdown/generate_exp_results.md index 4e56909..3dc4af8 100644 --- a/rmarkdown/generate_exp_results.md +++ b/rmarkdown/generate_exp_results.md @@ -1,7 +1,7 @@ Generate expected results ================ Amy Heather -2025-01-27 +2025-03-04 - [Set-up](#set-up) - [Run model and save results](#run-model-and-save-results) @@ -24,34 +24,10 @@ The run time is provided at the end of this notebook. Install the latest version of the local simulation package. ``` r -devtools::install() +devtools::load_all() ``` - ## - ## ── R CMD build ───────────────────────────────────────────────────────────────── - ## checking for file β€˜/home/amy/Documents/stars/rap_template_r_des/DESCRIPTION’ ... βœ” checking for file β€˜/home/amy/Documents/stars/rap_template_r_des/DESCRIPTION’ - ## ─ preparing β€˜simulation’: - ## checking DESCRIPTION meta-information ... 
βœ” checking DESCRIPTION meta-information - ## ─ checking for LF line-endings in source and make files and shell scripts - ## ─ checking for empty or unneeded directories - ## Omitted β€˜LazyData’ from DESCRIPTION - ## ─ building β€˜simulation_0.1.0.tar.gz’ - ## - ## Running /opt/R/4.4.1/lib/R/bin/R CMD INSTALL \ - ## /tmp/RtmpVSwa65/simulation_0.1.0.tar.gz --install-tests - ## * installing to library β€˜/home/amy/.cache/R/renv/library/rap_template_r_des-cd7d6844/linux-ubuntu-noble/R-4.4/x86_64-pc-linux-gnu’ - ## * installing *source* package β€˜simulation’ ... - ## ** using staged installation - ## ** R - ## ** tests - ## ** byte-compile and prepare package for lazy loading - ## ** help - ## *** installing help indices - ## ** building package indices - ## ** testing if installed package can be loaded from temporary location - ## ** testing if installed package can be loaded from final location - ## ** testing if installed package keeps a record of temporary installation path - ## * DONE (simulation) + ## β„Ή Loading simulation Load required packages. 
@@ -77,22 +53,22 @@ testdata_dir <- file.path("..", "tests", "testthat", "testdata") ``` r # Define model parameters -param_class <- defaults() -param_class[["update"]](list()) -param_class[["update"]](list(patient_inter = 4L, - mean_n_consult_time = 10L, - number_of_nurses = 5L, - data_collection_period = 80L, - number_of_runs = 10L, - cores = 1L)) - -# Run the trial -raw_results <- trial(param_class) +param <- parameters( + patient_inter = 4L, + mean_n_consult_time = 10L, + number_of_nurses = 5L, + data_collection_period = 80L, + number_of_runs = 10L, + cores = 1L +) + +# Run the replications +raw_results <- runner(param) ``` ``` r # Process results -results <- process_replications(raw_results) +results <- get_run_results(raw_results) # Preview head(results) diff --git a/run_rmarkdown.sh b/run_rmarkdown.sh new file mode 100644 index 0000000..ad43c8c --- /dev/null +++ b/run_rmarkdown.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# Ensure the script exits on error +set -e + +# Find and render all .Rmd files in the specified directory +for file in "rmarkdown"/*.Rmd; do + echo "Rendering: $file" + Rscript -e "rmarkdown::render('$file')" +done + +echo "Rendering complete!" 
diff --git a/tests/testthat/test-backtest.R b/tests/testthat/test-backtest.R index bbb0306..8b37d04 100644 --- a/tests/testthat/test-backtest.R +++ b/tests/testthat/test-backtest.R @@ -4,18 +4,19 @@ test_that("results from a new run match those previously generated", { # Choose a specific set of parameters (ensuring test remains on the same - # set, regardless of any changes to Defaults) - param_class <- defaults() - param_class[["update"]](list(patient_inter = 4L, - mean_n_consult_time = 10L, - number_of_nurses = 5L, - data_collection_period = 80L, - number_of_runs = 10L, - cores = 1L)) + # set, regardless of any changes to parameters()) + param <- parameters( + patient_inter = 4L, + mean_n_consult_time = 10L, + number_of_nurses = 5L, + data_collection_period = 80L, + number_of_runs = 10L, + cores = 1L + ) - # Run the trial then get the monitored arrivals and resources - envs <- trial(param_class) - results <- as.data.frame(process_replications(envs)) + # Run the replications then get the monitored arrivals and resources + raw_results <- runner(param) + results <- as.data.frame(get_run_results(raw_results)) # Import the expected results exp_results <- read.csv(test_path("testdata", "results.csv")) diff --git a/tests/testthat/test-functionaltest.R b/tests/testthat/test-functionaltest.R new file mode 100644 index 0000000..fa77451 --- /dev/null +++ b/tests/testthat/test-functionaltest.R @@ -0,0 +1,151 @@ +# Functional testing for the Discrete-Event Simulation (DES) Model. +# +# These verify that the system or components perform their intended +# functionality. 
+ + +test_that("values are non-negative", { + # Run model with standard parameters + raw_results <- model(run_number = 0L, param = parameters()) + results <- get_run_results(raw_results) + + # Check that at least one patient was processed + expect_gt(results[["arrivals"]], 0L) + + # Check that the wait time is not negative + expect_gte(results[["mean_waiting_time_nurse"]], 0L) + + # Check that the activity time and utilisation are greater than 0 + expect_gt(results[["mean_activity_time_nurse"]], 0L) + expect_gt(results[["utilisation_nurse"]], 0L) +}) + + +test_that("under high demand, utilisation is valid and last patient is unseen", + { + # Run model with high number of arrivals and only one nurse + param <- parameters( + number_of_nurses = 1L, + patient_inter = 0.1, + number_of_runs = 1L, + cores = 1L + ) + raw_results <- runner(param) + results <- get_run_results(raw_results) + + # Check that utilisation does not exceed 1 or drop below 0 + expect_lte(results[["utilisation_nurse"]], 1L) + expect_gte(results[["utilisation_nurse"]], 0L) + + # Check that final patient is not seen by the nurse + expect_identical( + tail(raw_results[["arrivals"]], 1L)[["end_time"]], NA_real_ + ) + expect_identical( + tail(raw_results[["arrivals"]], 1L)[["activity_time"]], NA_real_ + ) + } +) + + +test_that("runner outputs a named list with length 2 and correct names", { + # Simple run of the model + param <- parameters( + data_collection_period = 50L, number_of_runs = 1L, cores = 1L + ) + result <- runner(param) + + # Check the structure + expect_type(result, "list") + expect_length(result, 2L) + expect_named(result, c("arrivals", "resources")) + + # Check that arrivals and resources are dataframes + expect_s3_class(result[["arrivals"]], "data.frame") + expect_s3_class(result[["resources"]], "data.frame") +}) + + +patrick::with_parameters_test_that( + "adjusting parameters decreases the wait time and utilisation", + { + # Set some defaults which will ensure sufficient 
arrivals/capacity to see + # variation in wait time and utilisation + default_param <- parameters(number_of_nurses = 4L, + patient_inter = 3L, + mean_n_consult_time = 15L, + data_collection_period = 200L, + number_of_runs = 1L) + + # Run model with initial value + init_param <- default_param + init_param[[param_name]] <- init_value + init_result <- get_run_results(runner(init_param)) + + # Run model with adjusted value + adj_param <- default_param + adj_param[[param_name]] <- adj_value + adj_result <- get_run_results(runner(adj_param)) + + # Check that waiting times in the adjusted model are lower + expect_lt(adj_result[["mean_waiting_time_nurse"]], + init_result[["mean_waiting_time_nurse"]]) + + # Check that utilisation in the adjusted model is lower + expect_lt(adj_result[["utilisation_nurse"]], + init_result[["utilisation_nurse"]]) + }, + patrick::cases( + list(param_name = "number_of_nurses", init_value = 3L, adj_value = 9L), + list(param_name = "patient_inter", init_value = 2L, adj_value = 15L), + list(param_name = "mean_n_consult_time", init_value = 30L, adj_value = 3L) + ) +) + + +patrick::with_parameters_test_that( + "adjusting parameters reduces the number of arrivals", + { + # Set some default parameters + default_param <- parameters(data_collection_period = 200L, + number_of_runs = 1L) + + # Run model with initial value + init_param <- default_param + init_param[[param_name]] <- init_value + init_result <- get_run_results(model(run_number = 1L, init_param)) + + # Run model with adjusted value + adj_param <- default_param + adj_param[[param_name]] <- adj_value + adj_result <- get_run_results(model(run_number = 1L, adj_param)) + + # Check that arrivals in the adjusted model are lower + expect_lt(adj_result[["arrivals"]], init_result[["arrivals"]]) + }, + patrick::cases( + list(param_name = "patient_inter", init_value = 2L, adj_value = 15L), + list(param_name = "data_collection_period", init_value = 2000L, + adj_value = 500L) + ) +) + + +test_that("the same 
seed returns the same result", { + # Run model twice using same run number (which will set the seed) + raw1 <- model(run_number = 0L, param = parameters()) + raw2 <- model(run_number = 0L, param = parameters()) + expect_identical(get_run_results(raw1), get_run_results(raw2)) + + # Conversely, if run with different run number, expect different + raw1 <- model(run_number = 0L, param = parameters()) + raw2 <- model(run_number = 1L, param = parameters()) + expect_failure( + expect_identical(get_run_results(raw1), get_run_results(raw2)) + ) + + # Repeat experiment, but with multiple replications + raw1 <- runner(param = parameters()) + raw2 <- runner(param = parameters()) + expect_identical(get_run_results(raw1), get_run_results(raw2)) +}) diff --git a/tests/testthat/test-unittest.R b/tests/testthat/test-unittest.R index 423935c..6a78ea1 100644 --- a/tests/testthat/test-unittest.R +++ b/tests/testthat/test-unittest.R @@ -1,24 +1,58 @@ # Unit testing for the Discrete-Event Simulation (DES) Model. # -# These check specific parts of the simulation and code, ensuring they work -# correctly and as expected. +# Unit tests are a type of functional testing that focuses on individual +# components (e.g. functions) and tests them in isolation to ensure they +# work as intended. +# +# In some cases, we check for a specific error message. This is because the +# test could otherwise pass with any error (and not necessarily the specific +# error we are checking for). 
-test_that("the same seed returns the same result", { +patrick::with_parameters_test_that( + "the model produces an error for invalid inputs", + { + # Create parameter list, with the modified input + param <- do.call(parameters, setNames(list(param_value), param_name)) - # Run model twice using same run number (which will set the seed) - env1 <- model(run_number = 0L, param_class = defaults()) - env2 <- model(run_number = 0L, param_class = defaults()) - expect_identical(process_replications(env1), process_replications(env2)) + # Construct expected error message + expected_message <- if (rule == "p") { + sprintf('The parameter "%s" must be greater than 0.', param_name) + } else { + sprintf( + 'The parameter "%s" must be an integer greater than or equal to 0.', + param_name + ) + } - # Conversely, if run with different run number, expect different - env1 <- model(run_number = 0L, param_class = defaults()) - env2 <- model(run_number = 1L, param_class = defaults()) - expect_failure( - expect_identical(process_replications(env1), process_replications(env2)) + # Verify that attempting to run the model raises the correct error message + expect_error(model(param = param, run_number = 0L), expected_message) + }, + patrick::cases( + # Parameters which should be positive (p) + list(param_name = "patient_inter", param_value = 0L, rule = "p"), + list(param_name = "mean_n_consult_time", param_value = 0L, rule = "p"), + list(param_name = "number_of_runs", param_value = 0L, rule = "p"), + # Parameters which should be non-negative integers (n) + list(param_name = "number_of_nurses", param_value = -1L, rule = "n"), + list(param_name = "data_collection_period", param_value = -1L, rule = "n") ) +) + +test_that("the model produces an error if a new parameter name is used", { + # Add new parameter to list generated by function + param <- parameters() + param[["new_entry"]] <- 3L + + # Check it fails with the anticipated error message + expect_error(model(run_number = 1L, param = param), 
"Extra keys: new_entry.") +}) + +test_that("the model produces an error if parameters are missing", { + # Remove a parameter + param <- parameters() + param <- within(param, rm("patient_inter")) - # Repeat experiment, but with multiple replications - envs1 <- trial(param_class = defaults()) - envs2 <- trial(param_class = defaults()) - expect_identical(process_replications(envs1), process_replications(envs2)) + # Check it fails with the anticipated error message + expect_error(model(run_number = 1L, param = param), + "Missing keys: patient_inter") })