Merge pull request #47 from pythonhealthdatascience/dev

amyheather · web-flow · commit 7d77c55c4825 · 2025-03-25T15:56:25.000Z
Dev
diff --git a/R/choose_replications.R b/R/choose_replications.R
@@ -227,11 +227,6 @@ ReplicationsAlgorithm <- R6Class("ReplicationsAlgorithm", list( # nolint: object
   desired_precision = NA,
 
   #' @field initial_replications Number of initial replications to perform.
-  #' Note that the minimum solution will be the value of initial_replications
-  #' (i.e. if require 20 initial replications but was resolved in 5, solution
-  #' output will still be 20). Although, if initial_replications < 3, solution
-  #' will still be at least 3, as that is the minimum required to calculate the
-  #' confidence intervals.
   initial_replications = NA,
 
   #' @field look_ahead Minimum additional replications to look ahead to assess
@@ -313,6 +308,47 @@ ReplicationsAlgorithm <- R6Class("ReplicationsAlgorithm", list( # nolint: object
     as.integer((self$look_ahead / 100L) * max(self$reps, 100L))
   },
 
+  #' @description
+  #' Find the first position in a list of deviations where the value is below
+  #' desired_precision and stays below it for the next look_ahead elements.
+  #' Used to correct the ReplicationsAlgorithm, which cannot itself return a
+  #' solution below initial_replications.
+  #' @param lst List of deviations (numbers or NA) to test against precision.
+  #' @return Integer position of the first maintained solution, or NULL if
+  #' there is none (empty list, all NA, never below, or lookahead not met).
+  find_position = function(lst) {
+    # Ensure that the input is a list
+    if (!is.list(lst)) {
+      stop("find_position requires a list but was supplied: ", typeof(lst),
+           call. = FALSE)
+    }
+
+    # Flag elements below the desired precision. isTRUE() maps NA to FALSE, so
+    # missing deviations count as "not met" rather than making if() fail.
+    is_below <- function(x) isTRUE(x < self$desired_precision)
+    below <- vapply(lst, is_below, logical(1L))
+
+    # No solution if list is empty, all NA, or never below the threshold
+    if (!any(below)) {
+      return(NULL)
+    }
+
+    # Search from the first value below the threshold up to the last point
+    # where we still have enough elements to look ahead
+    start_index <- which(below)[1L]
+    max_index <- length(lst) - self$look_ahead
+    if (start_index > max_index) {
+      return(NULL)
+    }
+    for (i in start_index:max_index) {
+      # Solution if current value and whole lookahead window are below
+      if (all(below[i:(i + self$look_ahead)])) {
+        return(i)
+      }
+    }
+    return(NULL) # nolint: return_linter
+  },
+
   #' @description
   #' Executes the replication algorithm, determining the necessary number
   #' of replications to achieve and maintain the desired precision.
@@ -417,6 +453,16 @@ ReplicationsAlgorithm <- R6Class("ReplicationsAlgorithm", list( # nolint: object
       }
     }
 
+    # Correct each metric, as solutions so far cannot be below initial reps
+    for (metric in names(solutions)){
+      # Search this metric's recorded deviations for a maintained solution
+      adj_nreps <- self$find_position(as.list(observers[[metric]]$deviation))
+      # Overwrite only if a position was found and nreps is not already NA
+      if (!is.null(adj_nreps) && !is.na(solutions[[metric]]$nreps)) {
+        solutions[[metric]]$nreps <- adj_nreps
+      }
+    }
+
     # Extract minimum replications for each metric
     self$nreps <- lapply(solutions, function(x) x$nreps)
 
diff --git a/renv.lock b/renv.lock
@@ -1128,15 +1128,15 @@
     },
     "parallelly": {
       "Package": "parallelly",
-      "Version": "1.42.0",
+      "Version": "1.43.0",
       "Source": "Repository",
       "Repository": "CRAN",
       "Requirements": [
         "parallel",
         "tools",
         "utils"
       ],
-      "Hash": "78f830734a4b488f2c72bf00cde6381e"
+      "Hash": "ca40f736e4d2dc6981c1dc9d14ea3dcf"
     },
     "patrick": {
       "Package": "patrick",
diff --git a/rmarkdown/choosing_replications.Rmd b/rmarkdown/choosing_replications.Rmd
@@ -74,7 +74,7 @@ head(ci_df)
 
 # View first ten rows where percentage deviation is below 5
 ci_df %>%
-  filter(deviation < 5L) %>%
+  filter(deviation < 0.05) %>%
   head(10L)
 ```
 
@@ -128,7 +128,7 @@ head(ci_df)
 
 # View first ten rows where percentage deviation is below 5
 ci_df %>%
-  filter(deviation < 5L) %>%
+  filter(deviation < 0.05) %>%
   head(10L)
 
 # Create plot
diff --git a/rmarkdown/choosing_replications.md b/rmarkdown/choosing_replications.md
@@ -1,7 +1,7 @@
 Choosing replications
 ================
 Amy Heather
-2025-03-18
+2025-03-21
 
 - [Set up](#set-up)
 - [Choosing the number of
@@ -157,21 +157,21 @@ head(ci_df)
 ``` r
 # View first ten rows where percentage deviation is below 5
 ci_df %>%
-  filter(deviation < 5L) %>%
+  filter(deviation < 0.05) %>%
   head(10L)
 ```
 
-    ##    replications      data cumulative_mean    stdev lower_ci upper_ci deviation
-    ## 1             3 12.141014       10.756525 1.411251 7.250782 14.26227 0.3259178
-    ## 2             4  8.889448       10.289755 1.482986 7.929994 12.64952 0.2293312
-    ## 3             5  7.603423        9.752489 1.758611 7.568885 11.93609 0.2239022
-    ## 4             6  5.009584        8.962005 2.494667 6.344013 11.58000 0.2921212
-    ## 5             7 10.140922        9.130421 2.320492 6.984324 11.27652 0.2350491
-    ## 6             8  8.303760        9.027089 2.168148 7.214472 10.83971 0.2007975
-    ## 7             9  8.667355        8.987118 2.031658 7.425448 10.54879 0.1737677
-    ## 8            10 10.806375        9.169044 1.999995 7.738334 10.59975 0.1560370
-    ## 9            11 15.829847        9.774571 2.762839 7.918471 11.63067 0.1898907
-    ## 10           12 12.654772       10.014588 2.762362 8.259467 11.76971 0.1752564
+    ##    replications      data cumulative_mean    stdev lower_ci upper_ci  deviation
+    ## 1            83  9.371854        9.571474 2.189385 9.093408 10.04954 0.04994690
+    ## 2            84 11.420438        9.593485 2.185486 9.119206 10.06776 0.04943768
+    ## 3            85 12.854029        9.631845 2.201037 9.157092 10.10660 0.04928989
+    ## 4            86  8.139575        9.614493 2.193960 9.144107 10.08488 0.04892468
+    ## 5            87  7.425008        9.589326 2.193762 9.121772 10.05688 0.04875777
+    ## 6            88 14.565590        9.645875 2.244699 9.170268 10.12148 0.04930671
+    ## 7            89 10.722522        9.657972 2.234825 9.187201 10.12874 0.04874429
+    ## 8            90  7.670319        9.635887 2.232089 9.168385 10.10339 0.04851676
+    ## 9            91 11.396571        9.655235 2.227314 9.191374 10.11910 0.04804240
+    ## 10           92  8.807005        9.646015 2.216807 9.186927 10.10510 0.04759352
     ##                   metric
     ## 1  mean_serve_time_nurse
     ## 2  mean_serve_time_nurse
@@ -280,32 +280,32 @@ head(ci_df)
 ``` r
 # View first ten rows where percentage deviation is below 5
 ci_df %>%
-  filter(deviation < 5L) %>%
+  filter(deviation < 0.05) %>%
   head(10L)
 ```
 
-    ##    replications      data cumulative_mean      stdev  lower_ci  upper_ci
-    ## 1             3 0.6440834       0.5710029 0.09133618 0.3441113 0.7978946
-    ## 2             4 0.3507966       0.5159514 0.13298209 0.3043472 0.7275555
-    ## 3             5 0.3423309       0.4812273 0.13889561 0.3087656 0.6536889
-    ## 4             6 0.2285129       0.4391082 0.16148587 0.2696392 0.6085772
-    ## 5             7 0.5248777       0.4513610 0.15093814 0.3117665 0.5909555
-    ## 6             8 0.4061181       0.4457056 0.14065408 0.3281159 0.5632954
-    ## 7             9 0.3583505       0.4359995 0.13475349 0.3324188 0.5395802
-    ## 8            10 0.5611832       0.4485179 0.13307137 0.3533244 0.5437114
-    ## 9            11 0.5558867       0.4582787 0.13032726 0.3707236 0.5458337
-    ## 10           12 0.4866132       0.4606399 0.12453108 0.3815166 0.5397632
-    ##    deviation            metric
-    ## 1  0.3973564 utilisation_nurse
-    ## 2  0.4101243 utilisation_nurse
-    ## 3  0.3583789 utilisation_nurse
-    ## 4  0.3859391 utilisation_nurse
-    ## 5  0.3092746 utilisation_nurse
-    ## 6  0.2638283 utilisation_nurse
-    ## 7  0.2375707 utilisation_nurse
-    ## 8  0.2122402 utilisation_nurse
-    ## 9  0.1910520 utilisation_nurse
-    ## 10 0.1717682 utilisation_nurse
+    ##    replications      data cumulative_mean     stdev  lower_ci  upper_ci
+    ## 1           128 0.5547391       0.4607563 0.1308109 0.4378769 0.4836357
+    ## 2           129 0.2973943       0.4594899 0.1310903 0.4366524 0.4823275
+    ## 3           130 0.6534065       0.4609816 0.1316842 0.4381307 0.4838325
+    ## 4           131 0.4492215       0.4608918 0.1311807 0.4382170 0.4835667
+    ## 5           132 0.3111502       0.4597574 0.1313274 0.4371450 0.4823698
+    ## 6           133 0.6151101       0.4609255 0.1315207 0.4383667 0.4834843
+    ## 7           134 0.4005429       0.4604749 0.1311291 0.4380688 0.4828809
+    ## 8           135 0.2899786       0.4592119 0.1314605 0.4368342 0.4815897
+    ## 9           136 0.3737779       0.4585837 0.1311774 0.4363379 0.4808295
+    ## 10          137 0.4161378       0.4582739 0.1307445 0.4361840 0.4803638
+    ##     deviation            metric
+    ## 1  0.04965623 utilisation_nurse
+    ## 2  0.04970194 utilisation_nurse
+    ## 3  0.04957009 utilisation_nurse
+    ## 4  0.04919775 utilisation_nurse
+    ## 5  0.04918338 utilisation_nurse
+    ## 6  0.04894244 utilisation_nurse
+    ## 7  0.04865849 utilisation_nurse
+    ## 8  0.04873075 utilisation_nurse
+    ## 9  0.04850981 utilisation_nurse
+    ## 10 0.04820232 utilisation_nurse
 
 ``` r
 # Create plot
@@ -574,4 +574,4 @@ seconds <- as.integer(runtime %% 60L)
 cat(sprintf("Notebook run time: %dm %ds", minutes, seconds))
 ```
 
-    ## Notebook run time: 1m 29s
+    ## Notebook run time: 1m 33s
diff --git a/tests/testthat/test-unittest-replications.R b/tests/testthat/test-unittest-replications.R
@@ -130,3 +130,46 @@ test_that("ReplicationTaubliser's update method appends new data + makes df", {
   )
   expect_identical(tab$summary_table(), mock_df)
 })
+
+
+patrick::with_parameters_test_that(
+  "the find_position() method from ReplicationsAlgorithm is correct",
+  {
+    # Algorithm with a 0.5 threshold and the look_ahead of the current case
+    algorithm <- ReplicationsAlgorithm$new(
+      param = parameters(), desired_precision = 0.5, look_ahead = look_ahead
+    )
+    # Position found for the case's list must match the expected value
+    found <- algorithm$find_position(lst)
+    expect_identical(found, exp)
+  },
+  patrick::cases(
+    # Leading NAs, then the first value under the threshold is at position 4
+    list(lst = list(NA, NA, 0.8, 0.4, 0.3),
+         exp = 4L, look_ahead = 0L),
+    # Every value present and already under the threshold
+    list(lst = list(0.4, 0.3, 0.2, 0.1),
+         exp = 1L, look_ahead = 0L),
+    # Every value present but none under the threshold
+    list(lst = list(0.8, 0.9, 0.8, 0.7),
+         exp = NULL, look_ahead = 0L),
+    # Nothing but missing values
+    list(lst = list(NA, NA, NA, NA),
+         exp = NULL, look_ahead = 0L),
+    # Zero-length input
+    list(lst = list(),
+         exp = NULL, look_ahead = 0L),
+    # Under the threshold, but too few elements left to cover the lookahead
+    list(lst = list(NA, NA, 0.8, 0.8, 0.3, 0.3, 0.3),
+         exp = NULL, look_ahead = 3L),
+    # Under the threshold and maintained for the full lookahead
+    list(lst = list(NA, NA, 0.8, 0.8, 0.3, 0.3, 0.3, 0.3),
+         exp = 5L, look_ahead = 3L)
+  )
+)
+
+
+test_that("find_position() fails if not supplied a list", {
+  alg <- ReplicationsAlgorithm$new(param = parameters())
+  expect_error(alg$find_position(c(1L, 2L, 3L)), regexp = "requires a list")
+})