Skip to content

Commit 7d77c55

Browse files
authored
Merge pull request #47 from pythonhealthdatascience/dev
Dev
2 parents 997ce22 + 5956e2b commit 7d77c55

File tree

5 files changed

+135
-46
lines changed

5 files changed

+135
-46
lines changed

R/choose_replications.R

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -227,11 +227,6 @@ ReplicationsAlgorithm <- R6Class("ReplicationsAlgorithm", list( # nolint: object
227227
desired_precision = NA,
228228

229229
#' @field initial_replications Number of initial replications to perform.
230-
#' Note that the minimum solution will be the value of initial_replications
231-
#' (i.e. if require 20 initial replications but was resolved in 5, solution
232-
#' output will still be 20). Although, if initial_replications < 3, solution
233-
#' will still be at least 3, as that is the minimum required to calculate the
234-
#' confidence intervals.
235230
initial_replications = NA,
236231

237232
#' @field look_ahead Minimum additional replications to look ahead to assess
@@ -313,6 +308,47 @@ ReplicationsAlgorithm <- R6Class("ReplicationsAlgorithm", list( # nolint: object
313308
as.integer((self$look_ahead / 100L) * max(self$reps, 100L))
314309
},
315310

311+
#' @description
312+
#' Find the first position where element is below deviation, and this is
313+
#' maintained through the lookahead period.
314+
#' This is used to correct the ReplicationsAlgorithm, which cannot return
315+
#' a solution below the initial_replications.
316+
#' @param lst List of numbers to compare against desired deviation.
317+
#' @return Integer, minimum replications required to meet and maintain
318+
#' precision.
319+
find_position = function(lst) {
320+
# Ensure that the input is a list
321+
if (!is.list(lst)) {
322+
stop("find_position requires a list but was supplied: ", typeof(lst),
323+
call. = FALSE)
324+
}
325+
326+
# Check if list is empty or no values below threshold
327+
if (length(lst) == 0L || all(is.na(lst)) || !any(unlist(lst) < 0.5)) {
328+
return(NULL)
329+
}
330+
331+
# Find the first non-null value in the list
332+
start_index <- which(!vapply(lst, is.na, logical(1L)))[1L]
333+
334+
# Iterate through the list, stopping when at last point where we still
335+
# have enough elements to look ahead
336+
max_index <- length(lst) - self$look_ahead
337+
if (start_index > max_index) {
338+
return(NULL)
339+
}
340+
for (i in start_index:max_index) {
341+
# Trim to list with current value + lookahead
342+
# Check if all fall below the desired deviation
343+
segment <- lst[i:(i + self$look_ahead)]
344+
if (all(vapply(segment,
345+
function(x) x < self$desired_precision, logical(1L)))) {
346+
return(i)
347+
}
348+
}
349+
return(NULL) # nolint: return_linter
350+
},
351+
316352
#' @description
317353
#' Executes the replication algorithm, determining the necessary number
318354
#' of replications to achieve and maintain the desired precision.
@@ -417,6 +453,16 @@ ReplicationsAlgorithm <- R6Class("ReplicationsAlgorithm", list( # nolint: object
417453
}
418454
}
419455

456+
# Correction to result...
457+
for (metric in names(solutions)){
458+
# Use find_position() to check for solution in initial replications
459+
adj_nreps <- self$find_position(as.list(observers[[metric]]$deviation))
460+
# If there was a maintained solution, replace in solutions
461+
if (!is.null(adj_nreps) && !is.na(solutions[[metric]]$nreps)) {
462+
solutions[[metric]]$nreps <- adj_nreps
463+
}
464+
}
465+
420466
# Extract minimum replications for each metric
421467
self$nreps <- lapply(solutions, function(x) x$nreps)
422468

renv.lock

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,15 +1128,15 @@
11281128
},
11291129
"parallelly": {
11301130
"Package": "parallelly",
1131-
"Version": "1.42.0",
1131+
"Version": "1.43.0",
11321132
"Source": "Repository",
11331133
"Repository": "CRAN",
11341134
"Requirements": [
11351135
"parallel",
11361136
"tools",
11371137
"utils"
11381138
],
1139-
"Hash": "78f830734a4b488f2c72bf00cde6381e"
1139+
"Hash": "ca40f736e4d2dc6981c1dc9d14ea3dcf"
11401140
},
11411141
"patrick": {
11421142
"Package": "patrick",

rmarkdown/choosing_replications.Rmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ head(ci_df)
7474
7575
# View first ten rows where percentage deviation is below 5
7676
ci_df %>%
77-
filter(deviation < 5L) %>%
77+
filter(deviation < 0.05) %>%
7878
head(10L)
7979
```
8080

@@ -128,7 +128,7 @@ head(ci_df)
128128
129129
# View first ten rows where percentage deviation is below 5
130130
ci_df %>%
131-
filter(deviation < 5L) %>%
131+
filter(deviation < 0.05) %>%
132132
head(10L)
133133
134134
# Create plot

rmarkdown/choosing_replications.md

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Choosing replications
22
================
33
Amy Heather
4-
2025-03-18
4+
2025-03-21
55

66
- [Set up](#set-up)
77
- [Choosing the number of
@@ -157,21 +157,21 @@ head(ci_df)
157157
``` r
158158
# View first ten rows where percentage deviation is below 5
159159
ci_df %>%
160-
filter(deviation < 5L) %>%
160+
filter(deviation < 0.05) %>%
161161
head(10L)
162162
```
163163

164-
## replications data cumulative_mean stdev lower_ci upper_ci deviation
165-
## 1 3 12.141014 10.756525 1.411251 7.250782 14.26227 0.3259178
166-
## 2 4 8.889448 10.289755 1.482986 7.929994 12.64952 0.2293312
167-
## 3 5 7.603423 9.752489 1.758611 7.568885 11.93609 0.2239022
168-
## 4 6 5.009584 8.962005 2.494667 6.344013 11.58000 0.2921212
169-
## 5 7 10.140922 9.130421 2.320492 6.984324 11.27652 0.2350491
170-
## 6 8 8.303760 9.027089 2.168148 7.214472 10.83971 0.2007975
171-
## 7 9 8.667355 8.987118 2.031658 7.425448 10.54879 0.1737677
172-
## 8 10 10.806375 9.169044 1.999995 7.738334 10.59975 0.1560370
173-
## 9 11 15.829847 9.774571 2.762839 7.918471 11.63067 0.1898907
174-
## 10 12 12.654772 10.014588 2.762362 8.259467 11.76971 0.1752564
164+
## replications data cumulative_mean stdev lower_ci upper_ci deviation
165+
## 1 83 9.371854 9.571474 2.189385 9.093408 10.04954 0.04994690
166+
## 2 84 11.420438 9.593485 2.185486 9.119206 10.06776 0.04943768
167+
## 3 85 12.854029 9.631845 2.201037 9.157092 10.10660 0.04928989
168+
## 4 86 8.139575 9.614493 2.193960 9.144107 10.08488 0.04892468
169+
## 5 87 7.425008 9.589326 2.193762 9.121772 10.05688 0.04875777
170+
## 6 88 14.565590 9.645875 2.244699 9.170268 10.12148 0.04930671
171+
## 7 89 10.722522 9.657972 2.234825 9.187201 10.12874 0.04874429
172+
## 8 90 7.670319 9.635887 2.232089 9.168385 10.10339 0.04851676
173+
## 9 91 11.396571 9.655235 2.227314 9.191374 10.11910 0.04804240
174+
## 10 92 8.807005 9.646015 2.216807 9.186927 10.10510 0.04759352
175175
## metric
176176
## 1 mean_serve_time_nurse
177177
## 2 mean_serve_time_nurse
@@ -280,32 +280,32 @@ head(ci_df)
280280
``` r
281281
# View first ten rows where percentage deviation is below 5
282282
ci_df %>%
283-
filter(deviation < 5L) %>%
283+
filter(deviation < 0.05) %>%
284284
head(10L)
285285
```
286286

287-
## replications data cumulative_mean stdev lower_ci upper_ci
288-
## 1 3 0.6440834 0.5710029 0.09133618 0.3441113 0.7978946
289-
## 2 4 0.3507966 0.5159514 0.13298209 0.3043472 0.7275555
290-
## 3 5 0.3423309 0.4812273 0.13889561 0.3087656 0.6536889
291-
## 4 6 0.2285129 0.4391082 0.16148587 0.2696392 0.6085772
292-
## 5 7 0.5248777 0.4513610 0.15093814 0.3117665 0.5909555
293-
## 6 8 0.4061181 0.4457056 0.14065408 0.3281159 0.5632954
294-
## 7 9 0.3583505 0.4359995 0.13475349 0.3324188 0.5395802
295-
## 8 10 0.5611832 0.4485179 0.13307137 0.3533244 0.5437114
296-
## 9 11 0.5558867 0.4582787 0.13032726 0.3707236 0.5458337
297-
## 10 12 0.4866132 0.4606399 0.12453108 0.3815166 0.5397632
298-
## deviation metric
299-
## 1 0.3973564 utilisation_nurse
300-
## 2 0.4101243 utilisation_nurse
301-
## 3 0.3583789 utilisation_nurse
302-
## 4 0.3859391 utilisation_nurse
303-
## 5 0.3092746 utilisation_nurse
304-
## 6 0.2638283 utilisation_nurse
305-
## 7 0.2375707 utilisation_nurse
306-
## 8 0.2122402 utilisation_nurse
307-
## 9 0.1910520 utilisation_nurse
308-
## 10 0.1717682 utilisation_nurse
287+
## replications data cumulative_mean stdev lower_ci upper_ci
288+
## 1 128 0.5547391 0.4607563 0.1308109 0.4378769 0.4836357
289+
## 2 129 0.2973943 0.4594899 0.1310903 0.4366524 0.4823275
290+
## 3 130 0.6534065 0.4609816 0.1316842 0.4381307 0.4838325
291+
## 4 131 0.4492215 0.4608918 0.1311807 0.4382170 0.4835667
292+
## 5 132 0.3111502 0.4597574 0.1313274 0.4371450 0.4823698
293+
## 6 133 0.6151101 0.4609255 0.1315207 0.4383667 0.4834843
294+
## 7 134 0.4005429 0.4604749 0.1311291 0.4380688 0.4828809
295+
## 8 135 0.2899786 0.4592119 0.1314605 0.4368342 0.4815897
296+
## 9 136 0.3737779 0.4585837 0.1311774 0.4363379 0.4808295
297+
## 10 137 0.4161378 0.4582739 0.1307445 0.4361840 0.4803638
298+
## deviation metric
299+
## 1 0.04965623 utilisation_nurse
300+
## 2 0.04970194 utilisation_nurse
301+
## 3 0.04957009 utilisation_nurse
302+
## 4 0.04919775 utilisation_nurse
303+
## 5 0.04918338 utilisation_nurse
304+
## 6 0.04894244 utilisation_nurse
305+
## 7 0.04865849 utilisation_nurse
306+
## 8 0.04873075 utilisation_nurse
307+
## 9 0.04850981 utilisation_nurse
308+
## 10 0.04820232 utilisation_nurse
309309

310310
``` r
311311
# Create plot
@@ -574,4 +574,4 @@ seconds <- as.integer(runtime %% 60L)
574574
cat(sprintf("Notebook run time: %dm %ds", minutes, seconds))
575575
```
576576

577-
## Notebook run time: 1m 29s
577+
## Notebook run time: 1m 33s

tests/testthat/test-unittest-replications.R

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,46 @@ test_that("ReplicationTaubliser's update method appends new data + makes df", {
130130
)
131131
expect_identical(tab$summary_table(), mock_df)
132132
})
133+
134+
135+
patrick::with_parameters_test_that(
136+
"the find_position() method from ReplicationsAlgorithm is correct",
137+
{
138+
# Set threshold to 0.5, with provided look_ahead
139+
alg <- ReplicationsAlgorithm$new(param = parameters(),
140+
desired_precision = 0.5,
141+
look_ahead = look_ahead)
142+
# Get result from algorithm and compare to expected
143+
result <- alg$find_position(lst)
144+
expect_identical(result, exp)
145+
},
146+
patrick::cases(
147+
# Normal case
148+
list(lst = list(NA, NA, 0.8, 0.4, 0.3),
149+
exp = 4L, look_ahead = 0L),
150+
# No NA values
151+
list(lst = list(0.4, 0.3, 0.2, 0.1),
152+
exp = 1L, look_ahead = 0L),
153+
# No values below threshold
154+
list(lst = list(0.8, 0.9, 0.8, 0.7),
155+
exp = NULL, look_ahead = 0L),
156+
# No values
157+
list(lst = list(NA, NA, NA, NA),
158+
exp = NULL, look_ahead = 0L),
159+
# Empty list
160+
list(lst = list(),
161+
exp = NULL, look_ahead = 0L),
162+
# Not full lookahead
163+
list(lst = list(NA, NA, 0.8, 0.8, 0.3, 0.3, 0.3),
164+
exp = NULL, look_ahead = 3L),
165+
# Meets lookahead
166+
list(lst = list(NA, NA, 0.8, 0.8, 0.3, 0.3, 0.3, 0.3),
167+
exp = 5L, look_ahead = 3L)
168+
)
169+
)
170+
171+
172+
test_that("find_position() fails if not supplied a list", {
173+
alg <- ReplicationsAlgorithm$new(param = parameters())
174+
expect_error(alg$find_position(c(1L, 2L, 3L)))
175+
})

0 commit comments

Comments
 (0)