Skip to content

Commit f16e871

Browse files
authored
Merge pull request #169 from mrc-ide/sero_fitting
sero_df checking and handling correctly
2 parents f41e648 + b3c2b23 commit f16e871

File tree

6 files changed

+127
-37
lines changed

6 files changed

+127
-37
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: squire
22
Type: Package
33
Title: SEIR transmission model of COVID-19
4-
Version: 0.6.7
4+
Version: 0.6.8
55
Authors@R: c(
66
person("OJ", "Watson", email = "o.watson15@imperial.ac.uk", role = c("aut", "cre")),
77
person("Patrick", "Walker", email = "patrick.walker06@imperial.ac.uk", role = c("aut")),

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# squire 0.6.8
2+
3+
* `run_deterministic_comparison` patches for sero fitting and argument checks
4+
15
# squire 0.6.7
26

37
* `pmcmc` can now be used to fit to serology data (deterministic model only)

R/particle.R

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ compare_output <- function(model, pars_obs, data, type="explicit_SEEIR_model") {
361361
log_weights <- log_weights +
362362
ll_nbinom(data$deaths[t], model_deaths, phi_death, k_death, exp_noise)
363363

364-
}
364+
}
365365

366366
# We are not going to be bringing cases in so comment this out
367367

@@ -484,9 +484,9 @@ intervention_dates_for_odin <- function(dates,
484484
dates <- dates[include]
485485
change <- change[include]
486486

487-
# if start date is in the middle of our dates but not incldued
488-
# then remove all earlier dates and change the last date before the
489-
# start date to the start date
487+
# if start date is in the middle of our dates but not incldued
488+
# then remove all earlier dates and change the last date before the
489+
# start date to the start date
490490
} else if (any(start_date >= dates)) {
491491

492492
# which are before the start date
@@ -501,8 +501,8 @@ intervention_dates_for_odin <- function(dates,
501501

502502
dates[1] <- as.Date(start_date)
503503

504-
# if all the dates are after the start date then add the start date
505-
# and we assume the first change value is starting_change
504+
# if all the dates are after the start date then add the start date
505+
# and we assume the first change value is starting_change
506506
} else {
507507

508508
extra_start <- seq.Date(start_date, dates[1]-1, 1)
@@ -716,11 +716,14 @@ run_deterministic_comparison <- function(data,
716716

717717
# calculate ll for the seroprevalence
718718
lls <- 0
719-
if("sero_df" %in% obs_params && "sero_det" %in% obs_params) {
719+
if("sero_df" %in% names(obs_params) && "sero_det" %in% names(obs_params)) {
720720

721721
sero_df <- obs_params$sero_df
722722
sero_det <- obs_params$sero_det
723723

724+
# put some checks here that sero_df is correctly formatted
725+
check_sero_df(sero_df)
726+
724727
# were there actually seroprevalence data points to compare against
725728
if(nrow(sero_df) > 0) {
726729

@@ -736,22 +739,22 @@ run_deterministic_comparison <- function(data,
736739

737740
}
738741

739-
# get symptom incidence
740-
symptoms <- rowSums(out[,index$E2]) * model_params$gamma_E
742+
# get symptom incidence
743+
symptoms <- rowSums(out[,index$E2]) * model_params$gamma_E
741744

742-
# dates of incidence, pop size and dates of sero surveys
743-
dates <- data$date[[1]] + seq_len(nrow(out)) - 1L
744-
N <- sum(model_params$population)
745-
sero_dates <- list(sero_df$date_end, sero_df$date_start, sero_df$date_start + as.integer((sero_df$date_end - sero_df$date_start)/2))
746-
unq_sero_dates <- unique(c(sero_df$date_end, sero_df$date_start, sero_df$date_start + as.integer((sero_df$date_end - sero_df$date_start)/2)))
747-
det <- obs_params$sero_det
745+
# dates of incidence, pop size and dates of sero surveys
746+
dates <- data$date[[1]] + seq_len(nrow(out)) - 1L
747+
N <- sum(model_params$population)
748+
sero_dates <- list(sero_df$date_end, sero_df$date_start, sero_df$date_start + as.integer((sero_df$date_end - sero_df$date_start)/2))
749+
unq_sero_dates <- unique(c(sero_df$date_end, sero_df$date_start, sero_df$date_start + as.integer((sero_df$date_end - sero_df$date_start)/2)))
750+
det <- obs_params$sero_det
748751

749-
# estimate model seroprev
750-
sero_model <- vapply(unq_sero_dates, sero_at_date, numeric(1), symptoms, det, dates, N)
751-
sero_model_mat <- do.call(cbind,lapply(sero_dates, function(x) {sero_model[match(x, unq_sero_dates)]}))
752+
# estimate model seroprev
753+
sero_model <- vapply(unq_sero_dates, sero_at_date, numeric(1), symptoms, det, dates, N)
754+
sero_model_mat <- do.call(cbind,lapply(sero_dates, function(x) {sero_model[match(x, unq_sero_dates)]}))
752755

753-
# likelihood of model obvs
754-
lls <- rowMeans(dbinom(sero_df$sero_pos, sero_df$samples, sero_model_mat, log = TRUE))
756+
# likelihood of model obvs
757+
lls <- rowMeans(dbinom(sero_df$sero_pos, sero_df$samples, sero_model_mat, log = TRUE))
755758

756759
}
757760

@@ -784,3 +787,15 @@ run_deterministic_comparison <- function(data,
784787

785788
ret
786789
}
790+
791+
792+
#' @noRd
793+
check_sero_df <- function(sero_df) {
794+
795+
assert_date(sero_df$date_start)
796+
assert_date(sero_df$date_end)
797+
assert_pos_int(sero_df$sero_pos)
798+
assert_pos_int(sero_df$samples)
799+
assert_le(sero_df$sero_pos, sero_df$samples)
800+
801+
}

README.Rmd

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ This plot will plot each of the compartments of the model output. We can also pl
144144
plot(r, var_select = c("E", "IMild"))
145145
```
146146

147-
Or, you can specify one of `deaths`, `infections`, `hospital_occupancy`, `ICU_occupancy`, `hospital_demand` or `ICU_demand`, and plot these summary metrics that represent the combintion of a number of different compartment e.g:
147+
Or, you can specify one of `deaths`, `infections`, `hospital_occupancy`, `ICU_occupancy`, `hospital_demand` or `ICU_demand`, and plot these summary metrics that represent the combination of a number of different compartment e.g:
148148

149149
```{r subset variables plot2}
150150
plot(r, var_select = "deaths")
@@ -178,7 +178,7 @@ length of simulation and the timestep. For a full list of model inputs, please
178178
see the function [documentation](https://mrc-ide.github.io/squire/reference/run_explicit_SEEIR_model.html)
179179

180180
For example, changing the initial R0 (default = 3), number of replicates (
181-
default = 10), simualtion length (default = 365 days) and time step (default =
181+
default = 10), simulation length (default = 365 days) and time step (default =
182182
0.5 days), as well as setting the population and contact matrix manually:
183183

184184
```{r set params}
@@ -201,7 +201,7 @@ plot(r)
201201
```
202202

203203
We can also change the R0 and contact matrix at set time points, to reflect
204-
changing behaviour resulting from interventions. For example to set a 80%
204+
changing behaviour resulting from interventions. For example to set an 80%
205205
reduction in the contact matrix after 100 days :
206206

207207
```{r set contact matrix decrease}
@@ -356,7 +356,7 @@ three elements in `out` being the simulation outputs, the model and model parame
356356
plot(out, "deaths", date_0 = max(df$date), x_var = "date")
357357
```
358358

359-
With default parameters, `calibrate` will simulate up the maximum date in the data
359+
With default parameters, `calibrate` will simulate up to the maximum date in the data
360360
provided. The fit to this data can be shown using the plotting function and specifying `particle_fit`
361361
to be `TRUE`
362362

@@ -380,7 +380,7 @@ plot(out$scan_results, what = "probability")
380380

381381
The reason for the poor fits to the data shown earlier is because Algeria has implemented
382382
interventions prior to today. These can also be incorporated into `calibrate`.
383-
For example, we can grab the assumed changes to transmission fased on government intervention
383+
For example, we can grab the assumed changes to transmission based on government intervention
384384
for Algeria.
385385

386386
```{r dza interventions}
@@ -420,7 +420,7 @@ plot(out, particle_fit = TRUE)
420420
That is a much better fit.
421421

422422
Any parameter that you could provide to `run_explicit_SEEIR_model` can be passed to `calibrate`. This
423-
includes time varying arguments such as `contact_matrix_set`, `ICU_bed_capacity` and `hosp_bed_capacity`. To incorporate these into model fitting correctly, the date at which these change must be provided (similarly to how `date_R0_change` was provided above) using `date_ICU_bed_capacity_change`, `date_ICU_bed_capacity_change` and `date_hosp_bed_capacity_change` respecitvely. In addition, the user must provide a baseline value for these, i.e. the contact matrix and bed capacity at the beginning of the epidemic:
423+
includes time varying arguments such as `contact_matrix_set`, `ICU_bed_capacity` and `hosp_bed_capacity`. To incorporate these into model fitting correctly, the date at which these change must be provided (similarly to how `date_R0_change` was provided above) using `date_ICU_bed_capacity_change`, `date_ICU_bed_capacity_change` and `date_hosp_bed_capacity_change` respectively. In addition, the user must provide a baseline value for these, i.e. the contact matrix and bed capacity at the beginning of the epidemic:
424424

425425
```{r particle with ints of all kinds, warning=FALSE, message=FALSE}
426426
out <- calibrate(
@@ -510,7 +510,7 @@ ggproj + ggplot2::geom_hline(yintercept = out$parameters$ICU_bed_capacity)
510510

511511
We can see above that the intervention introduced is nearly sufficient to prevent ICU demand (solid line red) from exceeding the supply, whereas in the unmitigated strategy this did occur.
512512

513-
We can also model changing interventions by changing the contact matrix over time as well as the availability of ICU and hospital beds. E.g. decreasing contacts by 75% in a week before relaxing it to 80% in 30 days time, while increasing hospital and ICU beds by 20% in 30 days time. (N.B. We can turn off the automatic scenario parameter labelling with `add_parms_to_scenarios = FALSE`):
513+
We can also model changing interventions by changing the contact matrix over time as well as the availability of ICU and hospital beds. E.g. decreasing contacts by 75% in a week before relaxing it to 80% in 30 days time, while increasing hospital and ICU beds by 200% in 30 days time. (N.B. We can turn off the automatic scenario parameter labelling with `add_parms_to_scenarios = FALSE`):
514514

515515
```{r projection relative beds}
516516
# create our projections

README.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ plot(r, var_select = c("E", "IMild"))
187187
<img src="man/figures/README-subset variables plot-1.png" width="100%" />
188188
Or, you can specify one of `deaths`, `infections`, `hospital_occupancy`,
189189
`ICU_occupancy`, `hospital_demand` or `ICU_demand`, and plot these
190-
summary metrics that represent the combintion of a number of different
190+
summary metrics that represent the combination of a number of different
191191
compartment
192192
e.g:
193193

@@ -251,7 +251,7 @@ full list of model inputs, please see the function
251251
[documentation](https://mrc-ide.github.io/squire/reference/run_explicit_SEEIR_model.html)
252252

253253
For example, changing the initial R0 (default = 3), number of replicates
254-
( default = 10), simualtion length (default = 365 days) and time step
254+
( default = 10), simulation length (default = 365 days) and time step
255255
(default = 0.5 days), as well as setting the population and contact
256256
matrix manually:
257257

@@ -282,7 +282,7 @@ plot(r)
282282

283283
We can also change the R0 and contact matrix at set time points, to
284284
reflect changing behaviour resulting from interventions. For example to
285-
set a 80% reduction in the contact matrix after 100 days :
285+
set an 80% reduction in the contact matrix after 100 days :
286286

287287
``` r
288288

@@ -306,7 +306,7 @@ plot(r, var_select = "infections")
306306

307307
where `n_E2_I` is the daily number of new infections.
308308

309-
To show an 80% reduction after 50 days but only maintained for 30 days :
309+
To show an 80% reduction after 80 days but only maintained for 40 days :
310310

311311
``` r
312312

@@ -329,7 +329,7 @@ plot(r, var_select = "infections")
329329

330330
<img src="man/figures/README-set contact matrix decrease and relax-1.png" width="100%" />
331331

332-
Alternatively, we could set a changing R0, which falls below 1 after 50
332+
Alternatively, we could set a changing R0, which falls below 1 after 80
333333
days:
334334

335335
``` r
@@ -501,7 +501,7 @@ plot(out, "deaths", date_0 = max(df$date), x_var = "date")
501501

502502
<img src="man/figures/README-plot particle deaths-1.png" width="100%" />
503503

504-
With default parameters, `calibrate` will simulate up the maximum date
504+
With default parameters, `calibrate` will simulate up to the maximum date
505505
in the data provided. The fit to this data can be shown using the
506506
plotting function and specifying `particle_fit` to be `TRUE`
507507

@@ -533,7 +533,7 @@ plot(out$scan_results, what = "probability")
533533
The reason for the poor fits to the data shown earlier is because
534534
Algeria has implemented interventions prior to today. These can also be
535535
incorporated into `calibrate`. For example, we can grab the assumed
536-
changes to transmission fased on government intervention for
536+
changes to transmission based on government intervention for
537537
Algeria.
538538

539539
``` r
@@ -590,7 +590,7 @@ incorporate these into model fitting correctly, the date at which these
590590
change must be provided (similarly to how `date_R0_change` was provided
591591
above) using `date_ICU_bed_capacity_change`,
592592
`date_ICU_bed_capacity_change` and `date_hosp_bed_capacity_change`
593-
respecitvely. In addition, the user must provide a baseline value for
593+
respectively. In addition, the user must provide a baseline value for
594594
these, i.e. the contact matrix and bed capacity at the beginning of the
595595
epidemic:
596596

@@ -725,7 +725,7 @@ whereas in the unmitigated strategy this did occur.
725725
We can also model changing interventions by changing the contact matrix
726726
over time as well as the availability of ICU and hospital beds. E.g.
727727
decreasing contacts by 75% in a week before relaxing it to 80% in 30
728-
days time, while increasing hospital and ICU beds by 20% in 30 days
728+
days time, while increasing hospital and ICU beds by 200% in 30 days
729729
time. (N.B. We can turn off the automatic scenario parameter labelling
730730
with `add_parms_to_scenarios = FALSE`):
731731

tests/testthat/test-pmcmc.R

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1703,6 +1703,8 @@ test_that("sero fitting works", {
17031703
pars_obs$sero_df <- sero_df
17041704
pars_obs$sero_det <- sero_det
17051705

1706+
# following checks to see that it is being correctly used to get better likelihoods
1707+
# given contribution from the sero ll
17061708
Sys.setenv("SQUIRE_PARALLEL_DEBUG"=TRUE)
17071709
out <- pmcmc(data = data,
17081710
n_mcmc = 5,
@@ -1753,4 +1755,73 @@ test_that("sero fitting works", {
17531755

17541756
expect_s3_class(plot(out, what = "deaths", particle_fit = TRUE), "gg")
17551757

1758+
1759+
# and checks that the sero_df date is correctly formatted
1760+
pars_obs$sero_df$date_end <- factor(pars_obs$sero_df$date_end)
1761+
expect_error(pmcmc(data = data,
1762+
n_mcmc = 5,
1763+
log_likelihood = NULL,
1764+
log_prior = NULL,
1765+
n_particles = 2,
1766+
steps_per_day = steps_per_day,
1767+
output_proposals = FALSE,
1768+
n_chains = 1,
1769+
replicates = 20,
1770+
burnin = 5,
1771+
squire_model = squire_model,
1772+
pars_init = pars_init,
1773+
pars_min = pars_min,
1774+
pars_max = pars_max,
1775+
pars_discrete = pars_discrete,
1776+
pars_obs = pars_obs,
1777+
proposal_kernel = proposal_kernel,
1778+
R0_change = R0_change,
1779+
date_R0_change = date_R0_change,
1780+
country = country),
1781+
"date_end must be a date or ISO-formatted string")
1782+
1783+
17561784
})
1785+
1786+
1787+
#------------------------------------------------
1788+
test_that("sero df checking works", {
1789+
1790+
sero_df <- data.frame("samples" = 1000, "sero_pos" = 10,
1791+
"date_start" = as.Date("2020-04-15"),
1792+
"date_end" = as.Date("2020-04-19"))
1793+
1794+
sero_df_fail <- sero_df
1795+
1796+
# date_start
1797+
sero_df_fail <- sero_df
1798+
sero_df_fail$date_start <- factor(sero_df_fail$date_start)
1799+
expect_error(check_sero_df(sero_df_fail),
1800+
"date_start must be a date or ISO-formatted string")
1801+
1802+
# date end
1803+
sero_df_fail <- sero_df
1804+
sero_df_fail$date_end <- factor(sero_df_fail$date_end)
1805+
expect_error(check_sero_df(sero_df_fail),
1806+
"date_end must be a date or ISO-formatted string")
1807+
1808+
# integers
1809+
sero_df_fail <- sero_df
1810+
sero_df_fail$samples <- 101010.123
1811+
expect_error(check_sero_df(sero_df_fail),
1812+
"samples must be integer valued")
1813+
1814+
# integers
1815+
sero_df_fail <- sero_df
1816+
sero_df_fail$sero_pos <- 10.123
1817+
expect_error(check_sero_df(sero_df_fail),
1818+
"sero_pos must be integer valued")
1819+
1820+
# comparators
1821+
sero_df_fail <- sero_df
1822+
sero_df_fail$sero_pos <- 10000
1823+
expect_error(check_sero_df(sero_df_fail),
1824+
"sero_pos must be less than")
1825+
1826+
})
1827+

0 commit comments

Comments
 (0)