Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ importFrom(flextable,width)
importFrom(forcats,as_factor)
importFrom(forcats,fct_drop)
importFrom(forcats,fct_infreq)
importFrom(forcats,fct_recode)
importFrom(forcats,fct_relevel)
importFrom(forcats,fct_reorder)
importFrom(forcats,fct_rev)
Expand Down Expand Up @@ -146,6 +147,9 @@ importFrom(ggplot2,unit)
importFrom(ggplot2,vars)
importFrom(glue,glue)
importFrom(lifecycle,deprecated)
importFrom(lubridate,as_date)
importFrom(lubridate,days)
importFrom(lubridate,ymd)
importFrom(purrr,discard)
importFrom(purrr,imap)
importFrom(purrr,iwalk)
Expand Down Expand Up @@ -181,6 +185,7 @@ importFrom(scales,breaks_width)
importFrom(scales,label_percent)
importFrom(stats,na.omit)
importFrom(stats,rbinom)
importFrom(stats,rexp)
importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(stringr,fixed)
Expand Down
180 changes: 178 additions & 2 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,15 @@ grstat_example = function(N=200, seed=42, ...){

recist = example_rc(enrolres, seed, ...)

rtn = lst(enrolres, ae, recist) %>%
fu = example_fu(enrolres, recist, seed, ...)

rtn = lst(enrolres, ae, recist, fu) %>%
imap(~.x %>% mutate(crfname=.y %>% set_label("Form name")))
rtn$date_extraction = "2024/01/01"
rtn$datetime_extraction = structure(1704067200, class = c("POSIXct", "POSIXt"),
tzone = "Europe/Paris")

rtn = .remove_post_event(rtn)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plus joli en pipe après le imap ?

rtn
}

Expand Down Expand Up @@ -159,6 +163,7 @@ example_ae = function(enrolres, seed, p_na=0,
}



#' Simulate a RECIST dataset
#'
#' Internal function that simulates a synthetic RECIST dataset following the
Expand Down Expand Up @@ -283,6 +288,177 @@ example_rc = function(enrolres, seed,
}





# Calculer le temps de décès selon bras et statut progression. Puis en dernier, tronquer les visites recist et le suivi ae a la date de deces/censure


#' Generate Simulated Survival Data
#'
#' Internal function that simulates survival data, which depend on treatment arm
#' and date of progression if applicable.
#'
#' @param enrolres the enrolment result table, from [example_enrol()].
#' @param recist the recist table with progression date, from [example_rc()].
#' @param seed Integer. Random seed for reproducibility (can be `NULL`).
#' @param censor_min_days minimum time in days between last RECIST evaluation and administrative censoring
#' @param censor_max_days maximum time in days between last RECIST evaluation and administrative censoring
#' @param ... Additional arguments (currently unused).
#'
#' @return A tibble with `N` rows and the following columns:
#'
#' - `subjid`: Subject ID
#' - `fu_status`: Status of the patient (0 - Alive/Censored , 1 - Dead)
#' - `fu_date`: Date of latest news (death date if dead, otherwise censoring date)
#'
#' @keywords internal
#' @importFrom dplyr arrange select mutate left_join if_else n
#' @importFrom lubridate ymd as_date days
#' @importFrom stats rexp
#' @importFrom forcats as_factor fct_recode
example_fu = function(enrolres, recist, seed, lambda_censor = 0.3, lambda_control = 0.2, beta_arm = -0.6, beta_prog_status = 0.5, ...){
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bienvenue dans la galère des noms de variables 😅
Vu qu'on utilise ... pour passer les arguments, on n'appelle jamais example_fu (non exporté).
Chaque argument est dispatché à sa fonction :
grstat_example(beta_arm=-0.7, beta_prog_status=0.4, rc_coef_treatement=4)
Du coup, il faudrait préfixer tes arguments par fu_ pour qu'on comprenne bien que beta_arm vaut pour la survie et pas pour la progression.


stopifnot(is.data.frame(enrolres), is.data.frame(recist))
if (!all(c("subjid", "date_inclusion") %in% names(enrolres))) {
stop("`enrolres` must contain columns `subjid` and `date_inclusion`.")
}
Comment on lines +323 to +325
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Très bien la programmation défensive :-)
Vu que c'est fréquent, j'avais ajouté un helper pour simplifier : assert_names_exists()
Cela dit, vu que c'est une fonction interne, pas besoin de trop t'embêter : "l'utilisateur" qui donne enrolres et recist c'est la fonction grstat_example() donc on est tranquille.

if (!all(c("subjid", "rcdt", "rcresp") %in% names(recist))) {
stop("`recist` must contain columns `subjid`, `rcdt` (date of RECIST evaluation) and `rcresp` (RECIST global response).")
}
if(!("arm" %in% names(enrolres))) {
stop("`enrolres` must contain `arm`.")
}
if(!is.null(seed)) { set.seed(seed) }


enrolres = enrolres %>% arrange(subjid)
recist = recist %>% arrange(subjid)
Comment on lines +335 to +336
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Elles ne sont pas arrangées par défaut ?
Si c'est le cas il faut qu'on corrige dans les autres fonctions


prog_status = .progression_status(recist)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comme .progression_status() retourne une df, il faudrait plutôt appeler le résultat data_prog


# Survival rate assigned for each subject, according to arm of treatment and progression status
lambda = .surv_coef(arm = enrolres$arm, prog_status = prog_status$status, lambda_control = lambda_control,
beta_arm = beta_arm, beta_prog_status = beta_prog_status)

time_to_death = rexp(n = nrow(enrolres), rate = lambda)
time_to_censor = rexp(n = nrow(enrolres), rate = lambda_censor)
Comment on lines +341 to +345
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ça marche, mais ce n'est pas tidy.
En codant comme ça tu t'exposes à des différences de longueur de vecteur difficiles à débuguer.
En pratique comme on est dans la fonction exemple, on a un environnement très contrôlé, mais ce sont de bonnes pratiques.

Mais vu que tu as super bien codé tes fonctions, tu peux tout simplement déplacer ces 3 variables dans le mutate un peu plus bas :-)
(il faudra juste retirer le 1er select mais sinon ça devrait marcher direct)


fu_data = enrolres %>%

select(subjid, date_inclusion) %>%
left_join(prog_status, by = join_by(subjid)) %>%

mutate(date_inclusion = as.Date(date_inclusion),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

C'est déjà une date, non ?


death_date = date_inclusion + days(ceiling(time_to_death*365.25)),
censoring_date = date_inclusion + days(ceiling(time_to_censor*365.25)),
Comment on lines +354 to +355
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tu pourrais aussi utiliser lubridate::years()


# Status and date of latest news
fu_status = fct_recode(as_factor(as.integer(death_date <= censoring_date)),
"Alive/censored" = "0",
"Dead" = "1"),
fu_date = if_else(condition = death_date <= censoring_date,
true = death_date,
false = censoring_date)) %>%
Comment on lines +361 to +363
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Il y a déjà une fonction en base R pour faire ça :-)
fu_date = pmin(death_date, censoring_date, na.rm=TRUE)

select(subjid, fu_status, fu_date) %>%
apply_labels(subjid = "Subject ID",
fu_status = "Status",
fu_date = "Date of last known status")

return(fu_data)
}



# Internals FU -----------------------------------------------------------------


#' Used in `example_fu()`
#' Extract the progression status of RECIST evaluation for each patient and return a tibble
#' @param recist a tibble with RECIST progression status for each patients
#' @return tibble with columns `subjid`, `prog_status` (binary variable : progressive disease or not)
#' @noRd
#' @keywords internal
#' @importFrom dplyr select summarise
.progression_status = function(recist) {

stopifnot(is.data.frame(recist))
if (!all(c("subjid", "rcresp") %in% names(recist))) {
stop("`recist` must contain columns `subjid`, and `rcresp` (RECIST global response).")
}

rcresp_levels = levels(recist$rcresp)
if (!"Progressive disease" %in% rcresp_levels) { stop("One level of `rcresp_levels` must be 'Progressive disease'. Found: ", paste(rcresp_levels, collapse = ", ")) }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tu es sûr de ton check ?
Si je comprends bien il faut que la fonction plante si aucun patient n'est progressif ?

PS: Tu pourrais simplifier ta condition en if(!any(recist$rcresp=="Progressive disease"))


progression_status = recist %>%
select(subjid, rcresp) %>%
summarise(status = any(rcresp == "Progressive disease"), .by = 'subjid') %>%
mutate(status = as_factor(if_else(!is.na(status), true = "Progressive disease", false = "No progression")))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[FACULTATIF]
Ça marche très bien comme ça, mais je te conseille plutôt d'utiliser des vecteurs logiques autant que possible.
Une colonne event_prog=!is.na(status) sera plus facile à gérer par la suite.
Et rien ne t'empêche de faire une autre colonne status_prog = factor(ifelse(event_prog, "Progressive disease", "No progression"))
Si on regarde ton algorithme global, tu crées un logical avec is.na, tu le transformes en factor, puis plus loin tu retransformes ton factor en logical avec ==.

}


#' Used in `example_fu()`
#' Assign a survival rate parameter (exponential lambda) for each patient, depending on treatment and progression status
#' @param arm factor vector of treatment arms (`Control` versus `Treatment`)
#' @param prog_status factor vector of progression status (`No progressive disease` versus `progressive disease`)
#' @param lambda_control scale parameter of an exponential baseline hazard function
#' @return numeric vector of rates (lambda) per subject, aligned with `arm` and `prog_status`
#' @noRd
#' @keywords internal
.surv_coef = function(arm, prog_status, lambda_control, beta_arm, beta_prog_status) {

if(is.null(arm)) {stop("`arm` is NULL.")}
if(is.null(prog_status)) {stop("`prog_status` is NULL.")}

if(!is.factor(arm)) arm = as_factor(arm)
if(!is.factor(prog_status)) prog_status = mutate(prog_status, status = as_factor(status))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Je crois que prog_status est forcément un factor à ce niveau


arm_levels = levels(arm)
n_arm = nlevels(arm)
if (!"Control" %in% arm_levels) { stop("One level of `arm` must be 'Control'. Found: ", paste(arm_levels, collapse = ", ")) }

prog_status_levels = levels(prog_status)
n_prog_status = nlevels(prog_status)
if (!"Progressive disease" %in% prog_status_levels) { stop("One level of `prog_status` must be 'Progressive disease'. Found: ", paste(prog_status_levels, collapse = ", ")) }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Idem supra, mais j'en profite pour placer le package cli qui fait de super belles erreurs :
cli_abort("One level of `prog_status` must be 'Progressive disease'. Found: {prog_status_levels}")



if(n_arm == 2 & n_prog_status == 2) { # "Control" versus "Treatment" and "Progressive disease" versus "No Progressive disease (stable or response)"

if (!"Treatment" %in% arm_levels) { stop("One level of `arm` must be 'Treatment'. Found: ", paste(arm_levels, collapse = ", ")) }

lambda = lambda_control * exp(beta_arm*(arm == "Treatment") + beta_prog_status*(prog_status == "Progressive disease"))

} else if(n_arm != 2) { stop("Wrong number of arms of treatment: ", n_arm, ". Only two arms of treatment are supported.")
} else if(n_prog_status != 2) { stop("Wrong number of progression status: ", n_prog_status, ". Progression status must be a binary variable.")}
Comment on lines +432 to +433
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pour la programmation défensive, on applique le concept du "fail early"

if(n_arm != 2) stop("Bad")
if(n_prog_status != 2) stop("Bad")
if(!"Treatment" %in% arm_levels) stop("Bad")
lambda = lambda_control *  ...

Vu que tu as stoppé si les inputs sont mauvais, tu n'as plus besoin de condition pour ton code.
C'est plus lisible et facile à maintenir.

Pro-tip : un else if sans else est souvent de mauvais présage


lambda
}


#' Used in `grstat_example()`
#' Remove recist evaluation rows posterior to the end of the follow-up
#' @param rtn list of data frames with enrolment, adverse event, recist evaluation and follow-up information
#' @noRd
#' @return list of data frames with enrolment, adverse event, recist evaluation and follow-up information with recist evaluation truncated with last follow-up date
#' @keywords internal
#' @importFrom dplyr left_join select filter
.remove_post_event = function(rtn){
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pas mal du tout !
Idem, pas forcément besoin de check vu qu'on contrôle complètement la fonction.

Du coup ça pose la question : est-ce qu'on ne devrait pas enlever aussi les AE ?


if(!is.list(rtn)) {stop("`rtn` must be a list of data frames.")}

if (! ("recist" %in% names(rtn)) ) { stop("One data frame in `rtn` list must be 'recist'. Found: ", names(rtn, collapse = ", ")) }
if (! ("fu" %in% names(rtn)) ) { stop("One data frame in `rtn` list must be 'fu'. Found: ", names(rtn, collapse = ", ")) }

rtn$recist <- rtn$recist %>%
left_join(rtn$fu %>% select(- crfname), by = join_by(subjid)) %>%
filter(rcdt <= fu_date) %>%
select(- c(fu_date, fu_status))

rtn
}


# Internals RC ------------------------------------------------------------

#' Used in `example_rc()`
Expand All @@ -300,7 +476,7 @@ example_rc = function(enrolres, seed,
percent_change_per_month > 0 ~ 1 / rc_coef_treatement,
.default = rc_coef_treatement)
percent_change_per_month = percent_change_per_month * coef
subj_delai = 42 * seq(rc_num_timepoints) + runif(rc_num_timepoints, -7, 7)
subj_delai = 15 * seq(rc_num_timepoints) + runif(rc_num_timepoints, -7, 7)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oui, c'est beaucoup mieux avec 15 !
Est-ce que tu pourrais en faire un argument ?

percent_change = percent_change_per_month * subj_delai / 30.5
percent_change = percent_change + rnorm(rc_num_timepoints, 0, rc_sd_tlsum_noise)
percent_change = c(0, percent_change)
Expand Down
1 change: 0 additions & 1 deletion grstat.Rproj
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Version: 1.0
ProjectId: d350bac8-4795-42a0-a32d-ccb93a467950

RestoreWorkspace: No
SaveWorkspace: No
Expand Down
43 changes: 43 additions & 0 deletions man/example_fu.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions tests/testthat/helper-zzz_chaRlotte.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
db = grstat_example()

data_km = db$enrolres %>%
left_join(db$fu, by = "subjid") %>%
mutate(time_OS = as.numeric(fu_date - date_inclusion)/365.25,
event_OS = 1*(fu_status == "Dead"))

km = survival::survfit(survival::Surv(time_OS, event_OS) ~ arm, data = data_km)
p = ggsurvfit::ggsurvfit(km) +
ggsurvfit::add_censor_mark() +
ggsurvfit::add_confidence_interval() +
ggsurvfit::add_risktable()

print(p)
Loading