|
| 1 | +#' @title Extract age from the CHI number |
| 2 | +#' |
| 3 | +#' @description `age_from_chi` takes a CHI number or a vector of CHI numbers |
| 4 | +#' and returns the age as implied by the CHI number(s). If the Date of Birth |
| 5 | +#' (DoB) is ambiguous it will return NA. It uses [dob_from_chi()]. |
| 6 | +#' |
| 7 | +#' @param chi_number a CHI number or a vector of CHI numbers with `character` |
| 8 | +#' class. |
| 9 | +#' @param ref_date calculate the age at this date, default is to use |
| 10 | +#' `Sys.Date()` i.e. today. |
| 11 | +#' @param min_age,max_age optional min and/or max dates that the DoB could take |
| 12 | +#' as the century needs to be guessed. |
| 13 | +#' Must be either length 1 for a 'fixed' age or the same length as `chi_number` |
| 14 | +#' for an age per CHI number. |
| 15 | +#' `min_age` can be age based on common sense in the dataset, whilst `max_age` |
| 16 | +#' can be age when an event happens such as the age at discharge. |
| 17 | +#' @param chi_check logical, optionally skip checking the CHI for validity which |
| 18 | +#' will be faster but should only be used if you have previously checked the |
| 19 | +#' CHI(s), the default (TRUE) will to check the CHI numbers. |
| 20 | +#' |
| 21 | +#' @return an integer vector of ages in years truncated to the nearest year. |
| 22 | +#' It will be the same length as `chi_number`. |
| 23 | +#' @export |
| 24 | +#' |
| 25 | +#' @examples |
| 26 | +#' age_from_chi("0101336489") |
| 27 | +#' |
| 28 | +#' library(tibble) |
| 29 | +#' library(dplyr) |
| 30 | +#' data <- tibble(chi = c( |
| 31 | +#' "0101336489", |
| 32 | +#' "0101405073", |
| 33 | +#' "0101625707" |
| 34 | +#' ), dis_date = as.Date(c( |
| 35 | +#' "1950-01-01", |
| 36 | +#' "2000-01-01", |
| 37 | +#' "2020-01-01" |
| 38 | +#' ))) |
| 39 | +#' |
| 40 | +#' data %>% |
| 41 | +#' mutate(chi_age = age_from_chi(chi)) |
| 42 | +#' |
| 43 | +#' data %>% |
| 44 | +#' mutate(chi_age = age_from_chi(chi, min_age = 18, max_age = 65)) |
| 45 | +#' |
| 46 | +#' data %>% |
| 47 | +#' mutate(chi_age = age_from_chi(chi, |
| 48 | +#' ref_date = dis_date |
| 49 | +#' )) |
| 50 | +age_from_chi <- function( |
| 51 | + chi_number, |
| 52 | + ref_date = NULL, |
| 53 | + min_age = 0L, |
| 54 | + max_age = NULL, |
| 55 | + chi_check = TRUE) { |
| 56 | + # Do type checking on the params |
| 57 | + if (!inherits(chi_number, "character")) { |
| 58 | + cli::cli_abort( |
| 59 | + "{.arg chi_number} must be a {.cls character} vector, not a {.cls {class(chi_number)}} vector." |
| 60 | + ) |
| 61 | + } |
| 62 | + |
| 63 | + if (!is.null(ref_date) && !inherits(ref_date, c("Date", "POSIXct"))) { |
| 64 | + cli::cli_abort( |
| 65 | + "{.arg ref_date} must be a {.cls Date} or {.cls POSIXct} vector, not a {.cls {class(ref_date)}} vector." |
| 66 | + ) |
| 67 | + } |
| 68 | + |
| 69 | + if (!inherits(min_age, c("integer", "numeric"))) { |
| 70 | + cli::cli_abort( |
| 71 | + "{.arg min_age} must be a {.cls integer} vector, not a {.cls {class(min_age)}} vector." |
| 72 | + ) |
| 73 | + } |
| 74 | + |
| 75 | + if (!is.null(max_age) && !inherits(max_age, c("integer", "numeric"))) { |
| 76 | + cli::cli_abort( |
| 77 | + "{.arg max_age} must be a {.cls integer} vector, not a {.cls {class(max_age)}} vector." |
| 78 | + ) |
| 79 | + } |
| 80 | + |
| 81 | + # Handle NULL and NA values in ref_date |
| 82 | + if (is.null(ref_date)) { |
| 83 | + ref_date <- Sys.Date() |
| 84 | + } else if (anyNA(ref_date)) { |
| 85 | + # If ref_date is a vector, fill in today's date where it's missing |
| 86 | + ref_date[is.na(ref_date)] <- Sys.Date() |
| 87 | + } |
| 88 | + |
| 89 | + n_chis <- length(chi_number) |
| 90 | + |
| 91 | + if (length(ref_date) != 1L) { |
| 92 | + if (n_chis != 1L && n_chis != length(ref_date)) { |
| 93 | + cli::cli_abort( |
| 94 | + "{.arg ref_date} must be size {length(chi_number)} (the same as {.arg chi_number}) not {length(ref_date)}." |
| 95 | + ) |
| 96 | + } else if (n_chis == 1L) { |
| 97 | + cli::cli_abort( |
| 98 | + "{.arg ref_date} must be size 1 (the same as {.arg chi_number}) not {length(ref_date)}." |
| 99 | + ) |
| 100 | + } |
| 101 | + } |
| 102 | + # Ensure ref_date is replicated if length 1 |
| 103 | + if (length(ref_date) == 1L && length(chi_number) > 1L) { |
| 104 | + ref_date <- rep(ref_date, length(chi_number)) |
| 105 | + } |
| 106 | + |
| 107 | + # Handle NULL and NA values in max_age |
| 108 | + if (is.null(max_age)) { |
| 109 | + # If max_age is NULL, set it to a very large number (e.g., age from 1900-01-01) |
| 110 | + # This corresponds to the default min_date behaviour in dob_from_chi |
| 111 | + max_age <- age_calculate(as.Date("1900-01-01"), ref_date) |
| 112 | + } else if (anyNA(max_age)) { |
| 113 | + # Ensure max_age is replicated if length 1 |
| 114 | + if (length(max_age) == 1L && length(chi_number) > 1L) { |
| 115 | + max_age <- rep(max_age, length(chi_number)) |
| 116 | + } |
| 117 | + |
| 118 | + # If max_age is a vector, fill in the age from 1900-01-01 where it's missing |
| 119 | + max_age[is.na(max_age)] <- age_calculate( |
| 120 | + as.Date("1900-01-01"), |
| 121 | + ref_date[is.na(max_age)] |
| 122 | + ) |
| 123 | + } |
| 124 | + |
| 125 | + if (length(max_age) != 1L) { |
| 126 | + if (n_chis != 1L && n_chis != length(max_age)) { |
| 127 | + cli::cli_abort( |
| 128 | + "{.arg max_age} must be size {length(chi_number)} (the same as {.arg chi_number}) not {length(max_age)}." |
| 129 | + ) |
| 130 | + } else if (n_chis == 1L) { |
| 131 | + cli::cli_abort( |
| 132 | + "{.arg max_age} must be size 1 (the same as {.arg chi_number}) not {length(max_age)}." |
| 133 | + ) |
| 134 | + } |
| 135 | + } |
| 136 | + |
| 137 | + if (length(min_age) != 1L) { |
| 138 | + if (n_chis != 1L && n_chis != length(min_age)) { |
| 139 | + cli::cli_abort( |
| 140 | + "{.arg min_age} must be size {length(chi_number)} (the same as {.arg chi_number}) not {length(min_age)}." |
| 141 | + ) |
| 142 | + } else if (n_chis == 1L) { |
| 143 | + cli::cli_abort( |
| 144 | + "{.arg min_age} must be size 1 (the same as {.arg chi_number}) not {length(min_age)}." |
| 145 | + ) |
| 146 | + } |
| 147 | + } |
| 148 | + |
| 149 | + # Handle NA values in min_age |
| 150 | + # If min_age is a vector, fill in 0 where it's missing |
| 151 | + if (anyNA(min_age)) { |
| 152 | + min_age[is.na(min_age)] <- 0L |
| 153 | + } |
| 154 | + |
| 155 | + # min and max ages are in a reasonable range |
| 156 | + if (any(min_age < 0L)) { |
| 157 | + cli::cli_abort("{.arg min_age} must be a positive integer.") |
| 158 | + } |
| 159 | + |
| 160 | + # Ensure min_age is replicated if length 1 |
| 161 | + if (length(min_age) == 1L && length(chi_number) > 1L) { |
| 162 | + min_age <- rep(min_age, length(chi_number)) |
| 163 | + } |
| 164 | + |
| 165 | + # Check max_age vs min_age after handling NAs |
| 166 | + if (any(max_age < min_age)) { |
| 167 | + cli::cli_abort( |
| 168 | + "{.arg max_age}, must always be greater than or equal to {.arg min_age}." |
| 169 | + ) |
| 170 | + } |
| 171 | + |
| 172 | + # Convert age ranges to date ranges relative to the reference date |
| 173 | + # NA values in ref_date, min_age, or max_age will propagate NA correctly here |
| 174 | + max_date.age <- ref_date - lubridate::years(min_age) |
| 175 | + min_date.age <- ref_date - lubridate::years(max_age) |
| 176 | + |
| 177 | + # Call dob_from_chi with the calculated date ranges |
| 178 | + guess_dob <- dob_from_chi( |
| 179 | + chi_number = chi_number, |
| 180 | + min_date = min_date.age, |
| 181 | + max_date = max_date.age, |
| 182 | + chi_check = chi_check |
| 183 | + ) |
| 184 | + |
| 185 | + # Calculate age from the guessed date of birth and reference date |
| 186 | + # NA values in guess_dob or ref_date will result in NA age |
| 187 | + guess_age <- age_calculate(guess_dob, ref_date) |
| 188 | + |
| 189 | + return(guess_age) |
| 190 | +} |
0 commit comments