Skip to content

Commit 5fd4994

Browse files
authored
Merge branch 'master' into age_calculate_fix
2 parents 9283761 + f35d316 commit 5fd4994

File tree

7 files changed

+1019
-257
lines changed

7 files changed

+1019
-257
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Authors@R: c(
1313
person("Graeme", "Gowans", , "Graeme.Gowans@phs.scot", role = "aut"),
1414
person("Alice", "Byers", role = "ctb"),
1515
person("Alan", "Yeung", , "Alan.Yeung@phs.scot", role = "ctb"),
16-
person("James", "McMahon", , "James.McMahon@phs.scot", role = "aut",
16+
person("James", "Hayes", , "James.Hayes2@phs.scot", role = "aut",
1717
comment = c(ORCID = "0000-0002-5380-2029")),
1818
person("Nicolaos", "Christofidis", , "nicolaos.christofidis@phs.scot", role = "aut")
1919
)

R/age_from_chi.R

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
#' @title Extract age from the CHI number
2+
#'
3+
#' @description `age_from_chi` takes a CHI number or a vector of CHI numbers
4+
#' and returns the age as implied by the CHI number(s). If the Date of Birth
5+
#' (DoB) is ambiguous it will return NA. It uses [dob_from_chi()].
6+
#'
7+
#' @param chi_number a CHI number or a vector of CHI numbers with `character`
8+
#' class.
9+
#' @param ref_date calculate the age at this date, default is to use
10+
#' `Sys.Date()` i.e. today.
11+
#' @param min_age,max_age optional min and/or max dates that the DoB could take
12+
#' as the century needs to be guessed.
13+
#' Must be either length 1 for a 'fixed' age or the same length as `chi_number`
14+
#' for an age per CHI number.
15+
#' `min_age` can be age based on common sense in the dataset, whilst `max_age`
16+
#' can be age when an event happens such as the age at discharge.
17+
#' @param chi_check logical, optionally skip checking the CHI for validity which
18+
#' will be faster but should only be used if you have previously checked the
19+
#' CHI(s), the default (TRUE) will to check the CHI numbers.
20+
#'
21+
#' @return an integer vector of ages in years truncated to the nearest year.
22+
#' It will be the same length as `chi_number`.
23+
#' @export
24+
#'
25+
#' @examples
26+
#' age_from_chi("0101336489")
27+
#'
28+
#' library(tibble)
29+
#' library(dplyr)
30+
#' data <- tibble(chi = c(
31+
#' "0101336489",
32+
#' "0101405073",
33+
#' "0101625707"
34+
#' ), dis_date = as.Date(c(
35+
#' "1950-01-01",
36+
#' "2000-01-01",
37+
#' "2020-01-01"
38+
#' )))
39+
#'
40+
#' data %>%
41+
#' mutate(chi_age = age_from_chi(chi))
42+
#'
43+
#' data %>%
44+
#' mutate(chi_age = age_from_chi(chi, min_age = 18, max_age = 65))
45+
#'
46+
#' data %>%
47+
#' mutate(chi_age = age_from_chi(chi,
48+
#' ref_date = dis_date
49+
#' ))
50+
age_from_chi <- function(
51+
chi_number,
52+
ref_date = NULL,
53+
min_age = 0L,
54+
max_age = NULL,
55+
chi_check = TRUE) {
56+
# Do type checking on the params
57+
if (!inherits(chi_number, "character")) {
58+
cli::cli_abort(
59+
"{.arg chi_number} must be a {.cls character} vector, not a {.cls {class(chi_number)}} vector."
60+
)
61+
}
62+
63+
if (!is.null(ref_date) && !inherits(ref_date, c("Date", "POSIXct"))) {
64+
cli::cli_abort(
65+
"{.arg ref_date} must be a {.cls Date} or {.cls POSIXct} vector, not a {.cls {class(ref_date)}} vector."
66+
)
67+
}
68+
69+
if (!inherits(min_age, c("integer", "numeric"))) {
70+
cli::cli_abort(
71+
"{.arg min_age} must be a {.cls integer} vector, not a {.cls {class(min_age)}} vector."
72+
)
73+
}
74+
75+
if (!is.null(max_age) && !inherits(max_age, c("integer", "numeric"))) {
76+
cli::cli_abort(
77+
"{.arg max_age} must be a {.cls integer} vector, not a {.cls {class(max_age)}} vector."
78+
)
79+
}
80+
81+
# Handle NULL and NA values in ref_date
82+
if (is.null(ref_date)) {
83+
ref_date <- Sys.Date()
84+
} else if (anyNA(ref_date)) {
85+
# If ref_date is a vector, fill in today's date where it's missing
86+
ref_date[is.na(ref_date)] <- Sys.Date()
87+
}
88+
89+
n_chis <- length(chi_number)
90+
91+
if (length(ref_date) != 1L) {
92+
if (n_chis != 1L && n_chis != length(ref_date)) {
93+
cli::cli_abort(
94+
"{.arg ref_date} must be size {length(chi_number)} (the same as {.arg chi_number}) not {length(ref_date)}."
95+
)
96+
} else if (n_chis == 1L) {
97+
cli::cli_abort(
98+
"{.arg ref_date} must be size 1 (the same as {.arg chi_number}) not {length(ref_date)}."
99+
)
100+
}
101+
}
102+
# Ensure ref_date is replicated if length 1
103+
if (length(ref_date) == 1L && length(chi_number) > 1L) {
104+
ref_date <- rep(ref_date, length(chi_number))
105+
}
106+
107+
# Handle NULL and NA values in max_age
108+
if (is.null(max_age)) {
109+
# If max_age is NULL, set it to a very large number (e.g., age from 1900-01-01)
110+
# This corresponds to the default min_date behaviour in dob_from_chi
111+
max_age <- age_calculate(as.Date("1900-01-01"), ref_date)
112+
} else if (anyNA(max_age)) {
113+
# Ensure max_age is replicated if length 1
114+
if (length(max_age) == 1L && length(chi_number) > 1L) {
115+
max_age <- rep(max_age, length(chi_number))
116+
}
117+
118+
# If max_age is a vector, fill in the age from 1900-01-01 where it's missing
119+
max_age[is.na(max_age)] <- age_calculate(
120+
as.Date("1900-01-01"),
121+
ref_date[is.na(max_age)]
122+
)
123+
}
124+
125+
if (length(max_age) != 1L) {
126+
if (n_chis != 1L && n_chis != length(max_age)) {
127+
cli::cli_abort(
128+
"{.arg max_age} must be size {length(chi_number)} (the same as {.arg chi_number}) not {length(max_age)}."
129+
)
130+
} else if (n_chis == 1L) {
131+
cli::cli_abort(
132+
"{.arg max_age} must be size 1 (the same as {.arg chi_number}) not {length(max_age)}."
133+
)
134+
}
135+
}
136+
137+
if (length(min_age) != 1L) {
138+
if (n_chis != 1L && n_chis != length(min_age)) {
139+
cli::cli_abort(
140+
"{.arg min_age} must be size {length(chi_number)} (the same as {.arg chi_number}) not {length(min_age)}."
141+
)
142+
} else if (n_chis == 1L) {
143+
cli::cli_abort(
144+
"{.arg min_age} must be size 1 (the same as {.arg chi_number}) not {length(min_age)}."
145+
)
146+
}
147+
}
148+
149+
# Handle NA values in min_age
150+
# If min_age is a vector, fill in 0 where it's missing
151+
if (anyNA(min_age)) {
152+
min_age[is.na(min_age)] <- 0L
153+
}
154+
155+
# min and max ages are in a reasonable range
156+
if (any(min_age < 0L)) {
157+
cli::cli_abort("{.arg min_age} must be a positive integer.")
158+
}
159+
160+
# Ensure min_age is replicated if length 1
161+
if (length(min_age) == 1L && length(chi_number) > 1L) {
162+
min_age <- rep(min_age, length(chi_number))
163+
}
164+
165+
# Check max_age vs min_age after handling NAs
166+
if (any(max_age < min_age)) {
167+
cli::cli_abort(
168+
"{.arg max_age}, must always be greater than or equal to {.arg min_age}."
169+
)
170+
}
171+
172+
# Convert age ranges to date ranges relative to the reference date
173+
# NA values in ref_date, min_age, or max_age will propagate NA correctly here
174+
max_date.age <- ref_date - lubridate::years(min_age)
175+
min_date.age <- ref_date - lubridate::years(max_age)
176+
177+
# Call dob_from_chi with the calculated date ranges
178+
guess_dob <- dob_from_chi(
179+
chi_number = chi_number,
180+
min_date = min_date.age,
181+
max_date = max_date.age,
182+
chi_check = chi_check
183+
)
184+
185+
# Calculate age from the guessed date of birth and reference date
186+
# NA values in guess_dob or ref_date will result in NA age
187+
guess_age <- age_calculate(guess_dob, ref_date)
188+
189+
return(guess_age)
190+
}

0 commit comments

Comments
 (0)