Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ export(process_sc_all_alarms_telecare)
export(process_sc_all_care_home)
export(process_sc_all_home_care)
export(process_sc_all_sds)
export(process_slf_deaths_lookup)
export(process_tests_acute)
export(process_tests_ae)
export(process_tests_alarms_telecare)
Expand Down
2 changes: 1 addition & 1 deletion R/create_episode_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ create_episode_file <- function(
) %>%
join_deaths_data(
year,
slf_deaths_lookup
BYOC_MODE = BYOC_MODE
) %>%
write_temp_data(year, file_name = "ep_temp5", write_temp_to_disk) %>%
add_activity_after_death_flag(year,
Expand Down
25 changes: 18 additions & 7 deletions R/get_slf_lookup_paths.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,31 @@ get_slf_deaths_lookup_path <- function(year, ...) {
#'
#' @param ... additional arguments passed to [get_file_path()]
#' @param update the update month (defaults to use [latest_update()])
#' @param BYOC_MODE BYOC_MODE, Boolean type
#'
#' @export
#' @family slf lookup file path
#' @seealso [get_file_path()] for the generic function.
get_combined_slf_deaths_lookup_path <- function(update = latest_update(),
                                                BYOC_MODE = FALSE,
                                                ...) {
  # Note this name is very similar to the existing slf_deaths_lookup_path,
  # which returns the path for the refined_death with deceased flag for each
  # financial year. This function returns the path of the combined lookup,
  # i.e. all financial years put together.
  #
  # When BYOC_MODE is TRUE the path is a fixed file name under
  # denodo_output_path(); `update` and any arguments in `...` are not used.
  # Otherwise the path is built by get_file_path() (which receives `...`)
  # inside the "Deaths" folder of get_slf_dir(), with `update` in the name.
  if (isTRUE(BYOC_MODE)) {
    combined_slf_deaths_lookup_path <- file.path(
      denodo_output_path(),
      "anon-combined_slf_deaths_lookup.parquet"
    )
  } else {
    combined_slf_deaths_lookup_path <- get_file_path(
      directory = fs::path(get_slf_dir(), "Deaths"),
      file_name = stringr::str_glue(
        "anon-combined_slf_deaths_lookup_{update}.parquet"
      ),
      ...
    )
  }

  return(combined_slf_deaths_lookup_path)
}

Expand All @@ -112,7 +123,7 @@ get_combined_slf_deaths_lookup_path <- function(update = latest_update(), ...) {
#' @export
#' @family slf lookup file path
#' @seealso [get_file_path()] for the generic function.
get_slf_chi_deaths_path <- function(update = latest_update(), ...) {
get_slf_chi_deaths_path <- function(update = latest_update(), BYOC_MODE, ...) {
slf_chi_deaths_path <- get_file_path(
directory = fs::path(get_slf_dir(), "Deaths"),
file_name = stringr::str_glue("anon-chi_deaths_{update}.parquet"),
Expand Down
12 changes: 10 additions & 2 deletions R/join_deaths_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,23 @@
#'
#' @param data Episode file data
#' @param year financial year, e.g. '1920'
#' @param slf_deaths_lookup The SLF deaths lookup.
#' @param BYOC_MODE BYOC mode
#'
#' @return The data including the deaths lookup matched
#' on to the episode file.
join_deaths_data <- function(
data,
year,
slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))
BYOC_MODE = FALSE
) {
slf_deaths_lookup <- read_file(get_combined_slf_deaths_lookup_path(BYOC_MODE = BYOC_MODE)) %>%
# Keep only the rows for this financial year, as the combined lookup covers all years
dplyr::filter(fy == year) %>%
# Flag every person remaining in the lookup as deceased.
dplyr::mutate(
deceased = TRUE
)

data <- data %>%
dplyr::left_join(
slf_deaths_lookup %>%
Expand Down
37 changes: 33 additions & 4 deletions R/process_extract_nrs_deaths.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,26 @@
#' @param data The extract to process
#' @param year The year to process, in FY format.
#' @param write_to_disk (optional) Should the data be written to disk default is
#' `TRUE` i.e. write the data to disk.
#' @param BYOC_MODE BYOC_MODE
#' @param run_id run_id for BYOC
#' @param run_date_time run_date_time for BYOC
#'
#' @return the final data as a [tibble][tibble::tibble-package].
#' @export
#' @family process extracts
process_extract_nrs_deaths <- function(data, year, write_to_disk = TRUE) {
log_slf_event(stage = "process", status = "start", type = "deaths", year = year)
process_extract_nrs_deaths <- function(data,
year,
write_to_disk = TRUE,
BYOC_MODE = FALSE,
run_id = NA,
run_date_time = NA) {
log_slf_event(
stage = "process",
status = "start",
type = "deaths",
year = year
)

stopifnot(length(year) == 1L)

Expand All @@ -25,16 +38,32 @@ process_extract_nrs_deaths <- function(data, year, write_to_disk = TRUE) {
year = year,
gpprac = convert_eng_gpprac_to_dummy(.data$gpprac),
smrtype = add_smrtype(.data$recid)
) %>%
dplyr::mutate(
run_id = run_id,
run_date_time = run_date_time
)

if (write_to_disk) {
deaths_clean %>%
write_file(get_source_extract_path(year, "deaths", check_mode = "write"),
write_file(
get_source_extract_path(
year,
"deaths",
check_mode = "write",
BYOC_MODE = BYOC_MODE
),
BYOC_MODE = BYOC_MODE,
group_id = 3356 # sourcedev owner
)
}

log_slf_event(stage = "process", status = "complete", type = "deaths", year = year)
log_slf_event(
stage = "process",
status = "complete",
type = "deaths",
year = year
)

return(deaths_clean)
}
40 changes: 0 additions & 40 deletions R/process_lookup_deaths.R

This file was deleted.

35 changes: 28 additions & 7 deletions R/process_refined_death.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,33 @@
#' @family process extracts
process_refined_death <- function(
it_chi_deaths = read_file(get_slf_chi_deaths_path()),
write_to_disk = TRUE
write_to_disk = TRUE,
BYOC_MODE = FALSE,
run_id = NA,
run_date_time = NA
) {
log_slf_event(stage = "process", status = "start", type = "refined_death", year = "all")
log_slf_event(
stage = "process",
status = "start",
type = "refined_death",
year = "all"
)

years_list <- years_to_run()

nrs_all_years <- lapply(years_list, (\(year) {
read_extract_nrs_deaths(
year,
get_boxi_extract_path(year, type = "deaths")
denodo_connect = get_denodo_connect(BYOC_MODE = BYOC_MODE),
get_boxi_extract_path(year, type = "deaths", BYOC_MODE = BYOC_MODE),
BYOC_MODE = BYOC_MODE
) %>%
process_extract_nrs_deaths(year,
write_to_disk = write_to_disk
process_extract_nrs_deaths(
year,
write_to_disk = write_to_disk,
BYOC_MODE = BYOC_MODE,
run_id = run_id,
run_date_time = run_date_time
)
})) %>%
data.table::rbindlist()
Expand Down Expand Up @@ -59,16 +73,23 @@ process_refined_death <- function(
dplyr::arrange(.data$death_date) %>%
dplyr::distinct(.data$anon_chi, .keep_all = TRUE) %>%
dplyr::ungroup()
# run_id and run_date_time should have been added in process_extract_nrs_deaths()

if (write_to_disk) {
write_file(
refined_death,
get_combined_slf_deaths_lookup_path(create = TRUE),
get_combined_slf_deaths_lookup_path(create = TRUE, BYOC_MODE = BYOC_MODE),
BYOC_MODE = BYOC_MODE,
group_id = 3206 # hscdiip owner
)
}

log_slf_event(stage = "process", status = "complete", type = "refined_death", year = "all")
log_slf_event(
stage = "process",
status = "complete",
type = "refined_death",
year = "all"
)

return(refined_death)
}
116 changes: 58 additions & 58 deletions R/read_extract_nrs_deaths.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,68 +5,68 @@
#' @export
read_extract_nrs_deaths <- function(
  year,
  denodo_connect = get_denodo_connect(BYOC_MODE = BYOC_MODE),
  file_path = get_boxi_extract_path(year, type = "deaths", BYOC_MODE = BYOC_MODE),
  BYOC_MODE = FALSE
) {
  # Read the NRS deaths records for one financial year from the Denodo table
  # sdl.sdl_nrs_deaths_episodes and return them as a collected data frame.
  #
  # year           Financial year; validated with check_year_format() and
  #                converted with convert_fyyear_to_year() for the filter.
  # denodo_connect Connection used for the query. NOTE: it is disconnected
  #                when this function exits, including when the caller
  #                supplied it.
  # file_path      Not used by the Denodo read; kept so existing calls that
  #                pass it still work. Its default is never evaluated.
  # BYOC_MODE      Passed to the default `denodo_connect` / `file_path`
  #                expressions. Defaults to FALSE so the function can be
  #                called with `year` alone.
  year <- check_year_format(year, "fyyear")
  c_year <- convert_fyyear_to_year(year)

  log_slf_event(
    stage = "read",
    status = "start",
    type = "deaths",
    year = year
  )

  # Best-effort disconnect: errors from an already-closed connection are
  # deliberately suppressed.
  on.exit(try(DBI::dbDisconnect(denodo_connect), silent = TRUE), add = TRUE)

  extract_nrs_deaths <- dplyr::tbl(
    denodo_connect,
    # TODO: check table name
    dbplyr::in_schema("sdl", "sdl_nrs_deaths_episodes")
  ) %>%
    # Restrict to the requested year before selecting, so the financial year
    # column does not need to be carried through and dropped afterwards.
    dplyr::filter(
      date_of_death_financial_year == c_year
    ) %>%
    dplyr::select(
      death_location_code = "death_location_code",
      lca = "geo_council_area_code",
      postcode = "geo_postcode",
      hscp = "geo_hscp_of_residence_code_curr",
      death_board_occurrence = "nhs_board_of_occurrence_code_curr",
      hbrescode = "nhs_board_of_residence_code_curr",
      dob = "patient_dob",
      record_keydate1 = "patient_dod",
      gender = "pat_gender_code",
      chi = "patient_chi",
      place_death_occurred = "place_death_occurred_code",
      post_mortem = "post_mortem_code",
      deathdiag1 = "primary_cause_of_death_code",
      deathdiag2 = "secondary_cause_of_death_0_code",
      deathdiag3 = "secondary_cause_of_death_1_code",
      deathdiag4 = "secondary_cause_of_death_2_code",
      deathdiag5 = "secondary_cause_of_death_3_code",
      deathdiag6 = "secondary_cause_of_death_4_code",
      deathdiag7 = "secondary_cause_of_death_5_code",
      deathdiag8 = "secondary_cause_of_death_6_code",
      deathdiag9 = "secondary_cause_of_death_7_code",
      deathdiag10 = "secondary_cause_of_death_8_code",
      deathdiag11 = "secondary_cause_of_death_9_code",
      uri = "unique_record_identifier",
      gpprac = "gp_practice_code"
    ) %>%
    dplyr::collect() %>%
    slfhelper::get_anon_chi()

  log_slf_event(
    stage = "read",
    status = "complete",
    type = "deaths",
    year = year
  )

  return(extract_nrs_deaths)
}
5 changes: 4 additions & 1 deletion _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,10 @@ list(
refined_death_data,
process_refined_death(
it_chi_deaths = it_chi_deaths_data,
write_to_disk = write_to_disk
write_to_disk = write_to_disk,
BYOC_MODE = BYOC_MODE,
run_id = run_id,
run_date_time = run_date_time
)
),
### Social Care - 'All' data -----------------------------------------------
Expand Down
Loading