diff --git a/NAMESPACE b/NAMESPACE index cade26547..8b96f9af2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -139,7 +139,6 @@ export(process_sc_all_alarms_telecare) export(process_sc_all_care_home) export(process_sc_all_home_care) export(process_sc_all_sds) -export(process_slf_deaths_lookup) export(process_tests_acute) export(process_tests_ae) export(process_tests_alarms_telecare) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 3d6d7802e..bf33f3e9f 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -168,7 +168,7 @@ create_episode_file <- function( ) %>% join_deaths_data( year, - slf_deaths_lookup + BYOC_MODE = BYOC_MODE ) %>% write_temp_data(year, file_name = "ep_temp5", write_temp_to_disk) %>% add_activity_after_death_flag(year, diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R index ae88c4b92..608ca932d 100644 --- a/R/get_slf_lookup_paths.R +++ b/R/get_slf_lookup_paths.R @@ -82,20 +82,31 @@ get_slf_deaths_lookup_path <- function(year, ...) { #' #' @param ... additional arguments passed to [get_file_path()] #' @param update the update month (defaults to use [latest_update()]) +#' @param BYOC_MODE BYOC_MODE, Boolean type #' #' @export #' @family slf lookup file path #' @seealso [get_file_path()] for the generic function. -get_combined_slf_deaths_lookup_path <- function(update = latest_update(), ...) { +get_combined_slf_deaths_lookup_path <- function(update = latest_update(), + BYOC_MODE = FALSE, + ...) { # Note this name is very similar to the existing slf_deaths_lookup_path which returns the path for # the refined_death with deceased flag for each financial year. # This function will return the combined financial # years lookup i.e. all years put together. - combined_slf_deaths_lookup_path <- get_file_path( - directory = fs::path(get_slf_dir(), "Deaths"), - file_name = stringr::str_glue("anon-combined_slf_deaths_lookup_{update}.parquet"), - ... - ) + if (isTRUE(BYOC_MODE)) { + combined_slf_deaths_lookup_path <- file.path( + denodo_output_path(), + "anon-combined_slf_deaths_lookup.parquet" + ) + } else { + combined_slf_deaths_lookup_path <- get_file_path( + directory = fs::path(get_slf_dir(), "Deaths"), + file_name = stringr::str_glue("anon-combined_slf_deaths_lookup_{update}.parquet"), + ... + ) + } + return(combined_slf_deaths_lookup_path) } @@ -112,7 +123,7 @@ get_combined_slf_deaths_lookup_path <- function(update = latest_update(), ...) { #' @export #' @family slf lookup file path #' @seealso [get_file_path()] for the generic function. -get_slf_chi_deaths_path <- function(update = latest_update(), ...) { +get_slf_chi_deaths_path <- function(update = latest_update(), BYOC_MODE, ...) { slf_chi_deaths_path <- get_file_path( directory = fs::path(get_slf_dir(), "Deaths"), file_name = stringr::str_glue("anon-chi_deaths_{update}.parquet"), diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R index 7a4d43fcb..42bc39c84 100644 --- a/R/join_deaths_data.R +++ b/R/join_deaths_data.R @@ -2,15 +2,23 @@ #' #' @param data Episode file data #' @param year financial year, e.g. '1920' -#' @param slf_deaths_lookup The SLF deaths lookup. +#' @param BYOC_MODE BYOC mode #' #' @return The data including the deaths lookup matched #' on to the episode file. join_deaths_data <- function( data, year, - slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) + BYOC_MODE = FALSE ) { + slf_deaths_lookup <- read_file(get_combined_slf_deaths_lookup_path(BYOC_MODE = BYOC_MODE)) %>% + # Filter the chi death dates to the FY as the lookup is by FY + dplyr::filter(fy == year) %>% + # use the BOXI NRS death date by default, but if it's missing, use the chi death date. + dplyr::mutate( + deceased = TRUE + ) + data <- data %>% dplyr::left_join( slf_deaths_lookup %>% diff --git a/R/process_extract_nrs_deaths.R b/R/process_extract_nrs_deaths.R index d80d4a750..e8def2368 100644 --- a/R/process_extract_nrs_deaths.R +++ b/R/process_extract_nrs_deaths.R @@ -6,13 +6,26 @@ #' @param data The extract to process #' @param year The year to process, in FY format. #' @param write_to_disk (optional) Should the data be written to disk default is +#' @param BYOC_MODE BYOC_MODE +#' @param run_id run_id for BYOC +#' @param run_date_time run_date_time for BYOC #' `TRUE` i.e. write the data to disk. #' #' @return the final data as a [tibble][tibble::tibble-package]. #' @export #' @family process extracts -process_extract_nrs_deaths <- function(data, year, write_to_disk = TRUE) { - log_slf_event(stage = "process", status = "start", type = "deaths", year = year) +process_extract_nrs_deaths <- function(data, + year, + write_to_disk = TRUE, + BYOC_MODE = FALSE, + run_id = NA, + run_date_time = NA) { + log_slf_event( + stage = "process", + status = "start", + type = "deaths", + year = year + ) stopifnot(length(year) == 1L) @@ -25,16 +38,32 @@ process_extract_nrs_deaths <- function(data, year, write_to_disk = TRUE) { year = year, gpprac = convert_eng_gpprac_to_dummy(.data$gpprac), smrtype = add_smrtype(.data$recid) + ) %>% + dplyr::mutate( + run_id = run_id, + run_date_time = run_date_time ) if (write_to_disk) { deaths_clean %>% - write_file(get_source_extract_path(year, "deaths", check_mode = "write"), + write_file( + get_source_extract_path( + year, + "deaths", + check_mode = "write", + BYOC_MODE = BYOC_MODE + ), + BYOC_MODE = BYOC_MODE, group_id = 3356 # sourcedev owner ) } - log_slf_event(stage = "process", status = "complete", type = "deaths", year = year) + log_slf_event( + stage = "process", + status = "complete", + type = "deaths", + year = year + ) return(deaths_clean) } diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R deleted file mode 100644 index 0482a6573..000000000 --- a/R/process_lookup_deaths.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Create the SLF Deaths lookup -#' -#' @description Use all-year refined death data to produce year-specific -#' slf_deaths_lookup with deceased flag added. -#' -#' @param year The year to process, in FY format. -#' @param refined_death refined death date combining nrs and it_chi. -#' @param write_to_disk (optional) Should the data be written to disk default is -#' `TRUE` i.e. write the data to disk. -#' -#' @return a [tibble][tibble::tibble-package] add deceased flag to deaths -#' @export -process_slf_deaths_lookup <- function( - year, - refined_death = read_file(get_combined_slf_deaths_lookup_path()), - write_to_disk = TRUE -) { - log_slf_event(stage = "process", status = "start", type = "slf_deaths_lookup", year = year) - - # create slf deaths lookup - slf_deaths_lookup <- refined_death %>% - # Filter the chi death dates to the FY as the lookup is by FY - dplyr::filter(fy == year) %>% - # use the BOXI NRS death date by default, but if it's missing, use the chi death date. - dplyr::mutate( - deceased = TRUE - ) - - if (write_to_disk) { - write_file( - slf_deaths_lookup, - get_slf_deaths_lookup_path(year, check_mode = "write"), - group_id = 3206 # hscdiip owner - ) - } - - log_slf_event(stage = "process", status = "complete", type = "slf_deaths_lookup", year = year) - - return(slf_deaths_lookup) -} diff --git a/R/process_refined_death.R b/R/process_refined_death.R index a2c0fccb0..c4b7968da 100644 --- a/R/process_refined_death.R +++ b/R/process_refined_death.R @@ -14,19 +14,33 @@ #' @family process extracts process_refined_death <- function( it_chi_deaths = read_file(get_slf_chi_deaths_path()), - write_to_disk = TRUE + write_to_disk = TRUE, + BYOC_MODE = FALSE, + run_id = NA, + run_date_time = NA ) { - log_slf_event(stage = "process", status = "start", type = "refined_death", year = "all") + log_slf_event( + stage = "process", + status = "start", + type = "refined_death", + year = "all" + ) years_list <- years_to_run() nrs_all_years <- lapply(years_list, (\(year) { read_extract_nrs_deaths( year, - get_boxi_extract_path(year, type = "deaths") + denodo_connect = get_denodo_connect(BYOC_MODE = BYOC_MODE), + get_boxi_extract_path(year, type = "deaths", BYOC_MODE = BYOC_MODE), + BYOC_MODE = BYOC_MODE ) %>% - process_extract_nrs_deaths(year, - write_to_disk = write_to_disk + process_extract_nrs_deaths( + year, + write_to_disk = write_to_disk, + BYOC_MODE = BYOC_MODE, + run_id = run_id, + run_date_time = run_date_time ) })) %>% data.table::rbindlist() @@ -59,16 +73,23 @@ process_refined_death <- function( dplyr::arrange(.data$death_date) %>% dplyr::distinct(.data$anon_chi, .keep_all = TRUE) %>% dplyr::ungroup() + # run_id and run_date_time should have been added in process_extract_nrs_deaths() if (write_to_disk) { write_file( refined_death, - get_combined_slf_deaths_lookup_path(create = TRUE), + get_combined_slf_deaths_lookup_path(create = TRUE, BYOC_MODE = BYOC_MODE), + BYOC_MODE = BYOC_MODE, group_id = 3206 # hscdiip owner ) } - log_slf_event(stage = "process", status = "complete", type = "refined_death", year = "all") + log_slf_event( + stage = "process", + status = "complete", + type = "refined_death", + year = "all" + ) return(refined_death) } diff --git a/R/read_extract_nrs_deaths.R b/R/read_extract_nrs_deaths.R index 8eaf4f264..706b3075e 100644 --- a/R/read_extract_nrs_deaths.R +++ b/R/read_extract_nrs_deaths.R @@ -5,68 +5,68 @@ #' @export read_extract_nrs_deaths <- function( year, - file_path = get_boxi_extract_path(year = year, type = "deaths") + denodo_connect = get_denodo_connect(BYOC_MODE = BYOC_MODE), + file_path = get_boxi_extract_path(year, type = "deaths", BYOC_MODE = BYOC_MODE), + BYOC_MODE ) { - log_slf_event(stage = "read", status = "start", type = "deaths", year = year) + year <- check_year_format(year, "fyyear") + c_year <- convert_fyyear_to_year(year) - extract_nrs_deaths <- read_file(file_path, - col_types = readr::cols_only( - "Death Location Code" = readr::col_character(), - "Geo Council Area Code" = readr::col_character(), - "Geo Postcode [C]" = readr::col_character(), - "Geo HSCP of Residence Code - current" = readr::col_character(), - "NHS Board of Occurrence Code - current" = readr::col_character(), - "NHS Board of Residence Code - current" = readr::col_character(), - "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"), - "Date of Death(99)" = readr::col_date(format = "%Y/%m/%d %T"), - "Pat Gender Code" = readr::col_double(), - "anon_chi" = readr::col_character(), - "Place Death Occurred Code" = readr::col_character(), - "Post Mortem Code" = readr::col_character(), - "Prim Cause of Death Code (6 char)" = readr::col_character(), - "Sec Cause of Death 0 Code (6 char)" = readr::col_character(), - "Sec Cause of Death 1 Code (6 char)" = readr::col_character(), - "Sec Cause of Death 2 Code (6 char)" = readr::col_character(), - "Sec Cause of Death 3 Code (6 char)" = readr::col_character(), - "Sec Cause of Death 4 Code (6 char)" = readr::col_character(), - "Sec Cause of Death 5 Code (6 char)" = readr::col_character(), - "Sec Cause of Death 6 Code (6 char)" = readr::col_character(), - "Sec Cause of Death 7 Code (6 char)" = readr::col_character(), - "Sec Cause of Death 8 Code (6 char)" = readr::col_character(), - "Sec Cause of Death 9 Code (6 char)" = readr::col_character(), - "Unique Record Identifier" = readr::col_character(), - "GP practice code(99)" = readr::col_character() - ) + log_slf_event( + stage = "read", + status = "start", + type = "deaths", + year = year + ) + + on.exit(try(DBI::dbDisconnect(denodo_connect), silent = TRUE), add = TRUE) + + extract_nrs_deaths <- dplyr::tbl( + denodo_connect, + # TODO: check table name + dbplyr::in_schema("sdl", "sdl_nrs_deaths_episodes") ) %>% - dplyr::rename( - death_location_code = "Death Location Code", - lca = "Geo Council Area Code", - postcode = "Geo Postcode [C]", - hscp = "Geo HSCP of Residence Code - current", - death_board_occurrence = "NHS Board of Occurrence Code - current", - hbrescode = "NHS Board of Residence Code - current", - dob = "Pat Date Of Birth [C]", - record_keydate1 = "Date of Death(99)", - gender = "Pat Gender Code", - anon_chi = "anon_chi", - place_death_occurred = "Place Death Occurred Code", - post_mortem = "Post Mortem Code", - deathdiag1 = "Prim Cause of Death Code (6 char)", - deathdiag2 = "Sec Cause of Death 0 Code (6 char)", - deathdiag3 = "Sec Cause of Death 1 Code (6 char)", - deathdiag4 = "Sec Cause of Death 2 Code (6 char)", - deathdiag5 = "Sec Cause of Death 3 Code (6 char)", - deathdiag6 = "Sec Cause of Death 4 Code (6 char)", - deathdiag7 = "Sec Cause of Death 5 Code (6 char)", - deathdiag8 = "Sec Cause of Death 6 Code (6 char)", - deathdiag9 = "Sec Cause of Death 7 Code (6 char)", - deathdiag10 = "Sec Cause of Death 8 Code (6 char)", - deathdiag11 = "Sec Cause of Death 9 Code (6 char)", - uri = "Unique Record Identifier", - gpprac = "GP practice code(99)" - ) + dplyr::select( + death_location_code = "death_location_code", + lca = "geo_council_area_code", + postcode = "geo_postcode", + hscp = "geo_hscp_of_residence_code_curr", + death_board_occurrence = "nhs_board_of_occurrence_code_curr", + hbrescode = "nhs_board_of_residence_code_curr", + dob = "patient_dob", + record_keydate1 = "patient_dod", + gender = "pat_gender_code", + chi = "patient_chi", + place_death_occurred = "place_death_occurred_code", + post_mortem = "post_mortem_code", + deathdiag1 = "primary_cause_of_death_code", + deathdiag2 = "secondary_cause_of_death_0_code", + deathdiag3 = "secondary_cause_of_death_1_code", + deathdiag4 = "secondary_cause_of_death_2_code", + deathdiag5 = "secondary_cause_of_death_3_code", + deathdiag6 = "secondary_cause_of_death_4_code", + deathdiag7 = "secondary_cause_of_death_5_code", + deathdiag8 = "secondary_cause_of_death_6_code", + deathdiag9 = "secondary_cause_of_death_7_code", + deathdiag10 = "secondary_cause_of_death_8_code", + deathdiag11 = "secondary_cause_of_death_9_code", + uri = "unique_record_identifier", + gpprac = "gp_practice_code", + date_of_death_financial_year = "date_of_death_financial_year" + ) %>% + dplyr::filter( + date_of_death_financial_year == c_year + ) %>% + dplyr::select(-"date_of_death_financial_year") %>% + dplyr::collect() %>% + slfhelper::get_anon_chi() - log_slf_event(stage = "read", status = "complete", type = "deaths", year = year) + log_slf_event( + stage = "read", + status = "complete", + type = "deaths", + year = year + ) return(extract_nrs_deaths) } diff --git a/_targets.R b/_targets.R index 2b0c964b0..9e2782478 100644 --- a/_targets.R +++ b/_targets.R @@ -286,7 +286,10 @@ list( refined_death_data, process_refined_death( it_chi_deaths = it_chi_deaths_data, - write_to_disk = write_to_disk + write_to_disk = write_to_disk, + BYOC_MODE = BYOC_MODE, + run_id = run_id, + run_date_time = run_date_time ) ), ### Social Care - 'All' data ----------------------------------------------- diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd index d85b125f9..a75397200 100644 --- a/man/create_episode_file.Rd +++ b/man/create_episode_file.Rd @@ -37,8 +37,6 @@ create_episode_file( \item{slf_gpprac_lookup}{The SLF GP Practice lookup} -\item{slf_deaths_lookup}{The SLF deaths lookup.} - \item{sc_client}{social care lookup file} \item{write_to_disk}{(optional) Should the data be written to disk default is diff --git a/man/get_combined_slf_deaths_lookup_path.Rd b/man/get_combined_slf_deaths_lookup_path.Rd index 709773d01..17bb7129e 100644 --- a/man/get_combined_slf_deaths_lookup_path.Rd +++ b/man/get_combined_slf_deaths_lookup_path.Rd @@ -4,11 +4,17 @@ \alias{get_combined_slf_deaths_lookup_path} \title{SLF death dates File Path} \usage{ -get_combined_slf_deaths_lookup_path(update = latest_update(), ...) +get_combined_slf_deaths_lookup_path( + update = latest_update(), + BYOC_MODE = FALSE, + ... +) } \arguments{ \item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})} +\item{BYOC_MODE}{BYOC_MODE, Boolean type} + \item{...}{additional arguments passed to \code{\link[=get_file_path]{get_file_path()}}} } \description{ diff --git a/man/get_slf_chi_deaths_path.Rd b/man/get_slf_chi_deaths_path.Rd index 8ba115dfe..e5b20d339 100644 --- a/man/get_slf_chi_deaths_path.Rd +++ b/man/get_slf_chi_deaths_path.Rd @@ -4,7 +4,7 @@ \alias{get_slf_chi_deaths_path} \title{SLF CHI Deaths File Path} \usage{ -get_slf_chi_deaths_path(update = latest_update(), ...) +get_slf_chi_deaths_path(update = latest_update(), BYOC_MODE, ...) } \arguments{ \item{update}{The update month to use, diff --git a/man/join_deaths_data.Rd b/man/join_deaths_data.Rd index f3b68fe1a..128a144ef 100644 --- a/man/join_deaths_data.Rd +++ b/man/join_deaths_data.Rd @@ -4,18 +4,14 @@ \alias{join_deaths_data} \title{Join Deaths data} \usage{ -join_deaths_data( - data, - year, - slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) -) +join_deaths_data(data, year, BYOC_MODE = FALSE) } \arguments{ \item{data}{Episode file data} \item{year}{financial year, e.g. '1920'} -\item{slf_deaths_lookup}{The SLF deaths lookup.} +\item{BYOC_MODE}{BYOC mode} } \value{ The data including the deaths lookup matched diff --git a/man/process_extract_nrs_deaths.Rd b/man/process_extract_nrs_deaths.Rd index 71fab68e2..1b8ca1afc 100644 --- a/man/process_extract_nrs_deaths.Rd +++ b/man/process_extract_nrs_deaths.Rd @@ -4,14 +4,27 @@ \alias{process_extract_nrs_deaths} \title{Process the Nation Records of Scotland (NRS) Deaths extract} \usage{ -process_extract_nrs_deaths(data, year, write_to_disk = TRUE) +process_extract_nrs_deaths( + data, + year, + write_to_disk = TRUE, + BYOC_MODE = FALSE, + run_id = NA, + run_date_time = NA +) } \arguments{ \item{data}{The extract to process} \item{year}{The year to process, in FY format.} -\item{write_to_disk}{(optional) Should the data be written to disk default is +\item{write_to_disk}{(optional) Should the data be written to disk default is} + +\item{BYOC_MODE}{BYOC_MODE} + +\item{run_id}{run_id for BYOC} + +\item{run_date_time}{run_date_time for BYOC \code{TRUE} i.e. write the data to disk.} } \value{ diff --git a/man/process_refined_death.Rd b/man/process_refined_death.Rd index fd5392eb2..b3d27b1f4 100644 --- a/man/process_refined_death.Rd +++ b/man/process_refined_death.Rd @@ -6,7 +6,10 @@ \usage{ process_refined_death( it_chi_deaths = read_file(get_slf_chi_deaths_path()), - write_to_disk = TRUE + write_to_disk = TRUE, + BYOC_MODE = FALSE, + run_id = NA, + run_date_time = NA ) } \arguments{ diff --git a/man/process_slf_deaths_lookup.Rd b/man/process_slf_deaths_lookup.Rd deleted file mode 100644 index 80e7559e0..000000000 --- a/man/process_slf_deaths_lookup.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/process_lookup_deaths.R -\name{process_slf_deaths_lookup} -\alias{process_slf_deaths_lookup} -\title{Create the SLF Deaths lookup} -\usage{ -process_slf_deaths_lookup( - year, - refined_death = read_file(get_combined_slf_deaths_lookup_path()), - write_to_disk = TRUE -) -} -\arguments{ -\item{year}{The year to process, in FY format.} - -\item{refined_death}{refined death date combining nrs and it_chi.} - -\item{write_to_disk}{(optional) Should the data be written to disk default is -\code{TRUE} i.e. write the data to disk.} -} -\value{ -a \link[tibble:tibble-package]{tibble} add deceased flag to deaths -} -\description{ -Use all-year refined death data to produce year-specific -slf_deaths_lookup with deceased flag added. -} diff --git a/man/read_extract_nrs_deaths.Rd b/man/read_extract_nrs_deaths.Rd index 8b810aebd..9efd6f1dd 100644 --- a/man/read_extract_nrs_deaths.Rd +++ b/man/read_extract_nrs_deaths.Rd @@ -6,7 +6,9 @@ \usage{ read_extract_nrs_deaths( year, - file_path = get_boxi_extract_path(year = year, type = "deaths") + denodo_connect = get_denodo_connect(BYOC_MODE = BYOC_MODE), + file_path = get_boxi_extract_path(year, type = "deaths", BYOC_MODE = BYOC_MODE), + BYOC_MODE ) } \arguments{