diff --git a/R/process_extract_ae.R b/R/process_extract_ae.R index d1bcaec9f..329254136 100644 --- a/R/process_extract_ae.R +++ b/R/process_extract_ae.R @@ -12,7 +12,13 @@ #' @return the final data as a [tibble][tibble::tibble-package]. #' @export #' @family process extracts -process_extract_ae <- function(data, year, write_to_disk = TRUE) { +process_extract_ae <- function(data, + year, + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + write_to_disk = TRUE, + BYOC_MODE = FALSE, + run_id = NA, + run_date_time = NA) { log_slf_event(stage = "process", status = "start", type = "ae", year = year) # Only run for a single year @@ -223,12 +229,34 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { cup_pathway = "CUP Pathway Name" ) + # ---------------------------------------------------------------------------- + # c_year_cup <- convert_fyyear_to_year(check_year_format(year)) + # + # on.exit(try(DBI::dbDisconnect(denodo_connect), silent = TRUE), add = TRUE) + # + # ae_cup_file <- dplyr::tbl( + # denodo_connect, + # dbplyr::in_schema("sdl", "sdl_ae_cup_source_placeholder") # TO-DO: Placeholder for data path in denodo + # ) %>% + # dplyr::filter(year == c_year_cup) %>% # TO-DO: Placeholder for the variable to filter by year + # dplyr::select( + # record_keydate1 = "ed_arrival_date", + # keytime1 = "ed_arrival_time", + # record_keydate2 = "ed_discharge_date", + # keytime2 = "ed_discharge_time", + # case_ref_number = "ed_case_reference_number", + # cup_marker = "cup_marker", + # cup_pathway = "cup_pathway_name" + # ) %>% + # dplyr::collect() + # ---------------------------------------------------------------------------- # Data Cleaning--------------------------------------- ae_cup_clean <- ae_cup_file %>% # Remove any duplicates - dplyr::distinct(.data$record_keydate1, + dplyr::distinct( + .data$record_keydate1, .data$keytime1, .data$record_keydate2, .data$keytime2, @@ -256,7 +284,13 @@ process_extract_ae <- function(data, year, 
write_to_disk = TRUE) { ) ae_processed <- matched_ae_data %>% + dplyr::mutate( + run_id = run_id, + run_date_time = run_date_time + ) %>% dplyr::select( + "run_id", + "run_date_time", "year", "recid", "smrtype", @@ -312,7 +346,8 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { if (write_to_disk) { write_file( ae_processed, - get_source_extract_path(year, "ae", check_mode = "write"), + get_source_extract_path(year, "ae", check_mode = "write", BYOC_MODE = BYOC_MODE), + BYOC_MODE = BYOC_MODE, group_id = 3356 # sourcedev owner ) } diff --git a/R/read_extract_ae.R b/R/read_extract_ae.R index 708d743ab..6067b017f 100644 --- a/R/read_extract_ae.R +++ b/R/read_extract_ae.R @@ -6,90 +6,84 @@ #' read_extract_ae <- function( year, - file_path = get_boxi_extract_path(year = year, type = "ae") + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + file_path = get_boxi_extract_path(year = year, type = "ae", BYOC_MODE), + BYOC_MODE ) { log_slf_event(stage = "read", status = "start", type = "ae", year = year) - extract_ae <- read_file(file_path, - col_type = readr::cols( - "Arrival Date" = readr::col_date(format = "%Y/%m/%d %T"), - "DAT Date" = readr::col_date(format = "%Y/%m/%d %T"), - "anon_chi" = readr::col_character(), - "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"), - "Pat Gender Code" = readr::col_double(), - "NHS Board of Residence Code - current" = readr::col_character(), - "Treatment NHS Board Code - current" = readr::col_character(), - "Treatment Location Code" = readr::col_character(), - "GP Practice Code" = readr::col_character(), - "Council Area Code" = readr::col_character(), - "Postcode (epi) [C]" = readr::col_character(), - "Postcode (CHI) [C]" = readr::col_character(), - "HSCP of Residence Code - current" = readr::col_character(), - "Arrival Time" = readr::col_time(""), - "DAT Time" = readr::col_time(""), - "Arrival Mode Code" = readr::col_character(), - "Referral Source Code" = readr::col_character(), - 
"Attendance Category Code" = readr::col_character(), - "Discharge Destination Code" = readr::col_character(), - "Patient Flow Code" = readr::col_double(), - "Place of Incident Code" = readr::col_character(), - "Reason for Wait Code" = readr::col_character(), - "Disease 1 Code" = readr::col_character(), - "Disease 2 Code" = readr::col_character(), - "Disease 3 Code" = readr::col_character(), - "Bodily Location Of Injury Code" = readr::col_character(), - "Alcohol Involved Code" = readr::col_character(), - "Alcohol Related Admission" = readr::col_character(), - "Substance Misuse Related Admission" = readr::col_character(), - "Falls Related Admission" = readr::col_character(), - "Self Harm Related Admission" = readr::col_character(), - "Total Net Costs" = readr::col_double(), - "Age at Midpoint of Financial Year" = readr::col_double(), - "Case Reference Number" = readr::col_character(), - "Significant Facility Code" = readr::col_character(), - "Community Hospital Flag" = readr::col_character(), - ) + year <- check_year_format(year, format = "fyyear") + c_year <- convert_fyyear_to_year(year) + + # Specify years available for running + if (file_path == get_dummy_boxi_extract_path(BYOC_MODE = BYOC_MODE)) { + return(tibble::tibble()) + } + + on.exit(try(DBI::dbDisconnect(denodo_connect), silent = TRUE), add = TRUE) + + # Read Extract + extract_ae <- dplyr::tbl( + denodo_connect, + dbplyr::in_schema("sdl", "sdl_ae2_episode_level_source") ) %>% - # rename variables - dplyr::rename( - record_keydate1 = "Arrival Date", - record_keydate2 = "DAT Date", - dob = "Pat Date Of Birth [C]", - postcode_epi = "Postcode (epi) [C]", - postcode_chi = "Postcode (CHI) [C]", - age = "Age at Midpoint of Financial Year", - ae_alcohol = "Alcohol Involved Code", - alcohol_adm = "Alcohol Related Admission", - ae_arrivalmode = "Arrival Mode Code", - keytime1 = "Arrival Time", - ae_attendcat = "Attendance Category Code", - ae_bodyloc = "Bodily Location Of Injury Code", - lca = "Council Area Code", - 
ae_disdest = "Discharge Destination Code", - keytime2 = "DAT Time", - diag1 = "Disease 1 Code", - diag2 = "Disease 2 Code", - diag3 = "Disease 3 Code", - falls_adm = "Falls Related Admission", - gpprac = "GP Practice Code", - hscp = "HSCP of Residence Code - current", - hbrescode = "NHS Board of Residence Code - current", - hbtreatcode = "Treatment NHS Board Code - current", - anon_chi = "anon_chi", - gender = "Pat Gender Code", - ae_patflow = "Patient Flow Code", - ae_placeinc = "Place of Incident Code", - ae_reasonwait = "Reason for Wait Code", - refsource = "Referral Source Code", - selfharm_adm = "Self Harm Related Admission", - submis_adm = "Substance Misuse Related Admission", - sigfac = "Significant Facility Code", - cost_total_net = "Total Net Costs", - location = "Treatment Location Code", - case_ref_number = "Case Reference Number", - commhosp = "Community Hospital Flag" - ) + dplyr::filter( + financial_year == c_year, # TO-DO: check assumption that arrival_financial_year == financial_year + significant_facility_code == "32" | is.na(significant_facility_code) + ) %>% + dplyr::select( + record_keydate1 = "arrival_date", + record_keydate2 = "dat_date", + keytime1 = "arrival_time", + keytime2 = "dat_time", + chi = "patient_chi", + gender = "patient_sex", + dob = "patient_dob", + gpprac = "gp_practice_code", + lca = "council_area_code", + hscp = "hscp_of_residence_code_curr", + location = "treatment_location_code", + hbrescode = "nhs_board_of_residence_code_curr", + hbtreatcode = "treatment_nhs_board_code_curr", + diag1 = "disease_1_code", + diag2 = "disease_2_code", + diag3 = "disease_3_code", + ae_arrivalmode = "arrival_mode_code", + refsource = "referral_source_code", + sigfac = "significant_facility_code", + ae_attendcat = "attendance_category_code", + ae_disdest = "discharge_destination_code", + ae_patflow = "patient_flow_code", + ae_placeinc = "place_of_incident_code", + ae_reasonwait = "reason_for_wait_code", + ae_bodyloc = 
"bodily_location_of_injury_code", + ae_alcohol = "alcohol_involved_code", + alcohol_adm = "alcohol_related_admission", + submis_adm = "substance_misuse_related_admission", + falls_adm = "falls_related_admission", + selfharm_adm = "self_harm_related_admission", + cost_total_net = "total_net_cost", + age = "age_at_midpoint_of_financial_year", + case_ref_number = "care_reference_number", # TO-DO: needs to be renamed by NSS from care to case? + postcode_epi = "postcode_epi", + postcode_chi = "postcode_chi", + commhosp = "community_hospital_flag" + ) %>% + dplyr::collect() %>% + slfhelper::get_anon_chi("chi") + # ---------------------------------------------------------------------------- + # TO-DO: Remove this when data types are fixed + extract_ae <- extract_ae %>% + dplyr::mutate( + keytime1 = hms::parse_hms(.data$keytime1), + keytime2 = hms::parse_hms(.data$keytime2), + gender = as.numeric(.data$gender), + ae_patflow = as.numeric(.data$ae_patflow), + age = as.numeric(.data$age), + commhosp = as.character(.data$commhosp) + ) + # ---------------------------------------------------------------------------- log_slf_event(stage = "read", status = "complete", type = "ae", year = year) return(extract_ae) diff --git a/SDL_process/run_sdl.r b/SDL_process/run_sdl.r index d8185f52a..2d691919f 100644 --- a/SDL_process/run_sdl.r +++ b/SDL_process/run_sdl.r @@ -162,7 +162,7 @@ year <- "1920" # Build BYOC Output File Paths byoc_output_files <- get_byoc_output_files( year = year, - types = c("homelessness", "maternity") # using homelessness for test purpose. When development is complete, we change to "types = "byoc_input_files"" + types = c("homelessness", "maternity", "ae") # using homelessness for test purpose. 
When development is complete, we change to "types = "byoc_input_files"" ) # can always use any other type for testing also ## targets ---- @@ -170,7 +170,7 @@ targets::tar_make(script = "dummy_targets.R") logger::log_info("Targets finished.") # targets::tar_make() -# test homelessness data only +# Test homelessness data ## create homelessness data ---- logger::log_info("Read and process homelessness data") hl1 <- read_extract_homelessness( @@ -192,6 +192,7 @@ hl1 <- read_extract_homelessness( run_date_time = run_date_time ) +# Test maternity data logger::log_info("Read and process maternity data") maternity <- read_extract_maternity( year, @@ -207,4 +208,21 @@ maternity <- read_extract_maternity( run_date_time = run_date_time ) +# Test ae data +logger::log_info("Read and process A&E data") +ae <- read_extract_ae( + year, + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + file_path = get_boxi_extract_path(year, type = "ae", BYOC_MODE), + BYOC_MODE = BYOC_MODE +) %>% + process_extract_ae( + year = year, + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + write_to_disk = TRUE, + BYOC_MODE = BYOC_MODE, + run_id = run_id, + run_date_time = run_date_time + ) + logger::log_info("Run SDL ended.") diff --git a/_targets.R b/_targets.R index 2b0c964b0..5445135b6 100644 --- a/_targets.R +++ b/_targets.R @@ -476,12 +476,16 @@ list( ), # Accident & Emergency (AE2) activity -------------------------------------- # READ - A&E - tar_file_read( + tar_target( # Target name ae_data, - get_boxi_extract_path(year, type = "ae"), # Function - read_extract_ae(year, !!.x) + read_extract_ae( + year = year, + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + file_path = get_boxi_extract_path(year = year, type = "ae", BYOC_MODE), + BYOC_MODE = BYOC_MODE + ) ), # PROCESS - A&E tar_target( @@ -489,9 +493,13 @@ list( source_ae_extract, # Function process_extract_ae( - ae_data, - year, - write_to_disk = write_to_disk + data = ae_data, + year = year, + 
denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + write_to_disk = write_to_disk, + BYOC_MODE = BYOC_MODE, + run_id = run_id, + run_date_time = run_date_time ) ), # TESTS - A&E diff --git a/man/process_extract_ae.Rd b/man/process_extract_ae.Rd index 36d2bb4d3..a44cecb3a 100644 --- a/man/process_extract_ae.Rd +++ b/man/process_extract_ae.Rd @@ -4,7 +4,15 @@ \alias{process_extract_ae} \title{Process the A&E extract} \usage{ -process_extract_ae(data, year, write_to_disk = TRUE) +process_extract_ae( + data, + year, + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + write_to_disk = TRUE, + BYOC_MODE = FALSE, + run_id = NA, + run_date_time = NA +) } \arguments{ \item{data}{The extract to process} diff --git a/man/read_extract_ae.Rd b/man/read_extract_ae.Rd index 1a15efbc1..390842e54 100644 --- a/man/read_extract_ae.Rd +++ b/man/read_extract_ae.Rd @@ -6,7 +6,9 @@ \usage{ read_extract_ae( year, - file_path = get_boxi_extract_path(year = year, type = "ae") + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + file_path = get_boxi_extract_path(year = year, type = "ae", BYOC_MODE), + BYOC_MODE ) } \arguments{