Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions R/process_extract_ae.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@
#' @return the final data as a [tibble][tibble::tibble-package].
#' @export
#' @family process extracts
process_extract_ae <- function(data, year, write_to_disk = TRUE) {
process_extract_ae <- function(data,
year,
denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),

Check warning on line 17 in R/process_extract_ae.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/process_extract_ae.R,line=17,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 93 characters.
write_to_disk = TRUE,
BYOC_MODE = FALSE,

Check warning on line 19 in R/process_extract_ae.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/process_extract_ae.R,line=19,col=32,[object_name_linter] Variable and function name style should match snake_case or symbols.
run_id = NA,
run_date_time = NA) {
log_slf_event(stage = "process", status = "start", type = "ae", year = year)

# Only run for a single year
Expand All @@ -23,24 +29,24 @@

# Data Cleaning ---------------------------------------

ae_clean <- data %>%

Check warning on line 32 in R/process_extract_ae.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/process_extract_ae.R,line=32,col=20,[pipe_consistency_linter] Use the |> pipe operator instead of the %>% pipe operator.
# year variable
dplyr::mutate(
year = year,
recid = "AE2"
) %>%

Check warning on line 37 in R/process_extract_ae.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/process_extract_ae.R,line=37,col=7,[pipe_consistency_linter] Use the |> pipe operator instead of the %>% pipe operator.
# Recode GP Practice
dplyr::mutate(gpprac = convert_eng_gpprac_to_dummy(.data$gpprac)) %>%

Check warning on line 39 in R/process_extract_ae.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/process_extract_ae.R,line=39,col=71,[pipe_consistency_linter] Use the |> pipe operator instead of the %>% pipe operator.
# use the CHI postcode and if that is blank, then use the epi postcode.
dplyr::mutate(postcode = dplyr::if_else(
!is.na(.data$postcode_chi),
.data$postcode_chi,
.data$postcode_epi
)) %>%

Check warning on line 45 in R/process_extract_ae.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/process_extract_ae.R,line=45,col=8,[pipe_consistency_linter] Use the |> pipe operator instead of the %>% pipe operator.
# A&E data has postcode in PC8 format but we need it in PC7 format
dplyr::mutate(
postcode = phsmethods::format_postcode(.data$postcode, "pc7")
) %>%

Check warning on line 49 in R/process_extract_ae.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/process_extract_ae.R,line=49,col=7,[pipe_consistency_linter] Use the |> pipe operator instead of the %>% pipe operator.
## recode cypher HB codes ##
dplyr::mutate(
dplyr::across(c("hbtreatcode", "hbrescode"), ~ dplyr::case_when(
Expand All @@ -59,15 +65,15 @@
.x == "Y" ~ "S08000017",
.x == "Z" ~ "S08000026"
))
) %>%

Check warning on line 68 in R/process_extract_ae.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/process_extract_ae.R,line=68,col=7,[pipe_consistency_linter] Use the |> pipe operator instead of the %>% pipe operator.
## Allocate the costs to the correct month ##
# Create month variable
dplyr::mutate(
month = strftime(.data$record_keydate1, "%m"),
smrtype = add_smrtype(.data$recid)
) %>%

Check warning on line 74 in R/process_extract_ae.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/process_extract_ae.R,line=74,col=7,[pipe_consistency_linter] Use the |> pipe operator instead of the %>% pipe operator.
# Allocate the costs to the correct month
create_day_episode_costs(.data$record_keydate1, .data$cost_total_net) %>%

Check warning on line 76 in R/process_extract_ae.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/process_extract_ae.R,line=76,col=75,[pipe_consistency_linter] Use the |> pipe operator instead of the %>% pipe operator.
# clean up commhosp values
# dplyr::mutate(commhosp = dplyr::if_else(.data$commhosp == 1L, "Y", "N"))
# Reset community hospital flag as an integer
Expand Down Expand Up @@ -223,12 +229,34 @@
cup_pathway = "CUP Pathway Name"
)

# ----------------------------------------------------------------------------
# c_year_cup <- convert_fyyear_to_year(check_year_format(year))
#
# on.exit(try(DBI::dbDisconnect(denodo_connect), silent = TRUE), add = TRUE)
#
# ae_cup_file <- dplyr::tbl(
# denodo_connect,
# dbplyr::in_schema("sdl", "sdl_ae_cup_source_placeholder") # TO-DO: Placeholder for data path in denodo
# ) %>%
# dplyr::filter(year == c_year_cup) %>% # TO-DO: Placeholder for the variable to filter by year
# dplyr::select(
# record_keydate1 = "ed_arrival_date",
# keytime1 = "ed_arrival_time",
# record_keydate2 = "ed_discharge_date",
# keytime2 = "ed_discharge_time",
# case_ref_number = "ed_case_reference_number",
# cup_marker = "cup_marker",
# cup_pathway = "cup_pathway_name"
# ) %>%
# dplyr::collect()
# ----------------------------------------------------------------------------

# Data Cleaning---------------------------------------

ae_cup_clean <- ae_cup_file %>%
# Remove any duplicates
dplyr::distinct(.data$record_keydate1,
dplyr::distinct(
.data$record_keydate1,
.data$keytime1,
.data$record_keydate2,
.data$keytime2,
Expand Down Expand Up @@ -256,7 +284,13 @@
)

ae_processed <- matched_ae_data %>%
dplyr::mutate(
run_id = run_id,
run_date_time = run_date_time
) %>%
dplyr::select(
"run_id",
"run_date_time",
"year",
"recid",
"smrtype",
Expand Down Expand Up @@ -312,7 +346,8 @@
if (write_to_disk) {
write_file(
ae_processed,
get_source_extract_path(year, "ae", check_mode = "write"),
get_source_extract_path(year, "ae", check_mode = "write", BYOC_MODE = BYOC_MODE),
BYOC_MODE = BYOC_MODE,
group_id = 3356 # sourcedev owner
)
}
Expand Down
152 changes: 73 additions & 79 deletions R/read_extract_ae.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,90 +6,84 @@
#'
read_extract_ae <- function(
year,
file_path = get_boxi_extract_path(year = year, type = "ae")
denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
file_path = get_boxi_extract_path(year = year, type = "ae", BYOC_MODE),
BYOC_MODE
) {
log_slf_event(stage = "read", status = "start", type = "ae", year = year)

extract_ae <- read_file(file_path,
col_type = readr::cols(
"Arrival Date" = readr::col_date(format = "%Y/%m/%d %T"),
"DAT Date" = readr::col_date(format = "%Y/%m/%d %T"),
"anon_chi" = readr::col_character(),
"Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
"Pat Gender Code" = readr::col_double(),
"NHS Board of Residence Code - current" = readr::col_character(),
"Treatment NHS Board Code - current" = readr::col_character(),
"Treatment Location Code" = readr::col_character(),
"GP Practice Code" = readr::col_character(),
"Council Area Code" = readr::col_character(),
"Postcode (epi) [C]" = readr::col_character(),
"Postcode (CHI) [C]" = readr::col_character(),
"HSCP of Residence Code - current" = readr::col_character(),
"Arrival Time" = readr::col_time(""),
"DAT Time" = readr::col_time(""),
"Arrival Mode Code" = readr::col_character(),
"Referral Source Code" = readr::col_character(),
"Attendance Category Code" = readr::col_character(),
"Discharge Destination Code" = readr::col_character(),
"Patient Flow Code" = readr::col_double(),
"Place of Incident Code" = readr::col_character(),
"Reason for Wait Code" = readr::col_character(),
"Disease 1 Code" = readr::col_character(),
"Disease 2 Code" = readr::col_character(),
"Disease 3 Code" = readr::col_character(),
"Bodily Location Of Injury Code" = readr::col_character(),
"Alcohol Involved Code" = readr::col_character(),
"Alcohol Related Admission" = readr::col_character(),
"Substance Misuse Related Admission" = readr::col_character(),
"Falls Related Admission" = readr::col_character(),
"Self Harm Related Admission" = readr::col_character(),
"Total Net Costs" = readr::col_double(),
"Age at Midpoint of Financial Year" = readr::col_double(),
"Case Reference Number" = readr::col_character(),
"Significant Facility Code" = readr::col_character(),
"Community Hospital Flag" = readr::col_character(),
)
year <- check_year_format(year, format = "fyyear")
c_year <- convert_fyyear_to_year(year)

# Specify years available for running
if (file_path == get_dummy_boxi_extract_path(BYOC_MODE = BYOC_MODE)) {
return(tibble::tibble())
}

on.exit(try(DBI::dbDisconnect(denodo_connect), silent = TRUE), add = TRUE)

# Read Extract
extract_ae <- dplyr::tbl(
denodo_connect,
dbplyr::in_schema("sdl", "sdl_ae2_episode_level_source")
) %>%
# rename variables
dplyr::rename(
record_keydate1 = "Arrival Date",
record_keydate2 = "DAT Date",
dob = "Pat Date Of Birth [C]",
postcode_epi = "Postcode (epi) [C]",
postcode_chi = "Postcode (CHI) [C]",
age = "Age at Midpoint of Financial Year",
ae_alcohol = "Alcohol Involved Code",
alcohol_adm = "Alcohol Related Admission",
ae_arrivalmode = "Arrival Mode Code",
keytime1 = "Arrival Time",
ae_attendcat = "Attendance Category Code",
ae_bodyloc = "Bodily Location Of Injury Code",
lca = "Council Area Code",
ae_disdest = "Discharge Destination Code",
keytime2 = "DAT Time",
diag1 = "Disease 1 Code",
diag2 = "Disease 2 Code",
diag3 = "Disease 3 Code",
falls_adm = "Falls Related Admission",
gpprac = "GP Practice Code",
hscp = "HSCP of Residence Code - current",
hbrescode = "NHS Board of Residence Code - current",
hbtreatcode = "Treatment NHS Board Code - current",
anon_chi = "anon_chi",
gender = "Pat Gender Code",
ae_patflow = "Patient Flow Code",
ae_placeinc = "Place of Incident Code",
ae_reasonwait = "Reason for Wait Code",
refsource = "Referral Source Code",
selfharm_adm = "Self Harm Related Admission",
submis_adm = "Substance Misuse Related Admission",
sigfac = "Significant Facility Code",
cost_total_net = "Total Net Costs",
location = "Treatment Location Code",
case_ref_number = "Case Reference Number",
commhosp = "Community Hospital Flag"
)
dplyr::filter(
financial_year == c_year, # TO-DO: check assumption that arrival_financial_year == financial_year
significant_facility_code == "32" | is.na(significant_facility_code)
) %>%
dplyr::select(
record_keydate1 = "arrival_date",
record_keydate2 = "dat_date",
keytime1 = "arrival_time",
keytime2 = "dat_time",
chi = "patient_chi",
gender = "patient_sex",
dob = "patient_dob",
gpprac = "gp_practice_code",
lca = "council_area_code",
hscp = "hscp_of_residence_code_curr",
location = "treatment_location_code",
hbrescode = "nhs_board_of_residence_code_curr",
hbtreatcode = "treatment_nhs_board_code_curr",
diag1 = "disease_1_code",
diag2 = "disease_2_code",
diag3 = "disease_3_code",
ae_arrivalmode = "arrival_mode_code",
refsource = "referral_source_code",
sigfac = "significant_facility_code",
ae_attendcat = "attendance_category_code",
ae_disdest = "discharge_destination_code",
ae_patflow = "patient_flow_code",
ae_placeinc = "place_of_incident_code",
ae_reasonwait = "reason_for_wait_code",
ae_bodyloc = "bodily_location_of_injury_code",
ae_alcohol = "alcohol_involved_code",
alcohol_adm = "alcohol_related_admission",
submis_adm = "substance_misuse_related_admission",
falls_adm = "falls_related_admission",
selfharm_adm = "self_harm_related_admission",
cost_total_net = "total_net_cost",
age = "age_at_midpoint_of_financial_year",
case_ref_number = "care_reference_number", # TO-DO: needs to be renamed by NSS from care to case?
postcode_epi = "postcode_epi",
postcode_chi = "postcode_chi",
commhosp = "community_hospital_flag"
) %>%
dplyr::collect() %>%
slfhelper::get_anon_chi("chi")

# ----------------------------------------------------------------------------
# TO-DO: Remove this when data types are fixed
extract_ae <- extract_ae %>%
mutate(
keytime1 = hms::parse_hms(keytime1),
keytime2 = hms::parse_hms(keytime2),
gender = as.numeric(gender),
ae_patflow = as.numeric(ae_patflow),
age = as.numeric(age),
commhosp = as.character(commhosp)
)
# ----------------------------------------------------------------------------
log_slf_event(stage = "read", status = "complete", type = "ae", year = year)

return(extract_ae)
Expand Down
22 changes: 20 additions & 2 deletions SDL_process/run_sdl.r
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,15 @@ year <- "1920"
# Build BYOC Output File Paths
byoc_output_files <- get_byoc_output_files(
year = year,
types = c("homelessness", "maternity") # using homelessness for test purpose. When development is complete, we change to "types = "byoc_input_files""
types = c("homelessness", "maternity", "ae") # using homelessness for test purpose. When development is complete, we change to "types = "byoc_input_files""
) # can always use any other type for testing also

## targets ----
targets::tar_make(script = "dummy_targets.R")
logger::log_info("Targets finished.")
# targets::tar_make()

# test homelessness data only
# Test homelessness data
## create homelessness data ----
logger::log_info("Read and process homelessness data")
hl1 <- read_extract_homelessness(
Expand All @@ -192,6 +192,7 @@ hl1 <- read_extract_homelessness(
run_date_time = run_date_time
)

# Test maternity data
logger::log_info("Read and process maternity data")
maternity <- read_extract_maternity(
year,
Expand All @@ -207,4 +208,21 @@ maternity <- read_extract_maternity(
run_date_time = run_date_time
)

# Test A&E data
# Read the A&E extract and feed it straight into the processing step; the
# processed result is kept in `ae` and, because write_to_disk is TRUE, is also
# requested to be written out by process_extract_ae().
logger::log_info("Read and process A&E data")
ae <- read_extract_ae(
  year,
  denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
  file_path = get_boxi_extract_path(year, type = "ae", BYOC_MODE),
  BYOC_MODE = BYOC_MODE
) |>
  process_extract_ae(
    year = year,
    # A fresh connection is requested here rather than reusing the one handed
    # to the read step.
    denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
    write_to_disk = TRUE,
    BYOC_MODE = BYOC_MODE,
    run_id = run_id,
    run_date_time = run_date_time
  )

logger::log_info("Run SDL ended.")
20 changes: 14 additions & 6 deletions _targets.R
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might need to revert these changes if we test targets with a new script?

Original file line number Diff line number Diff line change
Expand Up @@ -476,22 +476,30 @@ list(
),
# Accident & Emergency (AE2) activity --------------------------------------
# READ - A&E
tar_file_read(
tar_target(
# Target name
ae_data,
get_boxi_extract_path(year, type = "ae"),
# Function
read_extract_ae(year, !!.x)
read_extract_ae(
year = year,
denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
file_path = get_boxi_extract_path(year = year, type = "ae", BYOC_MODE),
BYOC_MODE = BYOC_MODE
)
),
# PROCESS - A&E
tar_target(
# Target name
source_ae_extract,
# Function
process_extract_ae(
ae_data,
year,
write_to_disk = write_to_disk
data = ae_data,
year = year,
denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
write_to_disk = write_to_disk,
BYOC_MODE = BYOC_MODE,
run_id = run_id,
run_date_time = run_date_time
)
),
# TESTS - A&E
Expand Down
10 changes: 9 additions & 1 deletion man/process_extract_ae.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/read_extract_ae.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading