Skip to content

Commit 7d2700d

Browse files
authored
Merge branch 'development' into refactor-mh
2 parents 8bef498 + 1e62773 commit 7d2700d

File tree

4 files changed

+151
-52
lines changed

4 files changed

+151
-52
lines changed

SDL_process/dummy_targets.R

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
################################################################################
2+
# Name of file - "dummy_targets.R"
3+
#
4+
# Description:
5+
# A small target example to run as test for BYOC.
6+
#
7+
# To run the targets pipeline, please use:
8+
# targets::tar_make(script = 'dummy_targets.R')
9+
#
10+
################################################################################
11+
12+
library(logger)
13+
library(targets) # main package required
14+
library(tarchetypes) # support for targets
15+
library(crew) # support for parallel processing
16+
library(dplyr)
17+
# Stage 1 - Setup BYOC_MODE in targets -----------------------------------------
18+
# Read BYOC_MODE from the environment and coerce it to a single TRUE/FALSE.
# An unset or unrecognised value is treated as FALSE (local mode) rather than
# NA, because `if (NA)` stops the script with
# "missing value where TRUE/FALSE needed".
byoc_mode_raw <- Sys.getenv("BYOC_MODE")
BYOC_MODE <- tolower(byoc_mode_raw) %in% c("true", "t")

# Make the silent fallback visible when the value was neither true nor false.
if (!BYOC_MODE && !(tolower(byoc_mode_raw) %in% c("false", "f"))) {
  logger::log_warn(
    "BYOC_MODE is unset or not recognised ('{byoc_mode_raw}'); using FALSE"
  )
}

if (BYOC_MODE) {
  logger::log_info("targets file location on Denodo")
} else {
  logger::log_info("targets file location is local")
}

log_threshold(INFO)
32+
33+
# Helper functions used by the test pipeline ----
34+
# Build a small dummy data set for one year.
#
# year: value stored in the `year` column of every row.
# Returns a data.frame with 10 rows and columns `x` (1 to 10), `y` (uniform
# random draws from runif()) and `year`. Progress is logged before and after.
get_data <- function(year) {
  log_info("Starting the test: Data Generation")

  n_rows <- 10
  dummy <- data.frame(x = seq_len(n_rows), y = runif(n_rows), year = year)

  log_info("Data Generation complete")
  dummy
}
46+
47+
# Summarise a data set produced by get_data().
#
# data: a data.frame (or list) with a numeric `y` element.
# Returns the mean of `data$y`. Progress is logged before and after.
analyze_data <- function(data) {
  log_info("Starting Data Analysis")

  y_mean <- mean(data$y)

  log_info("Analysis complete")
  y_mean
}
55+
56+
# Years the dummy pipeline is mapped over: "2017" to "2025".
years_to_run <- paste0(20, 17:25)

# Stage 2 - Set up targets
#-------------------------------------------------------------------------------
# Static branching: one raw-data target and one average target per year.
yearly_targets <- tar_map(
  values = list(year = years_to_run),
  tar_target(name = aaraw_data, command = get_data(year = year)),
  tar_target(name = aaverage_value, command = analyze_data(aaraw_data))
)

# NOTE(review): this command references no other target, so nothing in the
# code shown makes it wait for the per-year targets. Confirm whether it should
# depend on them (for example via tarchetypes::tar_combine()).
status_target <- tar_target(
  name = aapipeline_status,
  command = {
    log_info("All targets completed successfully.")
    return("SUCCESS")
  }
)

# The target list must be the last value of the script.
list(yearly_targets, status_target)
72+
#-------------------------------------------------------------------------------
73+
## End of Targets pipeline ##
74+
#-------------------------------------------------------------------------------

SDL_process/run_sdl.r

Lines changed: 69 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,34 @@ if (tolower(BYOC_MODE) %in% c("true", "t")) {
6767
BYOC_MODE <- FALSE
6868
}
6969

70+
# Location of the targets data store: the BYOC store when BYOC_MODE is TRUE,
# otherwise the sourcedev store. BYOC_MODE is a single flag, so a plain
# if/else is used instead of the vectorised dplyr::if_else(); isTRUE() makes
# any non-TRUE value fall back to the sourcedev path.
store_path <- if (isTRUE(BYOC_MODE)) {
  "/sdl_byoc/_targets"
} else {
  "/conf/sourcedev/Source_Linkage_File_Updates/_targets"
}
75+
7076
# run_id <- Sys.getenv("run_id")
7177
# run_date_time <- Sys.getenv("run_date_time")
7278
run_date_time <- script_run_time
7379

80+
# Report when each ACADME data mart was last loaded.
# A Denodo connection is opened here only when not running in BYOC mode.
# NOTE(review): in BYOC mode `denodo_connect` must already exist before this
# point; that is not visible in this section - confirm.
if (isFALSE(BYOC_MODE)) {
  denodo_connect <- createslf::get_denodo_connection(BYOC_MODE = BYOC_MODE)
}

# tryCatch(finally = ) closes the connection opened above even if the query or
# the logging fails; the error itself still propagates. invisible() keeps the
# collected table from being printed at top level.
invisible(tryCatch(
  {
    dplyr::tbl(
      denodo_connect,
      dbplyr::in_schema("sdl", "sdl_byoc_acadme_load_detail")
    ) %>%
      dplyr::collect() %>%
      # Format the load timestamp so the log lines read cleanly
      dplyr::mutate(load_str = format(load_date, "%Y-%m-%d %H:%M:%S")) %>%
      # One log line per row of the load-detail table
      purrr::pwalk(function(data_mart, load_str, ...) {
        logger::log_info("{data_mart} loaded at {load_str}")
      })
  },
  finally = {
    # Only close the connection this section opened itself.
    if (isFALSE(BYOC_MODE)) {
      odbc::dbDisconnect(denodo_connect)
    }
  }
))
7498

7599
write_to_disk <- TRUE
76100

@@ -159,54 +183,56 @@ sg_pub_data <- data.frame(
159183
# just test one year
160184
year <- "1920"
161185

162-
# Build BYOC Output File Paths
163-
byoc_output_files <- get_byoc_output_files(
164-
year = year,
165-
types = c("homelessness", "maternity", "mh") # using homelessness for test purpose. When development is complete, we change to "types = "byoc_input_files""
166-
) # can always use any other type for testing also
186+
# # Build BYOC Output File Paths
187+
# byoc_output_files <- get_byoc_output_files(
188+
# year = year,
189+
# types = c("homelessness", "maternity", "mh") # using homelessness for test purpose. When development is complete, we change to "types = "byoc_input_files""
190+
# ) # can always use any other type for testing also
167191

192+
## targets ----
193+
targets::tar_make(
194+
script = "SDL_process/dummy_targets.R",
195+
store = store_path
196+
)
197+
logger::log_info("Targets finished.")
168198
# targets::tar_make()
169199

170200
# test homelessness data only
171201
## create homelessness data ----
172-
logger::log_info("Read and process homelessness data")
173-
hl1 <- read_extract_homelessness(
174-
year,
175-
denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
176-
file_path = get_boxi_extract_path(
177-
year = year,
178-
type = "homelessness",
179-
BYOC_MODE = BYOC_MODE
180-
),
181-
BYOC_MODE = BYOC_MODE
182-
) %>% process_extract_homelessness(
183-
year = year,
184-
write_to_disk = write_to_disk,
185-
la_code_lookup = la_code_lookup,
186-
sg_pub_data = sg_pub_data,
187-
BYOC_MODE = BYOC_MODE,
188-
run_id = run_id,
189-
run_date_time = run_date_time
190-
)
191-
192-
logger::log_info("Read and process maternity data")
193-
maternity <- read_extract_maternity(
194-
year,
195-
denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
196-
file_path = get_boxi_extract_path(
197-
year = year,
198-
type = "maternity",
199-
BYOC_MODE = BYOC_MODE
200-
),
201-
BYOC_MODE = BYOC_MODE
202-
) %>%
203-
process_extract_maternity(
204-
year = year,
205-
write_to_disk = TRUE,
206-
BYOC_MODE = BYOC_MODE,
207-
run_id = run_id,
208-
run_date_time = run_date_time
209-
)
202+
# logger::log_info("Read and process homelessness data")
203+
# hl1 <- read_extract_homelessness(
204+
# year,
205+
# denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
206+
# file_path = get_boxi_extract_path(
207+
# year = year,
208+
# type = "homelessness",
209+
# BYOC_MODE = BYOC_MODE
210+
# ),
211+
# BYOC_MODE = BYOC_MODE
212+
# ) %>% process_extract_homelessness(
213+
# year = year,
214+
# write_to_disk = write_to_disk,
215+
# la_code_lookup = la_code_lookup,
216+
# sg_pub_data = sg_pub_data,
217+
# BYOC_MODE = BYOC_MODE,
218+
# run_id = run_id,
219+
# run_date_time = run_date_time
220+
# )
221+
#
222+
# logger::log_info("Read and process maternity data")
223+
# maternity <- read_extract_maternity(
224+
# year,
225+
# denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
226+
# file_path = get_boxi_extract_path(year, type = "maternity", BYOC_MODE),
227+
# BYOC_MODE = BYOC_MODE
228+
# ) %>%
229+
# process_extract_maternity(
230+
# year = year,
231+
# write_to_disk = TRUE,
232+
# BYOC_MODE = BYOC_MODE,
233+
# run_id = run_id,
234+
# run_date_time = run_date_time
235+
# )
210236

211237
logger::log_info("Read and process mental health data")
212238
mental_health <- read_extract_mental_health(

_targets.R

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ controller <- crew::crew_controller_local(
3333
name = "my_controller",
3434
# Specify 6 workers for parallel processing - works with 8CPU, 128GB posit session
3535
workers = 6,
36-
seconds_idle = 3
36+
seconds_idle = 30
3737
)
3838

3939
# Targets options
@@ -48,17 +48,16 @@ tar_option_set(
4848
# format - default is parquet format
4949
format = "parquet",
5050
resources = tar_resources(
51-
parquet = tar_resources_parquet(compression = "zstd"),
52-
qs = tar_resources_qs(preset = "high")
51+
parquet = tar_resources_parquet(compression = "zstd")
5352
),
5453
# error - if an error occurs, the pipeline will continue
5554
error = "continue",
5655
# storage - the worker saves/uploads the value.
5756
storage = "worker",
5857
# retrieval - "auto" lets targets decide where a target's dependencies are loaded (see ?targets::tar_option_set)
59-
retrieval = "worker",
58+
retrieval = "auto",
6059
# memory - "auto" lets targets decide when target data is kept in or released from memory (see ?targets::tar_option_set)
61-
memory = "persistent",
60+
memory = "auto",
6261
# controller - A controller or controller group object produced by the crew R package
6362
controller = controller
6463
)

_targets.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
main:
2-
store: /conf/sourcedev/Source_Linkage_File_Updates/_targets
3-
workers: '16'
4-
reporter_make: timestamp_positives
2+
workers: '8'
3+
reporter: verbose
54
reporter_outdated: forecast
6-
seconds_interval: 30
5+
seconds_meta_append: 30
6+
store: /sdl_byoc/_targets

0 commit comments

Comments
 (0)