Skip to content

Commit 3dc2b8f

Browse files
authored
fix: 🐛 always clean up targets template output_dir before converting to Parquet (#224)
# Description To avoid duplicate Parquet files when the template is rerun. Closes #210 Needs a thorough review. ## Checklist - [X] Ran `just run-all`
1 parent 4c92367 commit 3dc2b8f

File tree

1 file changed

+28
-7
lines changed

1 file changed

+28
-7
lines changed

inst/template-targets.R

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
# 2. Run `targets::tar_make()` (in the same directory) to convert
88
# registers to Parquet.
99
#
10+
# Note: this pipeline deletes and recreates the output directory and
11+
# re-converts all files on every `tar_make()` call. The main benefit of
12+
# targets here is parallel execution across workers.
13+
#
1014
# For more information on targets, see https://books.ropensci.org/targets/
1115

1216
library(targets)
@@ -17,11 +21,11 @@ config <- list(
1721
# Path to locate SAS files in.
1822
input_dir = "/path/to/register/sas/files/directory",
1923
# Directory to write Parquet files to. Parquet files will be located in
20-
# subdirectories of this path.
24+
# subdirectories of this directory.
2125
output_dir = "/path/to/output/directory"
2226
)
2327

24-
# Check input path.
28+
# Check input directory.
2529
if (!dir.exists(config$input_dir)) {
2630
cli::cli_abort(
2731
message = "Input directory does not exist: {config$input_dir}"
@@ -61,12 +65,29 @@ list(
6165
deployment = "main"
6266
),
6367

68+
# Recreate the output directory before writing to avoid stale Parquet files.
69+
# Runs on every `tar_make()` call (mode = "always") to ensure a clean slate.
70+
tar_target(
71+
name = output_dir,
72+
command = {
73+
if (fs::dir_exists(config$output_dir)) {
74+
fs::dir_delete(config$output_dir)
75+
}
76+
fs::dir_create(config$output_dir)
77+
config$output_dir
78+
},
79+
deployment = "main",
80+
cue = tar_cue(mode = "always")
81+
),
82+
83+
# Convert each SAS file in parallel. mode = "always" is required because
84+
# `output_dir` returns the same path string on every run, so targets would
85+
# otherwise consider this target up-to-date and skip it despite the output
86+
# directory having been cleaned.
6487
tar_target(
6588
name = parquet_files,
66-
command = convert_file(
67-
path = sas_paths,
68-
output_dir = config$output_dir
69-
),
70-
pattern = map(sas_paths)
89+
command = convert_file(path = sas_paths, output_dir = output_dir),
90+
pattern = map(sas_paths),
91+
cue = tar_cue(mode = "always")
7192
)
7293
)

0 commit comments

Comments
 (0)