77# 2. Run `targets::tar_make()` (in the same directory) to convert
88# registers to Parquet.
99#
10+ # Note: on every `tar_make()` call, this pipeline deletes and recreates the
11+ # output directory and then re-converts all files. The main benefit of
12+ # targets here is parallel execution across workers.
13+ #
1014# For more information on targets, see https://books.ropensci.org/targets/
1115
1216library(targets )
@@ -17,11 +21,11 @@ config <- list(
1721 # Directory in which to look for SAS files.
1822 input_dir = " /path/to/register/sas/files/directory" ,
1923 # Directory to write Parquet files to. Parquet files will be located in
20- # subdirectories of this path .
24+ # subdirectories of this directory .
2125 output_dir = " /path/to/output/directory"
2226)
2327
24- # Check input path .
28+ # Check input directory .
2529if (! dir.exists(config $ input_dir )) {
2630 cli :: cli_abort(
2731 message = " Input directory does not exist: {config$input_dir}"
@@ -61,12 +65,29 @@ list(
6165 deployment = " main"
6266 ),
6367
68+ # Empty output directory before writing to avoid outdated Parquet files.
69+ # Runs on every `tar_make()` call (mode = "always") to ensure a clean slate.
70+ tar_target(
71+ name = output_dir ,
72+ command = {
73+ if (fs :: dir_exists(config $ output_dir )) {
74+ fs :: dir_delete(config $ output_dir )
75+ }
76+ fs :: dir_create(config $ output_dir )
77+ config $ output_dir
78+ },
79+ deployment = " main" ,
80+ cue = tar_cue(mode = " always" )
81+ ),
82+
83+ # Convert each SAS file in parallel. mode = "always" is required because
84+ # `output_dir` returns the same path string on every run, so targets would
85+ # otherwise consider this target up-to-date and skip it despite the output
86+ # directory having been cleaned.
6487 tar_target(
6588 name = parquet_files ,
66- command = convert_file(
67- path = sas_paths ,
68- output_dir = config $ output_dir
69- ),
70- pattern = map(sas_paths )
89+ command = convert_file(path = sas_paths , output_dir = output_dir ),
90+ pattern = map(sas_paths ),
91+ cue = tar_cue(mode = " always" )
7192 )
7293)
0 commit comments