-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpsql_etl.sh
More file actions
65 lines (65 loc) · 2.84 KB
/
psql_etl.sh
File metadata and controls
65 lines (65 loc) · 2.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/bin/bash
#
# Set script to error on any fails
set -e
#
# This script is meant to be sourced by a broader ETL process.
# The function func_psql_etl is effectively the 'main' function. It's defined last, and called by the user of the psql_etl.sh job
#
# List of functions
# func_pe_gbl_variables - sets variables within the script
# func_pe_line_counts - Gets line counts for the config files used (used in xargs parallel parameter)
# func_pe_parallel - Runs the sql in the file on multiple servers simultaneously
# func_psql_etl - 'Main' function, running other functions in sequence, and setting global script parameters from input
#
# Begin script function definitions
#
# Define global script variables
function func_pe_gbl_variables {
# Current directory path
export pe_gbl_curr_dir="$(dirname "$(readlink -f "$0")")"
# Forwardslash as variable for reasons
export pe_gbl_fwdslsh="/"
# SQL file name suffix (after cluster name)
export pe_gbl_sql_file_suffix="_psql_etl.sql"
# Hosts file name suffix (after cluster name)
export pe_gbl_hosts_file_suffix="_psql_hosts.txt"
}
#
# Get and export a count of rows for the two files
function func_pe_line_counts {
# Define SQL file full path
export pe_fpe_sql_file_loc=$pe_gbl_curr_dir$pe_gbl_fwdslsh$pe_fpe_cluster_name$pe_gbl_sql_file_suffix
# Define SQL file line counts
export pe_fpe_sql_line_count=$(cat $pe_fpe_sql_file_loc | wc -l )
# Define hosts file full path
export pe_fpe_hosts_file_loc=$pe_gbl_curr_dir$pe_gbl_fwdslsh$pe_fpe_cluster_name$pe_gbl_hosts_file_suffix
# Define hosts file line count
export pe_fpe_hosts_line_count=$(cat $pe_fpe_hosts_file_loc | wc -l)
}
#
# This function:
# 1. Reads each SQL line from $pe_fpe_sql_file_loc
# 2. Writes the contents to a new temporary file (necessary to deal with escapes and strange characters)
# 3. Executes each line against each server in parallel from $pe_fpe_hosts_file_loc
# 4. Waits until execution is complete across every server before starting the next line of SQL
# 5. Deletes the temporary file located in Step 2
#
function func_pe_parallel {
while read -r pe_sql_file_line; do
pe_sql_line_tmpfile_loc="$(mktemp)"
echo "$pe_sql_file_line" > "$pe_sql_line_tmpfile_loc"
export pe_sql_line_tmpfile_loc_ex=$pe_sql_line_tmpfile_loc
xargs -a $pe_fpe_hosts_file_loc -d '\n' -I {} -P "$pe_fpe_hosts_line_count" bash -c 'psql -q -A -t {} -f $pe_sql_line_tmpfile_loc_ex'
rm -f $pe_sql_line_tmpfile_loc
done <$pe_fpe_sql_file_loc
}
# This is the 'main' function for the script
function func_psql_etl {
# Define the input variable (cluster name). This is the only variable 'passed' back to the top of the script
export pe_fpe_cluster_name=$1
# Run all previously defined functions
func_pe_gbl_variables
func_pe_line_counts
func_pe_parallel
}