#!/bin/bash
#
# Creates a Redshift cluster, a Glue database/connection/crawler and a
# Redshift table through the `awslocal` CLI, runs the crawler, and finally
# fetches the Glue table the crawler is expected to have produced.
# Requires `awslocal` and `jq` on PATH.

# -e: abort on the first failing command; -x: trace every command.
set -e -x

# Names of the resources created (and torn down) by this script.
readonly REDSHIFT_CLUSTER_IDENTIFIER="redshiftcluster"
readonly REDSHIFT_SCHEMA_NAME="public"
readonly REDSHIFT_DATABASE_NAME="db1"
readonly REDSHIFT_TABLE_NAME="sales"
# NOTE(review): these look like throwaway test credentials; be aware that
# `set -x` prints them in the trace output.
readonly REDSHIFT_USERNAME="crawlertestredshiftusername"
readonly REDSHIFT_PASSWORD="crawlertestredshiftpassword"
readonly GLUE_DATABASE_NAME="gluedb"
readonly GLUE_CONNECTION_NAME="glueconnection"
readonly GLUE_CRAWLER_NAME="gluecrawler"
13+
#######################################
# Tear-down: best-effort deletion of every resource this script creates.
# Globals (read):
#   REDSHIFT_CLUSTER_IDENTIFIER, GLUE_DATABASE_NAME,
#   GLUE_CONNECTION_NAME, GLUE_CRAWLER_NAME
# Outputs:
#   One progress line per resource on stdout.
# Returns:
#   Always 0; individual delete failures are ignored.
#######################################
cleanup() {
  echo ""
  # stderr is discarded and failures are swallowed on purpose: a resource
  # may simply not exist, and tear-down must not abort under `set -e`.
  echo "(Cleanup) Deleting Redshift cluster."
  awslocal redshift delete-cluster --cluster-identifier "$REDSHIFT_CLUSTER_IDENTIFIER" 2>/dev/null || true
  echo "(Cleanup) Deleting Glue database."
  awslocal glue delete-database --name "$GLUE_DATABASE_NAME" 2>/dev/null || true
  echo "(Cleanup) Deleting Glue connection."
  awslocal glue delete-connection --connection-name "$GLUE_CONNECTION_NAME" 2>/dev/null || true
  echo "(Cleanup) Deleting Glue crawler."
  awslocal glue delete-crawler --name "$GLUE_CRAWLER_NAME" 2>/dev/null || true
}
# Run the tear-down whenever the script exits.
trap 'cleanup' EXIT
27+
#######################################
# Poll a command until a field of its JSON output equals an expected value.
# NOTE: this function shadows the bash builtin `wait` for the rest of the
# script; use `builtin wait` if background jobs ever need to be awaited.
# Arguments:
#   $1 - command line to run on every poll (word-split on purpose)
#   $2 - jq filter selecting the value to inspect
#   $3 - value that ends the wait
#   $4 - optional: maximum seconds to wait (default 600; 0 = no limit)
# Outputs:
#   One progress line per poll on stdout; a timeout message on stderr.
# Returns:
#   0 once the value matches, 1 if the timeout elapses first.
#######################################
wait () {
  local poll_cmd=$1
  local field=$2
  local expected=$3
  local timeout=${4:-600}
  local -r interval=5
  local waited=0
  local current

  # $poll_cmd holds a whole command line, so it is left unquoted to be split
  # into words; the jq filter is quoted so `[0]` is never treated as a glob.
  # shellcheck disable=SC2086
  current=$($poll_cmd | jq -r "$field")
  while [[ "$current" != "$expected" ]]; do
    if (( timeout > 0 && waited >= timeout )); then
      echo "Timed out after ${waited}s waiting for state change. Current: $current / Expected: $expected" >&2
      return 1
    fi
    sleep "$interval"
    waited=$(( waited + interval ))
    echo "Waiting for state change. Current: $current / Expected: $expected"
    # shellcheck disable=SC2086
    current=$($poll_cmd | jq -r "$field")
  done
}
40+
# Start from a clean slate in case an earlier run left resources behind.
cleanup

# Create the Redshift cluster and block until it reports "available".
echo "Creating Redshift cluster..."
awslocal redshift create-cluster \
  --cluster-identifier "$REDSHIFT_CLUSTER_IDENTIFIER" \
  --db-name "$REDSHIFT_DATABASE_NAME" \
  --master-username "$REDSHIFT_USERNAME" \
  --master-user-password "$REDSHIFT_PASSWORD" \
  --node-type n1
wait "awslocal redshift describe-clusters --cluster-identifier $REDSHIFT_CLUSTER_IDENTIFIER" ".Clusters[0].ClusterStatus" "available"
# host:port of the cluster endpoint, used to build the JDBC URL below.
REDSHIFT_URL=$(awslocal redshift describe-clusters --cluster-identifier "$REDSHIFT_CLUSTER_IDENTIFIER" \
  | jq -r '(.Clusters[0].Endpoint.Address) + ":" + (.Clusters[0].Endpoint.Port|tostring)')

# Create the Glue database, connection, and crawler
echo "Creating Glue db, connection, and crawler..."
awslocal glue create-database --database-input "{\"Name\": \"$GLUE_DATABASE_NAME\"}"
awslocal glue create-connection --connection-input "{\"Name\":\"$GLUE_CONNECTION_NAME\", \"ConnectionType\": \"JDBC\", \"ConnectionProperties\": {\"USERNAME\": \"$REDSHIFT_USERNAME\", \"PASSWORD\": \"$REDSHIFT_PASSWORD\", \"JDBC_CONNECTION_URL\": \"jdbc:redshift://$REDSHIFT_URL/$REDSHIFT_DATABASE_NAME\"}}"
awslocal glue create-crawler \
  --name "$GLUE_CRAWLER_NAME" \
  --database-name "$GLUE_DATABASE_NAME" \
  --targets "{\"JdbcTargets\": [{\"ConnectionName\": \"$GLUE_CONNECTION_NAME\", \"Path\": \"$REDSHIFT_DATABASE_NAME/%/$REDSHIFT_TABLE_NAME\"}]}" \
  --role r1

# Create a table in the redshift DB
echo "Creating table in Redshift DB..."
REDSHIFT_STATEMENT_ID=$(awslocal redshift-data execute-statement \
  --cluster-identifier "$REDSHIFT_CLUSTER_IDENTIFIER" \
  --database "$REDSHIFT_DATABASE_NAME" \
  --sql "create table $REDSHIFT_TABLE_NAME (salesid integer not null, listid integer not null, sellerid integer not null, buyerid integer not null, eventid integer not null, dateid smallint not null, qtysold smallint not null, pricepaid decimal(8,2), commission decimal(8,2), saletime timestamp)" \
  | jq -r .Id)
# Without pipefail a failed execute-statement is masked by jq and leaves an
# empty or "null" id; stop here instead of polling a statement that does not exist.
if [[ -z "$REDSHIFT_STATEMENT_ID" || "$REDSHIFT_STATEMENT_ID" == "null" ]]; then
  echo "Failed to submit the create-table statement." >&2
  exit 1
fi
wait "awslocal redshift-data describe-statement --id $REDSHIFT_STATEMENT_ID" ".Status" "FINISHED"

# Run the crawler
echo "Starting Crawler..."
awslocal glue start-crawler --name "$GLUE_CRAWLER_NAME"
wait "awslocal glue get-crawler --name $GLUE_CRAWLER_NAME" ".Crawler.State" "READY"

# The table is looked up under the name <database>_<schema>_<table>.
echo "Getting Glue table..."
awslocal glue get-table --database-name "$GLUE_DATABASE_NAME" --name "${REDSHIFT_DATABASE_NAME}_${REDSHIFT_SCHEMA_NAME}_${REDSHIFT_TABLE_NAME}"

echo "Done."