|
3 | 3 | BUCKET=glue-pyspark-test |
4 | 4 | JOB_NAME=test-job1 |
5 | 5 | S3_URL=s3://$BUCKET/job.py |
| 6 | +CLUSTER_IDENTIFIER=glue-etl-cluster1 |
| 7 | +CONNECTION_NAME=glue-etl-cluster1-connection |
6 | 8 |
|
7 | 9 | echo Putting PySpark script to test S3 bucket ... |
8 | | -awslocal s3 mb s3://$BUCKET |
9 | | -awslocal s3 cp job.py $S3_URL |
10 | | -awslocal s3 mb s3://glue-sample-target |
| 10 | +awslocal s3 mb "s3://$BUCKET" |
| 11 | +awslocal s3 cp "job.py" "$S3_URL" |
| 12 | +awslocal s3 mb "s3://glue-sample-target" |
11 | 13 |
|
12 | | -cluster_identifier=glue_etl_cluster1 |
| 14 | +awslocal rds create-db-cluster --db-cluster-identifier "$CLUSTER_IDENTIFIER" --engine aurora-postgresql --database-name test |
13 | 15 |
|
14 | | -awslocal rds create-db-cluster --db-cluster-identifier $cluster_identifier --engine aurora-postgresql --database-name test |
15 | | - |
16 | | -db_port=$(awslocal rds describe-db-clusters --db-cluster-identifier $cluster_identifier) | jq -r '.DBClusters[0].Port' |
| 16 | +db_port=$(awslocal rds describe-db-clusters --db-cluster-identifier "$CLUSTER_IDENTIFIER" | jq -r '.DBClusters[0].Port') |
17 | 17 | echo Using local RDS database on port $db_port ... |
18 | 18 |
|
19 | 19 | echo Creating Glue databases and tables ... |
20 | 20 | awslocal glue create-database --database-input '{"Name": "legislators"}' |
21 | 21 | awslocal glue create-table --database legislators \ |
22 | | - --table-input '{"Name":"memberships_json", "Parameters": {"connectionName": "$cluster_identifier"}, "StorageDescriptor": {"Location": "test.memberships"}}' |
| 22 | + --table-input '{"Name": "memberships_json", "Parameters": {"connectionName": "'$CONNECTION_NAME'"}, "StorageDescriptor": {"Location": "test.memberships"}}' |
23 | 23 | awslocal glue create-table --database legislators \ |
24 | | - --table-input '{"Name":"persons_json", "Parameters": {"connectionName": "$cluster_identifier"}, "StorageDescriptor": {"Location": "test.persons"}}' |
| 24 | + --table-input '{"Name": "persons_json", "Parameters": {"connectionName": "'$CONNECTION_NAME'"}, "StorageDescriptor": {"Location": "test.persons"}}' |
25 | 25 | awslocal glue create-table --database legislators \ |
26 | | - --table-input '{"Name":"organizations_json", "Parameters": {"connectionName": "$cluster_identifier"}, "StorageDescriptor": {"Location": "test.organizations"}}' |
| 26 | + --table-input '{"Name": "organizations_json", "Parameters": {"connectionName": "'$CONNECTION_NAME'"}, "StorageDescriptor": {"Location": "test.organizations"}}' |
27 | 27 | awslocal glue create-connection \ |
28 | | - --connection-input '{"Name":"$cluster_identifier", "ConnectionType": "JDBC", "ConnectionProperties": {"USERNAME": "test", "PASSWORD": "test", "JDBC_CONNECTION_URL": "jdbc:postgresql://localhost.localstack.cloud:'$db_port'"}}' |
| 28 | + --connection-input '{"Name": "'$CONNECTION_NAME'", "ConnectionType": "JDBC", "ConnectionProperties": {"USERNAME": "test", "PASSWORD": "test", "JDBC_CONNECTION_URL": "jdbc:postgresql://localhost.localstack.cloud:'$db_port'"}}' |
29 | 29 |
|
30 | 30 | secret=$(awslocal secretsmanager create-secret --name mysecret --secret-string "12345678" | jq -r ".ARN") |
31 | 31 |
|
32 | 32 | echo Creating Postgres database tables with data ... |
33 | | -awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$cluster_identifier --secret-arn $secret --sql 'CREATE TABLE IF NOT EXISTS persons(id varchar, name varchar)' |
34 | | -awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$cluster_identifier --secret-arn $secret --sql 'CREATE TABLE IF NOT EXISTS organizations(org_id varchar, org_name varchar)' |
35 | | -awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$cluster_identifier --secret-arn $secret --sql 'CREATE TABLE IF NOT EXISTS memberships(person_id varchar, organization_id varchar)' |
36 | | -awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$cluster_identifier --secret-arn $secret --sql "insert into persons(id, name) VALUES('p1', 'person 1')" |
37 | | -awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$cluster_identifier --secret-arn $secret --sql "insert into organizations(org_id, org_name) VALUES('o1', 'org1')" |
38 | | -awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$cluster_identifier --secret-arn $secret --sql "insert into memberships(person_id, organization_id) VALUES('p1', 'o1')" |
39 | | -awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$cluster_identifier --secret-arn $secret --sql 'CREATE TABLE IF NOT EXISTS hist_root(id varchar, name varchar, org_id varchar, org_name varchar, person_id varchar, organization_id varchar)' |
| 33 | +awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$CLUSTER_IDENTIFIER --secret-arn $secret --sql 'CREATE TABLE IF NOT EXISTS persons(id varchar, name varchar)' |
| 34 | +awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$CLUSTER_IDENTIFIER --secret-arn $secret --sql 'CREATE TABLE IF NOT EXISTS organizations(org_id varchar, org_name varchar)' |
| 35 | +awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$CLUSTER_IDENTIFIER --secret-arn $secret --sql 'CREATE TABLE IF NOT EXISTS memberships(person_id varchar, organization_id varchar)' |
| 36 | +awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$CLUSTER_IDENTIFIER --secret-arn $secret --sql "insert into persons(id, name) VALUES('p1', 'person 1')" |
| 37 | +awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$CLUSTER_IDENTIFIER --secret-arn $secret --sql "insert into organizations(org_id, org_name) VALUES('o1', 'org1')" |
| 38 | +awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$CLUSTER_IDENTIFIER --secret-arn $secret --sql "insert into memberships(person_id, organization_id) VALUES('p1', 'o1')" |
| 39 | +awslocal rds-data execute-statement --resource-arn arn:aws:rds:us-east-1:000000000000:cluster:$CLUSTER_IDENTIFIER --secret-arn $secret --sql 'CREATE TABLE IF NOT EXISTS hist_root(id varchar, name varchar, org_id varchar, org_name varchar, person_id varchar, organization_id varchar)' |
40 | 40 |
|
41 | 41 | echo Starting Glue job from PySpark script ... |
42 | 42 | awslocal glue create-job --name $JOB_NAME --role r1 \ |
43 | 43 | --command '{"Name": "pythonshell", "ScriptLocation": "'$S3_URL'"}' \ |
44 | | - --connections '{"Connections": ["$cluster_identifier"]}' |
| 44 | + --connections '{"Connections": ["'$CONNECTION_NAME'"]}' |
45 | 45 | run_id=$(awslocal glue start-job-run --job-name $JOB_NAME | jq -r .JobRunId) |
46 | 46 |
|
47 | 47 | state=$(awslocal glue get-job-run --job-name $JOB_NAME --run-id $run_id | jq -r .JobRun.JobRunState) |
|
0 commit comments