-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_cluster.py
More file actions
71 lines (62 loc) · 2.57 KB
/
create_cluster.py
File metadata and controls
71 lines (62 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import botocore
import numpy as np
import pandas as pd
import boto3
from botocore import errorfactory
import configparser
# Load deployment settings from dwh.cfg.  The file is opened in a ``with``
# block so the handle is closed as soon as parsing finishes; a missing file
# still raises, as it did before.
config = configparser.ConfigParser()
with open('dwh.cfg') as config_file:
    config.read_file(config_file)

# REDSHIFT PARAMETERS
# configparser returns every value as ``str`` (DB_PORT included).

# AWS credential pair.
KEY = config.get('AWS', 'KEY')
SECRET = config.get('AWS', 'SECRET')

# Name of the IAM role to look up.
DB_ROLE_NAME = config.get('IAM_ROLE', 'DB_IAM_ROLE_NAME')

# Cluster settings.
HOST = config.get('CLUSTER', 'HOST')
DB_PORT = config.get('CLUSTER', 'DB_PORT')
DB_CLUSTER_TYPE = config.get('CLUSTER', 'DB_CLUSTER_TYPE')
DB_NODE_TYPE = config.get('CLUSTER', 'DB_NODE_TYPE')
DB_NAME = config.get('CLUSTER', 'DB_NAME')
DB_CLUSTER_IDENTIFIER = config.get('CLUSTER', 'DB_CLUSTER_IDENTIFIER')
DB_USER = config.get('CLUSTER', 'DB_USER')
DB_PASSWORD = config.get('CLUSTER', 'DB_PASSWORD')
# ESTABLISH RESOURCES TO CREATE AND CONNECT TO REDSHIFT CLUSTER
# All three handles take the same region and credential pair, so those
# keyword arguments are collected once and unpacked into each call.
_aws_connection_args = {
    "region_name": "us-west-2",
    "aws_access_key_id": KEY,
    "aws_secret_access_key": SECRET,
}

s3 = boto3.resource('s3', **_aws_connection_args)
iam = boto3.client('iam', **_aws_connection_args)
redshift = boto3.client('redshift', **_aws_connection_args)
# GET IAM ROLE
# Look the role up by its configured name and keep only its ARN.
role_description = iam.get_role(RoleName=DB_ROLE_NAME)
roleArn = role_description['Role']['Arn']
# CREATE CLUSTER
# The request is assembled first so the ``try`` wraps only the API call.
# NOTE(review): no port argument is passed, so the DB_PORT value read from
# the config is not applied here -- confirm whether that is intended.
cluster_request = {
    # Hardware
    "ClusterType": DB_CLUSTER_TYPE,
    "NodeType": DB_NODE_TYPE,
    # Identifiers & credentials
    "DBName": DB_NAME,
    "ClusterIdentifier": DB_CLUSTER_IDENTIFIER,
    "MasterUsername": DB_USER,
    "MasterUserPassword": DB_PASSWORD,
    # Role (to allow S3 access)
    "IamRoles": [roleArn],
}

try:
    response = redshift.create_cluster(**cluster_request)
except Exception as e:
    # Any failure is only printed and the script carries on; ``response``
    # stays unset in that case.
    print(e)
# CHECK REDSHIFT CLUSTER STATUS
def clusterProperties(props):
    """Summarise a cluster description as a two-column ``Key``/``Value`` table.

    Args:
        props: Mapping of cluster attribute names to their values.

    Returns:
        A ``pandas.DataFrame`` with columns ``Key`` and ``Value`` containing
        only the attributes listed in ``keys_to_show``, in the order they
        occur in ``props``.  The frame is empty when none of them is present.

    Side effects:
        Sets the process-wide pandas option ``display.max_colwidth`` to
        ``None`` so that long values are not truncated when the frame is
        printed.
    """
    # ``None`` is pandas' documented "unlimited" setting.  The option is set
    # globally rather than through a context manager because the frame is
    # rendered by the caller, after this function has returned.
    pd.set_option('display.max_colwidth', None)

    keys_to_show = (
        "ClusterIdentifier",
        "NodeType",
        "ClusterStatus",
        "MasterUsername",
        "DBName",
        "Endpoint",
        "VpcId",
    )
    rows = [(key, value) for key, value in props.items() if key in keys_to_show]
    return pd.DataFrame(data=rows, columns=["Key", "Value"])
# Describe the configured cluster, take the first entry of the returned
# ``Clusters`` list, and print its summary table.
cluster_listing = redshift.describe_clusters(ClusterIdentifier=DB_CLUSTER_IDENTIFIER)
myClusterprops = cluster_listing['Clusters'][0]
print(clusterProperties(myClusterprops))