Skip to content

Commit 5c48bba

Browse files
author
Dougal Ballantyne
committed
New sanity check for CLI; multi-OS changes; move to supervisord for daemons
1 parent c4417c4 commit 5c48bba

File tree

14 files changed

+495
-113
lines changed

14 files changed

+495
-113
lines changed

bootstrap/src/scripts/boot_as_compute

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,9 @@ if [ $RC -ne 0 ]; then
5454
error_exit "Failed during Ganglia setup"
5555
fi
5656

57-
# Adding nodewatcher to crontab
58-
RC=0
59-
crontab -l > /tmp/root.crontab
60-
echo "* * * * * cd /opt/cfncluster/nodewatcher && ./nodewatcher.py >> nodewatcher.log 2>&1" >> /tmp/root.crontab || RC=1
61-
crontab /tmp/root.crontab || RC=1
62-
if [ $RC -ne 0 ]; then
63-
error_exit "Failed to nodewatcher crontab"
64-
fi
57+
# Startup nodewatcher
58+
service supervisord restart
59+
supervisorctl status
6560

6661
##
6762
# Run boot as compute for a specific scheduler

bootstrap/src/scripts/boot_as_master

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,8 @@ if [ $RC -ne 0 ]; then
155155
fi
156156

157157
# Start sqswatcher
158-
cd /opt/cfncluster/sqswatcher && ./sqswatcher.py 2>&1
158+
service supervisord restart
159+
supervisorctl status
159160

160161
# Run postinstall script if defined
161162
run_postinstall

cli/cfncluster/cfnconfig.py

Lines changed: 56 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import logging
1818
import json
1919
import urllib2
20+
import config_sanity
2021

2122
class CfnClusterConfig:
2223

@@ -73,25 +74,11 @@ def __init__(self, args):
7374
except Exception:
7475
pass
7576

76-
# Get the EC2 keypair name to be used, exit if not set
77+
# Check if config sanity should be run
7778
try:
78-
self.key_name = __config.get(self.__cluster_section, 'key_name')
79-
if not self.key_name:
80-
raise Exception
79+
self.__sanity_check = __config.getboolean('global', 'sanity_check')
8180
except ConfigParser.NoOptionError:
82-
raise Exception
83-
self.parameters.append(('KeyName', self.key_name))
84-
85-
# Determine which keypair config will be used
86-
self.__keypair_section = ('keypair %s' % self.key_name)
87-
88-
# Get the location of the keypair file
89-
try:
90-
self.key_location = __config.get(self.__keypair_section, 'key_location')
91-
if not self.key_location:
92-
raise Exception
93-
except ConfigParser.NoOptionError:
94-
raise Exception
81+
self.__sanity_check = False
9582

9683
# Determine the EC2 region to be used or default to us-east-1
9784
# Order is 1) CLI arg 2) AWS_DEFAULT_REGION env 3) Config file 4) us-east-1
@@ -116,6 +103,18 @@ def __init__(self, args):
116103
except ConfigParser.NoOptionError:
117104
self.aws_secret_access_key=None
118105

106+
# Get the EC2 keypair name to be used, exit if not set
107+
try:
108+
self.key_name = __config.get(self.__cluster_section, 'key_name')
109+
if not self.key_name:
110+
raise Exception
111+
if self.__sanity_check:
112+
config_sanity.check_resource(self.region,self.aws_access_key_id, self.aws_secret_access_key,
113+
'EC2KeyPair', self.key_name)
114+
except ConfigParser.NoOptionError:
115+
raise Exception
116+
self.parameters.append(('KeyName', self.key_name))
117+
119118
# Determine the CloudFormation URL to be used
120119
# Order is 1) CLI arg 2) Config file 3) default for version + region
121120
try:
@@ -126,6 +125,9 @@ def __init__(self, args):
126125
'template_url')
127126
if not self.template_url:
128127
raise Exception
128+
if self.__sanity_check:
129+
config_sanity.check_resource(self.region,self.aws_access_key_id, self.aws_secret_access_key,
130+
'URL', self.template_url)
129131
except ConfigParser.NoOptionError:
130132
self.template_url = ('https://s3.amazonaws.com/cfncluster-%s/templates/cfncluster-%s.cfn.json' % (self.region, self.version))
131133
except AttributeError:
@@ -136,38 +138,46 @@ def __init__(self, args):
136138
self.__vpc_section = ('vpc %s' % self.__vpc_settings)
137139

138140
# Dictionary list of all VPC options
139-
self.__vpc_options = dict(vpc_id='VPCId', master_subnet_id='MasterSubnetId', compute_subnet_cidr='ComputeSubnetCidr',
140-
compute_subnet_id='ComputeSubnetId', use_public_ips='UsePublicIps' , ssh_from='SSHFrom')
141+
self.__vpc_options = dict(vpc_id=('VPCId','VPC'), master_subnet_id=('MasterSubnetId', 'VPCSubnet'),
142+
compute_subnet_cidr=('ComputeSubnetCidr',None),
143+
compute_subnet_id=('ComputeSubnetId', 'VPCSubnet'), use_public_ips=('UsePublicIps', None),
144+
ssh_from=('SSHFrom', None))
141145

142146
# Loop over all VPC options and add the defined ones to parameters, raise Exception if defined but null
143147
for key in self.__vpc_options:
144148
try:
145149
__temp__ = __config.get(self.__vpc_section, key)
146150
if not __temp__:
147151
raise Exception
148-
self.parameters.append((self.__vpc_options.get(key),__temp__))
152+
if self.__sanity_check and self.__vpc_options.get(key)[1] is not None:
153+
config_sanity.check_resource(self.region,self.aws_access_key_id, self.aws_secret_access_key,
154+
self.__vpc_options.get(key)[1],__temp__)
155+
self.parameters.append((self.__vpc_options.get(key)[0],__temp__))
149156
except ConfigParser.NoOptionError:
150157
pass
151158

152159
# Dictionary list of all cluster section options
153-
self.__cluster_options = dict(cluster_user='ClusterUser', compute_instance_type='ComputeInstanceType',
154-
master_instance_type='MasterInstanceType', initial_queue_size='InitialQueueSize',
155-
max_queue_size='MaxQueueSize', maintain_initial_size='MaintainInitialSize',
156-
scheduler='Scheduler', cluster_type='ClusterType', ephemeral_dir='EphemeralDir',
157-
spot_price='SpotPrice', custom_ami='CustomAMI', pre_install='PreInstallScript',
158-
post_install='PostInstallScript', proxy_server='ProxyServer',
159-
placement='Placement', placement_group='PlacementGroup',
160-
encrypted_ephemeral='EncryptedEphemeral',pre_install_args='PreInstallArgs',
161-
post_install_args='PostInstallArgs', s3_read_resource='S3ReadResource',
162-
s3_read_write_resource='S3ReadWriteResource')
160+
self.__cluster_options = dict(cluster_user=('ClusterUser', None), compute_instance_type=('ComputeInstanceType',None),
161+
master_instance_type=('MasterInstanceType', None), initial_queue_size=('InitialQueueSize',None),
162+
max_queue_size=('MaxQueueSize',None), maintain_initial_size=('MaintainInitialSize',None),
163+
scheduler=('Scheduler',None), cluster_type=('ClusterType',None), ephemeral_dir=('EphemeralDir',None),
164+
spot_price=('SpotPrice',None), custom_ami=('CustomAMI','EC2Ami'), pre_install=('PreInstallScript','URL'),
165+
post_install=('PostInstallScript','URL'), proxy_server=('ProxyServer',None),
166+
placement=('Placement',None), placement_group=('PlacementGroup','EC2PlacementGroup'),
167+
encrypted_ephemeral=('EncryptedEphemeral',None),pre_install_args=('PreInstallArgs',None),
168+
post_install_args=('PostInstallArgs',None), s3_read_resource=('S3ReadResource',None),
169+
s3_read_write_resource=('S3ReadWriteResource',None))
163170

164171
# Loop over all the cluster options and add the defined ones to parameters, raise Exception if defined but null
165172
for key in self.__cluster_options:
166173
try:
167174
__temp__ = __config.get(self.__cluster_section, key)
168175
if not __temp__:
169176
raise Exception
170-
self.parameters.append((self.__cluster_options.get(key),__temp__))
177+
if self.__sanity_check and self.__cluster_options.get(key)[1] is not None:
178+
config_sanity.check_resource(self.region,self.aws_access_key_id, self.aws_secret_access_key,
179+
self.__cluster_options.get(key)[1],__temp__)
180+
self.parameters.append((self.__cluster_options.get(key)[0],__temp__))
171181
except ConfigParser.NoOptionError:
172182
pass
173183

@@ -181,8 +191,9 @@ def __init__(self, args):
181191
pass
182192

183193
# Dictionary list of all EBS options
184-
self.__ebs_options = dict(ebs_snapshot_id='EBSSnapshotId', volume_type='VolumeType', volume_size='VolumeSize',
185-
volume_iops='VolumeIOPS', encrypted='EBSEncryption')
194+
self.__ebs_options = dict(ebs_snapshot_id=('EBSSnapshotId','EC2Snapshot'), volume_type=('VolumeType',None),
195+
volume_size=('VolumeSize',None),
196+
volume_iops=('VolumeIOPS',None), encrypted=('EBSEncryption',None))
186197

187198
try:
188199
if self.__ebs_section:
@@ -191,7 +202,10 @@ def __init__(self, args):
191202
__temp__ = __config.get(self.__ebs_section, key)
192203
if not __temp__:
193204
raise Exception
194-
self.parameters.append((self.__ebs_options.get(key),__temp__))
205+
if self.__sanity_check and self.__ebs_options.get(key)[1] is not None:
206+
config_sanity.check_resource(self.region,self.aws_access_key_id, self.aws_secret_access_key,
207+
self.__ebs_options.get(key)[1],__temp__)
208+
self.parameters.append((self.__ebs_options.get(key)[0],__temp__))
195209
except ConfigParser.NoOptionError:
196210
pass
197211
except AttributeError:
@@ -207,8 +221,10 @@ def __init__(self, args):
207221
pass
208222

209223
# Dictionary list of all scaling options
210-
self.__scaling_options = dict(scaling_threshold='ScalingThreshold', scaling_period='ScalingPeriod',
211-
scaling_evaluation_periods='ScalingEvaluationPeriods')
224+
self.__scaling_options = dict(scaling_threshold=('ScalingThreshold',None), scaling_period=('ScalingPeriod',None),
225+
scaling_evaluation_periods=('ScalingEvaluationPeriods',None),
226+
scaling_adjustment=('ScalingAdjustment',None),scaling_adjustment2=('ScalingAdjustment2',None),
227+
scaling_cooldonw=('ScalingCooldown',None),scaling_threshold2=('ScalingThreshold2',None))
212228

213229
try:
214230
if self.__scaling_section:
@@ -217,7 +233,10 @@ def __init__(self, args):
217233
__temp__ = __config.get(self.__scaling_section, key)
218234
if not __temp__:
219235
raise Exception
220-
self.parameters.append((self.__scaling_options.get(key),__temp__))
236+
if self.__sanity_check and self.__scaling_options.get(key)[1] is not None:
237+
config_sanity.check_resource(self.region,self.aws_access_key_id, self.aws_secret_access_key,
238+
self.__scaling_options.get(key)[1],__temp__)
239+
self.parameters.append((self.__scaling_options.get(key)[0],__temp__))
221240
except ConfigParser.NoOptionError:
222241
pass
223242
except AttributeError:

cli/cfncluster/cli.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,15 @@ def update(args):
3939

4040
def main():
4141
# set up logging to file
42+
4243
if platform.system() is 'Windows':
44+
if not os.path.exists(os.path.expanduser('~\.cfncluster')):
45+
os.makedirs(os.path.expanduser('~\.cfncluster'))
4346
logfile = os.path.expanduser('~\.cfncluster\cfncluster-cli.log')
4447
else:
45-
logfile = '/tmp/cfncluster-cli.log'
48+
if not os.path.exists(os.path.expanduser('~/.cfncluster')):
49+
os.makedirs(os.path.expanduser('~/.cfncluster'))
50+
logfile = os.path.expanduser('~/.cfncluster/cfncluster-cli.log')
4651
logging.basicConfig(level=logging.DEBUG,
4752
format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
4853
datefmt='%m-%d %H:%M',

cli/cfncluster/config_sanity.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Copyright 2013-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Amazon Software License (the "License"). You may not use this file except in compliance with the
4+
# License. A copy of the License is located at
5+
#
6+
# http://aws.amazon.com/asl/
7+
#
8+
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
9+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
10+
# limitations under the License.
11+
12+
__author__ = 'dougalb'
13+
14+
import boto.ec2
15+
import boto.vpc
16+
import urllib2
17+
import boto.exception
18+
import sys
19+
20+
def check_resource(region, aws_access_key_id, aws_secret_access_key, resource_type, resource_value):
    """Verify that a resource named in the cluster config can be looked up.

    Supported ``resource_type`` values are 'EC2KeyPair', 'VPC', 'VPCSubnet',
    'EC2Ami', 'EC2PlacementGroup', 'EC2Snapshot' (queried through boto in
    ``region`` with the supplied credentials) and 'URL' (opened with urllib2).
    Any other ``resource_type`` is ignored and the function returns None.

    :param region: AWS region name used for the boto connection
    :param aws_access_key_id: AWS access key id passed to boto (may be None)
    :param aws_secret_access_key: AWS secret key passed to boto (may be None)
    :param resource_type: one of the type names listed above
    :param resource_value: identifier, name or URL of the resource to check
    :return: None when the lookup succeeds or the type is not supported
    :raises SystemExit: exit status 1, after printing a message, when the
        lookup fails
    """
    # resource_type -> (boto submodule, connection method, identifier keyword)
    boto_checks = {
        'EC2KeyPair': ('ec2', 'get_all_key_pairs', 'keynames'),
        'VPC': ('vpc', 'get_all_vpcs', 'vpc_ids'),
        'VPCSubnet': ('vpc', 'get_all_subnets', 'subnet_ids'),
        'EC2Ami': ('ec2', 'get_all_images', 'image_ids'),
        'EC2PlacementGroup': ('ec2', 'get_all_placement_groups', 'groupnames'),
        'EC2Snapshot': ('ec2', 'get_all_snapshots', 'snapshot_ids'),
    }

    # URL
    if resource_type == 'URL':
        try:
            # Only reachability matters; close the response straight away.
            urllib2.urlopen(resource_value).close()
        except urllib2.HTTPError as e:
            # HTTPError is a subclass of URLError, so it has to be caught first.
            print('Config sanity error: %s returned HTTP status %s' % (resource_value, e.code))
            sys.exit(1)
        except urllib2.URLError as e:
            print('Config sanity error: unable to open %s: %s' % (resource_value, e.reason))
            sys.exit(1)
        except ValueError as e:
            # urlopen raises ValueError for malformed URLs (e.g. missing scheme).
            print('Config sanity error: %s' % e)
            sys.exit(1)
        return

    check = boto_checks.get(resource_type)
    if check is None:
        # Types without a check are skipped silently, as they always were.
        return

    module_name, method_name, id_keyword = check
    try:
        conn = getattr(boto, module_name).connect_to_region(
            region,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key)
        if conn is None:
            # connect_to_region returns None for a region name it does not know.
            print('Config sanity error: unable to connect to region %s' % region)
            sys.exit(1)
        # The result is discarded: boto raises when the resource does not exist.
        getattr(conn, method_name)(**{id_keyword: [resource_value]})
    except boto.exception.BotoServerError as e:
        print('Config sanity error: %s' % e.message)
        sys.exit(1)

cli/cfncluster/examples/config

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
cluster_template = default
55
# Check for updates
66
update_check = true
7+
# Check config sanity. (Attempts to validate that resources defined in parameters actually exist)
8+
sanity_check = true
79

810
[aws]
911
# This is the AWS credentials section (required).
@@ -16,9 +18,6 @@ update_check = true
1618
# Uncomment to specify a different Amazon AWS region (OPTIONAL)
1719
#aws_region_name = us-west-2
1820

19-
[keypair mykey]
20-
key_location = /path/to/key.pem
21-
2221
## cfncluster templates
2322
[cluster default]
2423
# Name of an existing EC2 KeyPair to enable SSH access to the instances.
@@ -87,6 +86,9 @@ key_name = mykey
8786
# Encrypted ephemeral drives. In-memory keys, non-recoverable.
8887
# (defaults to false in default template)
8988
#encrypted_ephemeral = false
89+
# OS type used in the cluster
90+
# (defaults to centos6 in the default template)
91+
#base_os = centos6
9092
# Settings section relating to VPC to be used
9193
vpc_settings = public
9294
# Settings section relating to EBS volume
@@ -143,9 +145,21 @@ master_subnet_id = subnet-
143145
# Threshold for triggering CloudWatch ScaleUp action
144146
# (defaults to 4 for default template)
145147
#scaling_threshold = 4
148+
# Number of instances to add when the CloudWatch ScaleUp action is called
149+
# (defaults to 2 for default template)
150+
#scaling_adjustment = 2
151+
# Threshold for triggering CloudWatch ScaleUp2 action
152+
# (defaults to 200 for default template)
153+
#scaling_threshold2 = 200
154+
# Number of instances to add when the CloudWatch ScaleUp2 action is called
155+
# (defaults to 20 for default template)
156+
#scaling_adjustment2 = 20
146157
# Period to measure ScalingThreshold
147158
# (defaults to 60 for default template)
148159
#scaling_period = 60
149160
# Number of periods over which ScalingThreshold is evaluated
150161
# (defaults to 2 for default template)
151-
#scaling_evaluation_periods = 2
162+
#scaling_evaluation_periods = 2
163+
# Amount of time in seconds to wait before attempting further scaling actions
164+
# (defaults to 120 for the default template)
165+
#scaling_cooldown = 120

cli/setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ def read(fname):
2020
return open(os.path.join(os.path.dirname(__file__), fname)).read()
2121

2222
console_scripts = ['cfncluster = cfncluster.cli:main']
23-
version = "0.0.13"
24-
requires = ['boto>=2.32.1', 'botocore']
23+
version = "0.0.99"
24+
requires = ['boto>=2.33', 'botocore']
2525

2626
if sys.version_info[:2] == (2, 6):
2727
# For python2.6 we have to require argparse since it

0 commit comments

Comments
 (0)