Skip to content

Commit 55186e8

Browse files
committed
Merge pull request #3 from awslabs/develop
Develop
2 parents 0efaca0 + 1c38c85 commit 55186e8

File tree

16 files changed

+461
-538
lines changed

16 files changed

+461
-538
lines changed

CHANGELOG.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,20 @@
22
CHANGELOG
33
=========
44

5+
0.0.7
6+
=====
7+
8+
* feature:``cfncluster``: Added option to encrypt ephemeral drives with in-memory keys
9+
* feature:``cfncluster``: Detect all ephemeral drives, stripe and mount as /scratch
10+
* feature:``cfncluster``: Support for placement groups
11+
* feature:``cfncluster``: Support for cluster placement logic. Can either be cluster or compute.
12+
* feature:``cfncluster``: Added option to provide arguments to pre/post install scripts
13+
* feature:``cfncluster``: Added DKMS support for Lustre filesystems - http://zfsonlinux.org/lustre.html
14+
* bugfix:``cli``: Added missing support for SSH from CIDR range
15+
* bugfix:``cfncluster``: Fixed Ganglia setup for ComputeFleet
16+
* updates:``SGE``: Updated to 8.1.7 - https://arc.liv.ac.uk/trac/SGE
17+
* updates:``Openlava``: Updated to latest Git for Openlava 2.2 - https://github.com/openlava/openlava
18+
519
0.0.6
620
=====
721

amis.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
us-west-2 ami-e581fcd5
2-
us-east-1 ami-745ea11c
3-
eu-west-1 ami-e3458c94
4-
ap-northeast-1 ami-2d41092c
1+
us-west-2 ami-7dcab74d
2+
us-east-1 ami-2c07f944
3+
eu-west-1 ami-a1a169d6
4+
ap-northeast-1 ami-b3c78fb2

bootstrap/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
SHELL = /bin/sh
2-
PREFIX ?= /opt/cfncluster
2+
DESTDIR ?= /opt/cfncluster
33

44
install:
55
install -d -m 755 $(DESTDIR)

bootstrap/src/scripts/boot_as_compute

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,101 @@ function error_exit () {
2525
# Run preinstall script if defined
2626
RC=0
2727
if [ "${cfn_preinstall}" != "NONE" ]; then
28-
wget -qO- ${cfn_preinstall} | /bin/sh || RC=1
28+
tmpfile=$(mktemp)
29+
wget -qO- ${cfn_preinstall} > $tmpfile || RC=1
30+
if [ "${cfn_preinstall_args}" != "NONE" ]; then
31+
args=${cfn_preinstall_args}
32+
fi
33+
/bin/sh $tmpfile $args || RC=1
34+
/bin/rm $tmpfile
2935
fi
3036
if [ $RC -ne 0 ]; then
3137
error_exit "Failed to run boot_as_compute preinstall"
3238
fi
3339

40+
## Non-scheduler specific functions
41+
##
42+
43+
# LVM, format, and mount the ephemeral drives at /scratch
44+
RC=0
45+
mkdir -p /scratch
46+
chmod 1777 /scratch
47+
MAPPING=$(/usr/bin/ec2-metadata -b | grep ephemeral | awk '{print $2}' | sed 's/sd/xvd/')
48+
for m in $MAPPING; do
49+
stat -t /dev/${m} >/dev/null 2>&1
50+
check=$?
51+
if [ ${check} -eq 0 ]; then
52+
DEVS="${m} $DEVS"
53+
fi
54+
done
55+
NUM_DEVS=0
56+
for d in $DEVS; do
57+
d=/dev/${d}
58+
dd if=/dev/zero of=${d} bs=32k count=1
59+
parted -s ${d} mklabel msdos
60+
parted -s ${d}
61+
parted -s -a optimal ${d} mkpart primary 1MB 100%
62+
parted -s ${d} set 1 lvm on
63+
let NUM_DEVS++
64+
PARTITIONS="${d}1 $PARTITIONS"
65+
done
66+
# sleep 10 seconds to let partitions settle (bug?)
67+
sleep 10
68+
69+
# Setup LVM
70+
pvcreate $PARTITIONS
71+
vgcreate vg.01 $PARTITIONS
72+
lvcreate -i $NUM_DEVS -I 64 -l 100%FREE -n lv_ephemeral vg.01
73+
if [ "$cfn_encrypted_ephemeral" == "true" ]; then
74+
mkfs -q /dev/ram1 1024
75+
mkdir -p /root/keystore
76+
mount /dev/ram1 /root/keystore
77+
dd if=/dev/urandom of=/root/keystore/keyfile bs=1024 count=4
78+
chmod 0400 /root/keystore/keyfile
79+
cryptsetup -q luksFormat /dev/vg.01/lv_ephemeral /root/keystore/keyfile
80+
cryptsetup -d /root/keystore/keyfile luksOpen /dev/vg.01/lv_ephemeral ephemeral_luks
81+
mkfs.xfs /dev/mapper/ephemeral_luks
82+
mount -v -t xfs -o noatime,nodiratime /dev/mapper/ephemeral_luks /scratch
83+
else
84+
mkfs.xfs /dev/vg.01/lv_ephemeral
85+
echo "/dev/vg.01/lv_ephemeral /scratch xfs noatime,nodiratime 0 0" >> /etc/fstab
86+
mount -v /scratch
87+
fi
88+
chmod 1777 /scratch
89+
90+
# Mount NFS exports
91+
RC=0
92+
echo "$cfn_master:/home /home nfs hard,intr,noatime,vers=3,_netdev 0 0" >> /etc/fstab || RC=1
93+
echo "$cfn_master:/shared /shared nfs hard,intr,noatime,vers=3,_netdev 0 0" >> /etc/fstab || RC=1
94+
mount -v /home || RC=1
95+
mount -v /shared || RC=1
96+
if [ $RC -ne 0 ]; then
97+
error_exit "Failed during during NFS mounts"
98+
fi
99+
100+
# Configure ganglia
101+
RC=0
102+
location=`curl --retry 3 --retry-delay 0 --silent --fail http://169.254.169.254/latest/meta-data/placement/availability-zone` || RC=1
103+
cd /etc/ganglia || RC=1
104+
/bin/cp -f /opt/cfncluster/templates/os/gmond.conf.COMPUTE gmond.conf || RC=1
105+
sed -i "s/<master>/${cfn_master}/" gmond.conf || RC=1
106+
sed -i "s/<location>/$location/" gmond.conf || RC=1
107+
chkconfig gmond on || RC=1
108+
service gmond start || RC=1
109+
if [ $RC -ne 0 ]; then
110+
error_exit "Failed during Ganglia setup"
111+
fi
112+
113+
# Adding nodewatcher to crontab
114+
RC=0
115+
crontab -l > /tmp/root.crontab
116+
echo "* * * * * cd /opt/cfncluster/nodewatcher && ./nodewatcher.py >> nodewatcher.log 2>&1" >> /tmp/root.crontab || RC=1
117+
crontab /tmp/root.crontab || RC=1
118+
if [ $RC -ne 0 ]; then
119+
error_exit "Failed to nodewatcher crontab"
120+
fi
121+
122+
##
34123
# Run boot as compute for a specific scheduler
35124
RC=0
36125
/opt/cfncluster/scripts/${cfn_scheduler}/boot_as_compute >/var/log/cfncluster.log 2>&1 || RC=1
@@ -41,7 +130,13 @@ fi
41130
# Run postinstall script if defined
42131
RC=0
43132
if [ "${cfn_postinstall}" != "NONE" ]; then
44-
wget -qO- ${cfn_postinstall} | /bin/sh || RC=1
133+
tmpfile=$(mktemp)
134+
wget -qO- ${cfn_postinstall} > $tmpfile || RC=1
135+
if [ "${cfn_postinstall_args}" != "NONE" ]; then
136+
args=${cfn_postinstall_args}
137+
fi
138+
/bin/sh $tmpfile $args || RC=1
139+
/bin/rm $tmpfile
45140
fi
46141
if [ $RC -ne 0 ]; then
47142
error_exit "Failed to run boot_as_compute postinstall"

bootstrap/src/scripts/boot_as_master

Lines changed: 170 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,174 @@ function error_exit () {
2525
# Run preinstall script if defined
2626
RC=0
2727
if [ "${cfn_preinstall}" != "NONE" ]; then
28-
wget -qO- ${cfn_preinstall} | /bin/sh || RC=1
28+
tmpfile=$(mktemp)
29+
wget -qO- ${cfn_preinstall} > $tmpfile || RC=1
30+
if [ "${cfn_preinstall_args}" != "NONE" ]; then
31+
args=${cfn_preinstall_args}
32+
fi
33+
/bin/sh $tmpfile $args || RC=1
34+
/bin/rm $tmpfile
2935
fi
3036
if [ $RC -ne 0 ]; then
3137
error_exit "Failed to run boot_as_master preinstall"
3238
fi
3339

40+
## Non-scheduler specific functions
41+
##
42+
43+
# Check cfn_volume is present in config
44+
if [ "${cfn_volume}x" == "x" ]; then
45+
error_exit "Volume must be provided."
46+
fi
47+
48+
# Check hostname resolves using DNS
49+
myhostname=$(hostname -s)
50+
if [ $? != 0 ]; then
51+
error_exit 'Failed to determine local hostname'
52+
fi
53+
54+
# Enable PAT
55+
RC=0
56+
/opt/cfncluster/scripts/os/configure-pat.sh || RC=1
57+
echo -e "\n# Enable PAT\n/opt/cfncluster/scripts/os/configure-pat.sh\n\n" >> /etc/rc.local || RC=1
58+
if [ $RC -ne 0 ]; then
59+
error_exit "Failed to enable NAT(PAT)"
60+
fi
61+
62+
# LVM, format, and mount the ephemeral drives at /scratch
63+
RC=0
64+
mkdir -p /scratch
65+
chmod 1777 /scratch
66+
MAPPING=$(/usr/bin/ec2-metadata -b | grep ephemeral | awk '{print $2}' | sed 's/sd/xvd/')
67+
for m in $MAPPING; do
68+
stat -t /dev/${m} >/dev/null 2>&1
69+
check=$?
70+
if [ ${check} -eq 0 ]; then
71+
DEVS="${m} $DEVS"
72+
fi
73+
done
74+
NUM_DEVS=0
75+
for d in $DEVS; do
76+
d=/dev/${d}
77+
dd if=/dev/zero of=${d} bs=32k count=1
78+
parted -s ${d} mklabel msdos
79+
parted -s ${d}
80+
parted -s -a optimal ${d} mkpart primary 1MB 100%
81+
parted -s ${d} set 1 lvm on
82+
let NUM_DEVS++
83+
PARTITIONS="${d}1 $PARTITIONS"
84+
done
85+
# sleep 10 seconds to let partitions settle (bug?)
86+
sleep 10
87+
88+
# Setup LVM
89+
pvcreate $PARTITIONS
90+
vgcreate vg.01 $PARTITIONS
91+
lvcreate -i $NUM_DEVS -I 64 -l 100%FREE -n lv_ephemeral vg.01
92+
if [ "$cfn_encrypted_ephemeral" == "true" ]; then
93+
mkfs -q /dev/ram1 1024
94+
mkdir -p /root/keystore
95+
mount /dev/ram1 /root/keystore
96+
dd if=/dev/urandom of=/root/keystore/keyfile bs=1024 count=4
97+
chmod 0400 /root/keystore/keyfile
98+
cryptsetup -q luksFormat /dev/vg.01/lv_ephemeral /root/keystore/keyfile
99+
cryptsetup -d /root/keystore/keyfile luksOpen /dev/vg.01/lv_ephemeral ephemeral_luks
100+
mkfs.xfs /dev/mapper/ephemeral_luks
101+
mount -v -t xfs -o noatime,nodiratime /dev/mapper/ephemeral_luks /scratch
102+
else
103+
mkfs.xfs /dev/vg.01/lv_ephemeral
104+
echo "/dev/vg.01/lv_ephemeral /scratch xfs noatime,nodiratime 0 0" >> /etc/fstab
105+
mount -v /scratch
106+
fi
107+
chmod 1777 /scratch
108+
109+
# Attach and mount /shared volume
110+
RC=0
111+
/usr/local/sbin/attachVolume.py ${cfn_volume} || RC=1
112+
sleep 10 # Hate having to do this...
113+
dev=$(stat /dev/disk/by-ebs-volumeid/${cfn_volume}|grep -- 'File:'|awk '{print $4}'|cut -d'/' -f3|tr -d "'")
114+
fs_type=$(blkid -o list | grep -- "$dev" | awk '{print $2}')
115+
if [ "${fs_type}x" == "x" ]; then
116+
mkfs.xfs /dev/disk/by-ebs-volumeid/${cfn_volume} || RC=1
117+
sleep 5
118+
fi
119+
fs_type=$(blkid -o list | grep -- "$dev" | awk '{print $2}')
120+
echo "/dev/disk/by-ebs-volumeid/${cfn_volume} /shared $fs_type noatime,nodiratime 0 0" >> /etc/fstab
121+
mount -v /shared || RC=1
122+
chmod 1777 /shared || RC=1
123+
if [ $RC -ne 0 ]; then
124+
error_exit "Failed to attach and mount volume"
125+
fi
126+
127+
# Setup NFS as Master
128+
# 1. Determine subnet for NFS exports
129+
ETH0_MAC=`/sbin/ifconfig | /bin/grep eth0 | awk '{print tolower($5)}' | grep '^[0-9a-f]\{2\}\(:[0-9a-f]\{2\}\)\{5\}$'`
130+
VPC_CIDR_URI="http://169.254.169.254/latest/meta-data/network/interfaces/macs/${ETH0_MAC}/vpc-ipv4-cidr-block"
131+
VPC_CIDR_RANGE=`curl --retry 3 --retry-delay 0 --silent --fail ${VPC_CIDR_URI}`
132+
if [ $? -ne 0 ] ; then
133+
echo "Unable to retrive VPC CIDR range from meta-data. This either means a) non-VPC or b) an error" | logger -t "cfncluster"
134+
VPC_CIDR_RANGE="10.0.0.0/8"
135+
else
136+
echo "Retrived the VPC CIDR range: ${VPC_CIDR_RANGE} from meta-data for NFS export." | logger -t "cfncluster"
137+
fi
138+
# 2. Update config
139+
RC=0
140+
cd /etc || RC=1
141+
/bin/cp -f /opt/cfncluster/templates/os/exports.MASTER exports || RC=1
142+
sed -i "s?<cidr>?$VPC_CIDR_RANGE?" exports || RC=1
143+
if [ $RC -ne 0 ]; then
144+
error_exit "Failed to configure NFS exports"
145+
fi
146+
# 3. Start NFS
147+
RC=0
148+
chkconfig nfs on || RC=1
149+
chkconfig rpcbind on || RC=1
150+
chkconfig rpcidmapd on || RC=1
151+
service rpcbind restart || RC=1
152+
service rpcidmapd restart || RC=1
153+
service nfs restart || RC=1
154+
if [ $RC -ne 0 ]; then
155+
error_exit "Failed to start NFS server"
156+
fi
157+
158+
# Setup Ganglia as Master
159+
RC=0
160+
location=`curl --retry 3 --retry-delay 0 --silent --fail http://169.254.169.254/latest/meta-data/placement/availability-zone` || RC=1
161+
cd /etc/ganglia || RC=1
162+
/bin/cp -f /opt/cfncluster/templates/os/gmond.conf.MASTER gmond.conf || RC=1
163+
/bin/cp -f /opt/cfncluster/templates/os/gmetad.conf.MASTER gmetad.conf || RC=1
164+
sed -i "s/<master>/$myhostname/" gmond.conf || RC=1
165+
sed -i "s/<location>/$location/" gmond.conf || RC=1
166+
sed -i "s/<stack_name>/$stack_name/" gmond.conf || RC=1
167+
sed -i "s/<stack_name>/$stack_name/" gmetad.conf || RC=1
168+
if [ $RC -ne 0 ]; then
169+
error_exit "Failed to configure Ganglia"
170+
fi
171+
172+
# Start httpd and ganglia services
173+
RC=0
174+
chkconfig gmond on || RC=1
175+
chkconfig gmetad on || RC=1
176+
chkconfig httpd on || RC=1
177+
service gmond start || RC=1
178+
service gmetad start || RC=1
179+
service httpd start || RC=1
180+
if [ $RC -ne 0 ]; then
181+
error_exit "Failed to start Ganglia"
182+
fi
183+
184+
# Setup ec2-user SSH auth
185+
RC=0
186+
su - ec2-user -c "ssh-keygen -q -t rsa -f ~/.ssh/id_rsa -N ''" || RC=1
187+
su - ec2-user -c "cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys2 && chmod 0600 ~/.ssh/authorized_keys2" || RC=1
188+
su - ec2-user -c "ssh-keyscan ${myhostname} > ~/.ssh/known_hosts && chmod 0600 ~/.ssh/known_hosts" || RC=1
189+
if [ $RC -ne 0 ]; then
190+
error_exit "Failed to setup ec2-user SSH auth"
191+
fi
192+
193+
## Scheduler specific section
194+
##
195+
34196
# Run boot as master for a specific scheduler
35197
RC=0
36198
/opt/cfncluster/scripts/${cfn_scheduler}/boot_as_master >/var/log/cfncluster.log 2>&1 || RC=1
@@ -44,7 +206,13 @@ cd /opt/cfncluster/sqswatcher && ./sqswatcher.py 2>&1
44206
# Run postinstall script if defined
45207
RC=0
46208
if [ "${cfn_postinstall}" != "NONE" ]; then
47-
wget -qO- ${cfn_postinstall} | /bin/sh || RC=1
209+
tmpfile=$(mktemp)
210+
wget -qO- ${cfn_postinstall} > $tmpfile || RC=1
211+
if [ "${cfn_postinstall_args}" != "NONE" ]; then
212+
args=${cfn_postinstall_args}
213+
fi
214+
/bin/sh $tmpfile $args || RC=1
215+
/bin/rm $tmpfile
48216
fi
49217
if [ $RC -ne 0 ]; then
50218
error_exit "Failed to run boot_as_master postinstall"

0 commit comments

Comments
 (0)