Skip to content

Commit 173edba

Browse files
committed
ci: Add multi-az support
This change: * Refactors existing scripts to re-use common code * Adds support for building CI devstack on OpenStack * Captures devstack logs as CI artifacts * Adds a second worker node in a separate AZ. The worker node is built asynchronously, so tests which don't require multi-AZ can proceed as soon as the controller node is built, as before. The worker node is automatically registered with the controller when its build completes, so tests requiring multi-AZ can poll for creation of the AZ before starting. This change heavily touches the AWS scripts, but it has not been tested on AWS. We should assume that AWS support is broken.
1 parent 8b17f0e commit 173edba

20 files changed

+1078
-693
lines changed

hack/ci/.gitignore

Lines changed: 0 additions & 2 deletions
This file was deleted.

hack/ci/aws-project.sh

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#!/usr/bin/env bash
2+
3+
# Copyright 2021 The Kubernetes Authors.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# hack script for preparing AWS to run cluster-api-provider-openstack e2e
18+
19+
# Trace every command (-x) and abort on a failing command, on use of an unset
# variable, or on a failure in any stage of a pipeline.
set -x -o errexit -o nounset -o pipefail
20+
21+
# Populate the AWS_* settings used by the other functions in this file.
# A value already present in the environment is kept; otherwise the default
# below is assigned.
# Globals: reads CLUSTER_NAME (only when AWS_NETWORK_NAME is unset or empty);
#          writes AWS_REGION, AWS_ZONE, AWS_AMI, AWS_MACHINE_TYPE,
#          AWS_NETWORK_NAME, AWS_KEY_PAIR; exports AWS_PAGER.
# Outputs: one summary line on stdout.
cloud_init() {
  : "${AWS_REGION:=eu-central-1}"
  : "${AWS_ZONE:=eu-central-1a}"

  # Known AMIs:
  #  * capa-ami-ubuntu-20.04-1.20.4-00-1613898574 id: ami-0120656d38c206057
  #  * ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-20210223 id: ami-0767046d1677be5a0
  : "${AWS_AMI:=ami-0767046d1677be5a0}"

  # Instance types can be browsed at:
  # https://eu-central-1.console.aws.amazon.com/ec2/v2/home?region=eu-central-1#InstanceTypes:
  : "${AWS_MACHINE_TYPE:=c5.metal}"
  : "${AWS_NETWORK_NAME:=${CLUSTER_NAME}-mynetwork}"

  # One-off preparation of the key pair:
  #  * create the key pair:
  #      aws ec2 create-key-pair --key-name capo-e2e --query 'KeyMaterial' --region "${AWS_REGION}" --output text > ~/.ssh/aws-capo-e2e
  #  * add it to the local agent and derive the public key:
  #      ssh-add ~/.ssh/aws-capo-e2e
  #      ssh-keygen -y -f ~/.ssh/aws-capo-e2e > ~/.ssh/aws-capo-e2e.pub
  : "${AWS_KEY_PAIR:=capo-e2e}"

  # Turn off pagination in the AWS CLI
  export AWS_PAGER=""

  echo "Using: AWS_REGION: ${AWS_REGION} AWS_NETWORK_NAME: ${AWS_NETWORK_NAME}"
}
43+
44+
# Create the AWS network infrastructure for the e2e environment: a VPC, a
# subnet, a security group allowing inbound SSH, and an internet gateway with
# default IPv4/IPv6 routes.
#
# Nothing is done when AWS_NETWORK_NAME is "default", or when a VPC tagged with
# AWS_NETWORK_NAME already exists.
#
# Globals:
#   read:    AWS_NETWORK_NAME, AWS_REGION, AWS_ZONE, PRIVATE_NETWORK_CIDR
#   written: AWS_VPC_ID, AWS_SUBNET_ID, AWS_SUBNET_ROUTE_TABLE_ID,
#            AWS_SECURITY_GROUP_ID, AWS_INTERNET_GATEWAY_ID
function init_infrastructure() {
  if [[ ${AWS_NETWORK_NAME} == "default" ]]; then
    return 0
  fi

  # Every resource is tagged Name=${AWS_NETWORK_NAME} on creation, so the same
  # tag must be used to look it up again (a hard-coded name only works for one
  # particular CLUSTER_NAME).
  local name_filter="Name=tag:Name,Values=${AWS_NETWORK_NAME}"

  local vpc_count
  vpc_count=$(aws ec2 describe-vpcs --filters "${name_filter}" \
    --region "${AWS_REGION}" --query 'length(*[0])')
  if [[ ${vpc_count} != "0" ]]; then
    return 0
  fi

  aws ec2 create-vpc --cidr-block "${PRIVATE_NETWORK_CIDR}" \
    --tag-specifications "ResourceType=vpc,Tags=[{Key=Name,Value=${AWS_NETWORK_NAME}}]" \
    --region "${AWS_REGION}"
  AWS_VPC_ID=$(aws ec2 describe-vpcs --filters "${name_filter}" \
    --region "${AWS_REGION}" --query '*[0].VpcId' --output text)

  aws ec2 create-subnet --cidr-block "${PRIVATE_NETWORK_CIDR}" --vpc-id "${AWS_VPC_ID}" \
    --tag-specifications "ResourceType=subnet,Tags=[{Key=Name,Value=${AWS_NETWORK_NAME}}]" \
    --region "${AWS_REGION}" --availability-zone "${AWS_ZONE}"
  AWS_SUBNET_ID=$(aws ec2 describe-subnets --filters "${name_filter}" \
    --region "${AWS_REGION}" --query '*[0].SubnetId' --output text)
  # It's also the route table of the VPC
  AWS_SUBNET_ROUTE_TABLE_ID=$(aws ec2 describe-route-tables \
    --filters "Name=vpc-id,Values=${AWS_VPC_ID}" \
    --region "${AWS_REGION}" --query '*[0].RouteTableId' --output text)

  aws ec2 create-security-group --group-name "${AWS_NETWORK_NAME}" \
    --description "${AWS_NETWORK_NAME}" --vpc-id "${AWS_VPC_ID}" \
    --tag-specifications "ResourceType=security-group,Tags=[{Key=Name,Value=${AWS_NETWORK_NAME}}]" \
    --region "${AWS_REGION}"
  AWS_SECURITY_GROUP_ID=$(aws ec2 describe-security-groups --filters "${name_filter}" \
    --region "${AWS_REGION}" --query '*[0].GroupId' --output text)

  aws ec2 authorize-security-group-ingress --group-id "${AWS_SECURITY_GROUP_ID}" \
    --protocol tcp --port 22 --cidr 0.0.0.0/0 --region "${AWS_REGION}"

  # Documentation to enable internet access for subnet:
  # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/TroubleshootingInstancesConnecting.html#TroubleshootingInstancesConnectionTimeout
  aws ec2 create-internet-gateway \
    --tag-specifications "ResourceType=internet-gateway,Tags=[{Key=Name,Value=${AWS_NETWORK_NAME}}]" \
    --region "${AWS_REGION}"
  # The gateway ID must be resolved BEFORE attaching it: referencing it earlier
  # is an unbound-variable error under `set -o nounset`.
  AWS_INTERNET_GATEWAY_ID=$(aws ec2 describe-internet-gateways --filters "${name_filter}" \
    --region "${AWS_REGION}" --query '*[0].InternetGatewayId' --output text)
  aws ec2 attach-internet-gateway --internet-gateway-id "${AWS_INTERNET_GATEWAY_ID}" \
    --vpc-id "${AWS_VPC_ID}" --region "${AWS_REGION}"

  aws ec2 create-route --route-table-id "${AWS_SUBNET_ROUTE_TABLE_ID}" \
    --destination-cidr-block 0.0.0.0/0 --gateway-id "${AWS_INTERNET_GATEWAY_ID}" \
    --region "${AWS_REGION}"
  aws ec2 create-route --route-table-id "${AWS_SUBNET_ROUTE_TABLE_ID}" \
    --destination-ipv6-cidr-block ::/0 --gateway-id "${AWS_INTERNET_GATEWAY_ID}" \
    --region "${AWS_REGION}"
}
72+
73+
# Launch an EC2 instance unless one tagged with the given name already exists,
# then pause so the instance has time to obtain a public IP.
#
# Arguments:
#   $1 - instance name (used as the Name tag)
#   $2 - private IP address to assign inside the subnet
#   $3 - path to the user-data (cloud-init) file
#   $4 - "public" flag; accepted but unused by AWS
# Globals:
#   read:    AWS_REGION, AWS_ZONE, AWS_AMI, AWS_MACHINE_TYPE, AWS_KEY_PAIR,
#            AWS_NETWORK_NAME
#   written: AWS_SUBNET_ID, AWS_SECURITY_GROUP_ID
function create_vm {
  local name=$1 && shift
  local ip=$1 && shift
  local userdata=$1 && shift
  # shellcheck disable=SC2034
  local public=$1 && shift # Unused by AWS

  local instance_count
  instance_count=$(aws ec2 describe-instances --filters "Name=tag:Name,Values=${name}" \
    --region "${AWS_REGION}" --query 'length(*[0])')

  if [[ ${instance_count} == "0" ]]; then
    # The subnet and security group are tagged with AWS_NETWORK_NAME when they
    # are created, so look them up by that same name rather than a fixed one.
    local name_filter="Name=tag:Name,Values=${AWS_NETWORK_NAME}"
    AWS_SUBNET_ID=$(aws ec2 describe-subnets --filters "${name_filter}" \
      --region "${AWS_REGION}" --query '*[0].SubnetId' --output text)
    AWS_SECURITY_GROUP_ID=$(aws ec2 describe-security-groups --filters "${name_filter}" \
      --region "${AWS_REGION}" --query '*[0].GroupId' --output text)

    # /dev/sda1 is renamed to /dev/nvme0n1 by AWS
    aws ec2 run-instances --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=${name}}]" \
      --region "${AWS_REGION}" \
      --placement "AvailabilityZone=${AWS_ZONE}" \
      --image-id "${AWS_AMI}" \
      --instance-type "${AWS_MACHINE_TYPE}" \
      --block-device-mappings 'DeviceName=/dev/sda1,Ebs={VolumeSize=300}' \
      --subnet-id "${AWS_SUBNET_ID}" \
      --private-ip-address "${ip}" \
      --count 1 \
      --associate-public-ip-address \
      --security-group-ids "${AWS_SECURITY_GROUP_ID}" \
      --key-name "${AWS_KEY_PAIR}" \
      --user-data "file://${userdata}" \
      --no-paginate
  fi

  # wait a bit so the server has time to get a public ip
  sleep 30
}
104+
105+
# Print the public IP address of the instance tagged
# "${CLUSTER_NAME}-controller", as reported by the AWS CLI in text format.
# Globals: reads CLUSTER_NAME, AWS_REGION.
get_public_ip() {
  aws ec2 describe-instances \
    --filters "Name=tag:Name,Values=${CLUSTER_NAME}-controller" \
    --region "${AWS_REGION}" \
    --query 'Reservations[*].Instances[*].PublicIpAddress' \
    --output text
}
109+
110+
# Print the MTU value to use on AWS (a fixed 1300).
# Background on AWS MTUs:
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/network_mtu.html
get_mtu() {
  echo "1300"
}
114+
115+
# Print the contents of the SSH public key file to stdout.
# Globals: reads SSH_PUBLIC_KEY_FILE (path to the public key).
get_ssh_public_key() {
  local key_file=${SSH_PUBLIC_KEY_FILE}
  cat "${key_file}"
}
118+
119+
# Print the path of the SSH private key file (the path, not its contents).
# Globals: reads SSH_PRIVATE_KEY_FILE.
get_ssh_private_key_file() {
  local key_path=${SSH_PRIVATE_KEY_FILE}
  echo "${key_path}"
}
122+
123+
# Cleanup of AWS resources is not implemented: report that on stdout and
# terminate the calling shell with status 1.
cloud_cleanup() {
  echo "Not implemented"
  exit 1
}

hack/ci/cloud-init/common.yaml.tpl

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#cloud-config
2+
runcmd:
3+
- sysctl -p /etc/sysctl.d/devstack.conf
4+
- /root/devstack.sh
5+
final_message: "The system is finally up, after $UPTIME seconds"
6+
users:
7+
- name: cloud
8+
lock_passwd: true
9+
sudo: ALL=(ALL) NOPASSWD:ALL
10+
ssh_authorized_keys:
11+
- ${SSH_PUBLIC_KEY}
12+
# Infrastructure packages required:
13+
# python3 - required by sshuttle
14+
# git - required to obtain devstack
15+
# jq - required by devstack-common.sh
16+
packages:
17+
- python3
18+
- git
19+
- jq
20+
package_upgrade: true
21+
write_files:
22+
- path: /etc/sysctl.d/devstack.conf
23+
permissions: 0644
24+
content: |
25+
net.ipv4.ip_forward=1
26+
net.ipv4.conf.default.rp_filter=0
27+
net.ipv4.conf.all.rp_filter=0
28+
- path: /tmp/devstack-common.sh
29+
permissions: 0644
30+
content: |
31+
# ensure nested virtualization
32+
# Load the kvm-intel kernel module and verify that the /dev/kvm character
# device exists; otherwise print a message and exit the shell with status 1.
ensure_kvm() {
  sudo modprobe kvm-intel
  if [ -c /dev/kvm ]; then
    return 0
  fi
  echo "/dev/kvm is not present"
  exit 1
}
39+
40+
# Run devstack's stack.sh in a login shell of the "stack" user, with TERM set
# to vt100 for that command.
run_devstack() {
  su - stack -c 'TERM=vt100 /opt/stack/devstack/stack.sh'
}
43+
44+
# Make the devstack admin credentials available in the stack user's shells and
# upload the test images (Ubuntu Kubernetes node image and CirrOS) with
# devstack's upload_image.sh, run as the "stack" user.
upload_images() {
  # Have every new shell of the stack user load the auth/endpoint variables
  printf '%s\n' 'source /opt/stack/devstack/openrc admin admin' >> /opt/stack/.bashrc

  # Upload from the devstack host itself so the images don't have to be pushed from Prow
  su - stack -c 'source /opt/stack/devstack/openrc admin admin && /opt/stack/devstack/tools/upload_image.sh https://storage.googleapis.com/artifacts.k8s-staging-capi-openstack.appspot.com/test/ubuntu/2021-03-27/ubuntu-2004-kube-v1.18.15.qcow2'
  su - stack -c 'source /opt/stack/devstack/openrc admin admin && /opt/stack/devstack/tools/upload_image.sh https://storage.googleapis.com/artifacts.k8s-staging-capi-openstack.appspot.com/test/cirros/2021-03-27/cirros-0.5.1-x86_64-disk.img'
}
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
- path: /tmp/local.conf
2+
permissions: 0644
3+
content: |
4+
[[local|localrc]]
5+
GIT_BASE=https://github.com
6+
HOST_IP=${HOST_IP}
7+
SERVICE_TIMEOUT=240
8+
FLOATING_RANGE=${FLOATING_RANGE}
9+
10+
# Enable Logging
11+
LOGFILE=/opt/stack/logs/stack.sh.log
12+
VERBOSE=True
13+
LOG_COLOR=True
14+
15+
# Neutron
16+
enable_plugin neutron https://github.com/openstack/neutron stable/${OPENSTACK_RELEASE}
17+
18+
# Octavia
19+
enable_plugin octavia https://github.com/openstack/octavia stable/${OPENSTACK_RELEASE}
20+
enable_plugin octavia-dashboard https://github.com/openstack/octavia-dashboard stable/${OPENSTACK_RELEASE}
21+
22+
DATABASE_PASSWORD=secretdatabase
23+
RABBIT_PASSWORD=secretrabbit
24+
ADMIN_PASSWORD=secretadmin
25+
SERVICE_PASSWORD=secretservice
26+
SERVICE_TOKEN=111222333444
27+
28+
# Pre-requisite
29+
ENABLED_SERVICES=key,rabbit,mysql
30+
# Nova
31+
ENABLED_SERVICES+=,n-api,n-obj,n-cpu,n-cond,n-sch,n-novnc,n-api-meta
32+
# Placement service needed for Nova
33+
ENABLED_SERVICES+=,placement-api,placement-client
34+
# Glance
35+
ENABLED_SERVICES+=,g-api,g-reg
36+
37+
# Octavia-Neutron
38+
ENABLED_SERVICES+=,neutron-api,neutron-agent,neutron-dhcp,neutron-l3
39+
ENABLED_SERVICES+=,neutron-metadata-agent,neutron-qos
40+
# Octavia
41+
ENABLED_SERVICES+=,octavia,o-api,o-cw,o-hm,o-hk,o-da
42+
43+
# Horizon (enable for manual tests)
44+
# ENABLED_SERVICES+=,horizon
45+
46+
# Cinder
47+
ENABLED_SERVICES+=,c-sch,c-api,c-vol
48+
49+
# Additional services
50+
ENABLED_SERVICES+=${OPENSTACK_ADDITIONAL_SERVICES}
51+
52+
LIBVIRT_TYPE=kvm
53+
54+
# Don't download default images, just our test images
55+
DOWNLOAD_DEFAULT_IMAGES=False
56+
# We upload a prebuilt Amphora image so it doesn't have to be built
57+
IMAGE_URLS="https://storage.googleapis.com/artifacts.k8s-staging-capi-openstack.appspot.com/test/amphora/2021-03-27/amphora-x64-haproxy.qcow2"
58+
59+
[[post-config|$NOVA_CONF]]
60+
[DEFAULT]
61+
# On GCE's n2-standard-16 an allocation ratio of 2.0 gives us 32 vCPUS,
62+
# which is enough to run any 2 test clusters concurrently.
63+
cpu_allocation_ratio = 2.0
64+
65+
# We ensure that the controller has capacity to run all workloads, and that
66+
# all workloads run on the controller unless explicitly scheduled to the
67+
# worker. This prevents non-deterministic failures of multi-AZ tests due to
68+
# capacity on the worker.
69+
default_schedule_zone = ${PRIMARY_AZ}
70+
71+
[scheduler]
72+
# query_placement_for_availability_zone is the default from Xena
73+
query_placement_for_availability_zone = True
74+
75+
[[post-config|$CINDER_CONF]]
76+
[DEFAULT]
77+
storage_availability_zone = ${PRIMARY_AZ}
78+
79+
[[post-config|/$NEUTRON_CORE_PLUGIN_CONF]]
80+
[ml2]
81+
path_mtu = ${MTU}
82+
- path: /tmp/register-worker.sh
83+
permissions: 0755
84+
content: |
85+
#!/bin/bash
86+
# Waits for a second nova-compute service to appear, then adds every
# hypervisor other than this host to the secondary-AZ aggregate.
87+
# Load the admin auth/endpoint environment variables for the openstack CLI
source /opt/stack/devstack/openrc admin admin
88+
89+
# Wait until the worker shows up as a second compute service
# (the service list is re-checked once every 60 seconds)
90+
while [ $(openstack compute service list --service nova-compute -f value | wc -l) -lt 2 ]
91+
do
92+
sleep 60
93+
done
94+
95+
# Have nova discover the newly registered compute host(s)
nova-manage cell_v2 discover_hosts
96+
97+
# Look for hypervisors other than the current host and add them to a
98+
# secondary AZ
# The aggregate (whose zone carries the same name) is only created when it
# does not exist yet, so re-running this script does not fail at this step.
99+
if ! openstack aggregate show ${SECONDARY_AZ} > /dev/null 2>&1; then
100+
openstack aggregate create --zone ${SECONDARY_AZ} ${SECONDARY_AZ}
101+
fi
102+
103+
# grep -v drops every hypervisor whose name matches this machine's hostname
for hypervisor in $(openstack hypervisor list -f value -c "Hypervisor Hostname" 2>/dev/null | grep -v $(hostname)); do
104+
openstack aggregate add host ${SECONDARY_AZ} ${hypervisor}
105+
done
106+
- path: /etc/systemd/system/register-worker.service
107+
permissions: 0644
108+
content: |
109+
[Unit]
110+
Description=Register devstack worker node
111+
112+
[Service]
113+
Type=oneshot
114+
User=stack
115+
ExecStart=/tmp/register-worker.sh
116+
Environment=TERM=ansi
117+
118+
[Install]
119+
WantedBy=multi-user.target
120+
- path: /root/devstack.sh
121+
permissions: 0755
122+
content: |
123+
#!/bin/bash
124+
# Controller provisioning script: checks for KVM, installs and runs devstack,
# uploads the test images, sets up NAT for the floating range, and starts the
# unit that registers the worker node.
125+
set -x -o errexit -o nounset -o pipefail
126+
127+
# Provides ensure_kvm, run_devstack and upload_images
source /tmp/devstack-common.sh
128+
129+
ensure_kvm
130+
131+
# from https://raw.githubusercontent.com/openstack/octavia/master/devstack/contrib/new-octavia-devstack.sh
132+
git clone -b stable/${OPENSTACK_RELEASE} https://github.com/openstack/devstack.git /tmp/devstack
133+
# Use the local.conf written to /tmp as devstack's configuration
cp /tmp/local.conf /tmp/devstack/
134+
135+
# Create the stack user
136+
HOST_IP=${HOST_IP} /tmp/devstack/tools/create-stack-user.sh
137+
chmod 0755 /opt/stack
138+
139+
# Move everything into place (/opt/stack is the $HOME folder of the stack user)
140+
mv /tmp/devstack /opt/stack/
141+
chown -R stack:stack /opt/stack/devstack/
142+
143+
run_devstack
144+
upload_images
145+
146+
# When using ML2/OVS all public traffic will be routed via the L3 agent,
147+
# which is only running on the controller
# Find the name of the interface that carries the host IP, masquerade traffic
# from the floating range leaving through it, and accept forwarding of
# traffic from the floating range.
148+
INTERFACE=$(ip -j addr show | jq -re 'map(select(.addr_info | map(.local == "${HOST_IP}") | any)) | first | .ifname')
149+
sudo iptables -t nat -I POSTROUTING -o ${INTERFACE} -s ${FLOATING_RANGE} -j MASQUERADE
150+
sudo iptables -I FORWARD -s ${FLOATING_RANGE} -j ACCEPT
151+
152+
# Start polling for the worker node
153+
# We defined the register-worker unit above
# --no-block queues the start job without waiting for the polling script to
# finish, so this script can complete straight away.
154+
systemctl daemon-reload
155+
systemctl start --no-block register-worker

0 commit comments

Comments
 (0)