Skip to content

Commit b36103d

Browse files
templecloud authored and owainlewis committed
Adding canary 'monitor' mode. (#241)
Adding canary monitor mode.
1 parent d94b7a7 commit b36103d

File tree

8 files changed

+173
-65
lines changed

8 files changed

+173
-65
lines changed

Makefile

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -91,22 +91,28 @@ upgrade:
9191
# Requires a 'dist/oci-cloud-controller-manager-rollback.yaml' manifest. Requires $KUBECONFIG set.
9292
.PHONY: rollback
9393
rollback:
94-
# Rollback the current CCM to the specified version
94+
# Rollback the current CCM to the specified version.
9595
@./hack/deploy.sh rollback-original-ccm
9696

9797
.PHONY: e2e
9898
e2e:
9999
@./hack/test-e2e.sh
100100

101-
# Run the canary tests.
102-
.PHONY: canary
103-
canary:
104-
@./hack/test-canary.sh
101+
# Run the canary tests - in single run mode.
102+
.PHONY: canary-run-once
103+
canary-run-once:
104+
@./hack/test-canary.sh run-once
105105

106-
# Validate the generated canary test image.
106+
# Run the canary tests - in monitor (infinite loop) mode.
107+
.PHONY: canary-monitor
108+
canary-monitor:
109+
@./hack/test-canary.sh monitor
110+
111+
# Validate the generated canary test image. Runs test once
112+
# and monitors from sidecar.
107113
.PHONY: validate-canary
108114
validate-canary:
109-
@./hack/validate-canary.sh
115+
@./hack/validate-canary.sh run
110116

111117
.PHONY: clean
112118
clean:

hack/ccm-canary-entrypoint.sh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/bin/bash
2+
3+
# Copyright 2018 Oracle and/or its affiliates. All rights reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
echo "\$METRICS_FILE: ${METRICS_FILE}"
18+
echo "\$MONITOR_PERIOD: ${MONITOR_PERIOD}"
19+
20+
# For OCI usage, 'monitor' is the default canary mode.
21+
if [ -z "${CANARY_MODE}" ]; then
22+
export CANARY_MODE="monitor"
23+
fi
24+
25+
pushd "${GOPATH}/src/github.com/oracle/oci-cloud-controller-manager"
26+
./hack/test-canary.sh ${CANARY_MODE}
27+
popd

hack/test-canary.sh

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ function run_canary_tests() {
3232
ginkgo -v -progress -noColor=true \
3333
-focus "\[Canary\]" \
3434
test/e2e \
35-
-- --kubeconfig=${KUBECONFIG} --delete-namespace=false \
35+
-- --kubeconfig=${KUBECONFIG} --delete-namespace=true \
3636
2>&1 | tee "${TEST_LOG}"
3737
}
3838

@@ -75,15 +75,16 @@ function create_results() {
7575
echo "Creating result file: ${METRICS_FILE}"
7676
cat > "${METRICS_FILE}" <<EOF
7777
{
78-
"start_time": "${START}"
79-
"create_lb": "$(extract_result ${CREATE_LB_TEST})"
78+
"start_time": "${START}",
79+
"create_lb": "$(extract_result ${CREATE_LB_TEST})",
8080
"end_time": "$(now)"
8181
}
8282
EOF
8383
}
8484

85-
# Run the tests and extract the results
86-
function run() {
85+
# Run the tests once and extract the results.
86+
function run-once() {
87+
START=$(now)
8788
init_results
8889
cat "${METRICS_FILE}"
8990
run_canary_tests
@@ -95,13 +96,34 @@ function run() {
9596

9697
# Helper function to clean up log and json files.
9798
function clean() {
98-
kubectl get pods --all-namespaces | grep ccm | awk '{print $1}' | xargs kubectl delete ns
99-
rm "${TEST_DIR}/${TEST_PREFIX}*"
99+
echo "ensuring fresh \$START."
100+
unset START
101+
echo "ensuring fresh ${TEST_LOG} file."
102+
rm -f "${TEST_LOG}"
103+
echo "ensuring fresh ${METRICS_FILE} result file."
104+
rm -f "${METRICS_FILE}"
105+
echo "ensuring all 'cm-e2e-tests' namespaces are terminated."
106+
local res=$(kubectl get ns | grep 'cm-e2e-tests-' | awk '{print $1}')
107+
if [ ! -z "${res}" ]; then
108+
echo ${res} | xargs kubectl delete ns 2> /dev/null
109+
fi
110+
}
111+
112+
# Run the tests in loop with the specified wait period.
113+
function monitor() {
114+
local period=${1:-$MONITOR_PERIOD}
115+
while true;
116+
do
117+
clean && run-once
118+
echo "Sleeping for ${period} before next run..."
119+
sleep "${period}"
120+
done
100121
}
101122

102123
# Main ************************************************************************
103124
#
104125

126+
# Handle mandatory KUBECONFIG requirement.
105127
if [ -z "${KUBECONFIG}" ]; then
106128
if [ -z "${KUBECONFIG_VAR}" ]; then
107129
echo "KUBECONFIG or KUBECONFIG_VAR must be set"
@@ -113,26 +135,31 @@ if [ -z "${KUBECONFIG}" ]; then
113135
fi
114136
fi
115137

116-
START=$(now)
138+
# If not specified, default mandatory 'metrics file' location.
139+
if [ -z "${METRICS_FILE}" ]; then
140+
export METRICS_FILE=/tmp/ccm-canary-metrics.json
141+
fi
117142

118-
TEST_ID=""
119-
if [ "${UNIQUE_TEST_ID}" = true ]; then
120-
TEST_ID="-$(date +"%Y-%m-%d-%H%M%S")"
143+
# If not specified, default mandatory 'monitor period' in seconds.
144+
if [ -z "${MONITOR_PERIOD}" ]; then
145+
export MONITOR_PERIOD=30
121146
fi
122147

148+
# Set up directory for filesystem test log. The success of the test
149+
# is extracted from this log.
123150
if [ -z "${TEST_DIR}" ]; then
124151
TEST_DIR="/tmp"
125152
fi
126153
mkdir -p "${TEST_DIR}"
154+
TEST_LOG="${TEST_DIR}/oci-ccm-canary-test.log"
127155

128-
TEST_PREFIX="oci-ccm-canary-test"
129-
TEST_LOG="${TEST_DIR}/${TEST_PREFIX}${TEST_ID}.log"
130-
131-
# If provided, execute the specified function.
132156
if [ ! -z "$1" ]; then
133-
$1
157+
# If provided, execute the specified function with args.
158+
# e.g. run-once, monitor, clean, etc.
159+
$@
134160
else
135-
run
161+
# Otherwise, run the monitor
162+
monitor
136163
fi
137164

138165
exit $?

hack/validate-canary.sh

Lines changed: 51 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ function local-docker-mode() {
8888
docker exec -it ${cid} /bin/bash
8989
}
9090

91-
9291
# Test Functions **************************************************************
9392
#
9493

@@ -107,28 +106,32 @@ spec:
107106
containers:
108107
- name: oci-cloud-controller-manager-canary-test-runner
109108
image: iad.ocir.io/oracle/oci-cloud-controller-manager-canary:${version}
110-
command: ["/bin/bash"]
111-
args: ["-ec", "make canary"]
112109
env:
113-
- name: METRICS_FILE
114-
value: /metrics/output.json
115-
- name: KUBECONFIG_VAR
116-
value: $(cat ${KUBECONFIG} | openssl enc -base64 -A)
110+
- name: KUBECONFIG_VAR
111+
value: $(cat ${KUBECONFIG} | openssl enc -base64 -A)
112+
- name: METRICS_FILE
113+
value: /metrics/output.json
114+
- name: MONITOR_PERIOD
115+
value: "30"
116+
- name: CANARY_MODE
117+
value: monitor
118+
command: ["/bin/bash"]
119+
args: ["-ec", "/oci/scripts/ccm-canary-entrypoint.sh"]
117120
volumeMounts:
118121
- mountPath: /metrics
119122
name: metrics-volume
120-
123+
121124
- name: oci-cloud-controller-manager-canary-test-reporter
122125
image: iad.ocir.io/oracle/oci-cloud-controller-manager-ci-e2e:1.0.1
123126
command: ["/bin/bash"]
124-
args: ["-ec", "touch \$METRICS_FILE; while [ -z \$(cat \$METRICS_FILE | grep 'end_time' | cut -d':' -f 1) ]; do sleep 1; done; cat \$METRICS_FILE"]
127+
args: ["-ec", "while true; do sleep 10; cat \$METRICS_FILE; done"]
125128
env:
126-
- name: METRICS_FILE
127-
value: /metrics/output.json
129+
- name: METRICS_FILE
130+
value: /metrics/output.json
128131
volumeMounts:
129132
- mountPath: /metrics
130133
name: metrics-volume
131-
134+
132135
imagePullSecrets:
133136
- name: ocir
134137
@@ -152,17 +155,37 @@ function run() {
152155
clean-canary
153156
generate-canary-manifest
154157
deploy-canary
155-
# Tail the logs of the reporter to block until it completes. The report only logs the result file.
156-
res=$(kubectl logs -f oci-cloud-controller-manager-canary -c oci-cloud-controller-manager-canary-test-reporter)
157-
# Display the results.
158-
echo "${res}"
159-
# Grep the log to return an error code.
160-
error=$(echo "${res}" | grep 'end_time' | cut -d':' -f 1)
161-
if [ -z ${error} ]; then
162-
exit 1
163-
else
164-
exit 0
165-
fi
158+
159+
local canary_runs=${CANARY_RUNS}
160+
local duration=1800
161+
local sleep=10
162+
local timeout=$(($(date +%s) + $duration))
163+
while [ $(date +%s) -lt $timeout ]; do
164+
echo "waiting for ${canary_runs} runs."
165+
local logs=$(kubectl logs oci-cloud-controller-manager-canary -c oci-cloud-controller-manager-canary-test-reporter)
166+
local num_runs=$(echo "${logs}"| grep 'end_time' | uniq | wc -l)
167+
echo "currently run ${num_runs} times."
168+
if [ "${num_runs}" -ge "${canary_runs}" ]; then
169+
# Remove canary and delete any remaining test namespaces.
170+
kubectl delete pod oci-cloud-controller-manager-canary
171+
local res=$(kubectl get ns | grep 'cm-e2e-tests-' | awk '{print $1}')
172+
if [ ! -z "${res}" ]; then
173+
echo ${res} | xargs kubectl delete ns
174+
fi
175+
# Test results
176+
local num_pass=$(echo "${logs}"| grep '"create_lb": "1"' | uniq | wc -l)
177+
local num_fail=$(echo "${logs}"| grep '"create_lb": "0"' | uniq | wc -l)
178+
if [ "${num_fail}" -gt "0" ]; then
179+
echo "FAILED"
180+
kubectl logs oci-cloud-controller-manager-canary -c oci-cloud-controller-manager-canary-test-runner
181+
exit 1
182+
elif [ "${num_pass}" -eq "1" ]; then
183+
echo "PASSED"
184+
exit 0
185+
fi
186+
fi
187+
sleep ${sleep}
188+
done
166189
}
167190

168191
# Main ************************************************************************
@@ -178,11 +201,16 @@ if [ -z "${KUBECONFIG}" ]; then
178201
export KUBECONFIG=/tmp/kubeconfig
179202
fi
180203
fi
204+
181205
if [ -z "${VERSION}" ]; then
182206
echo "The VERSION must be set"
183207
exit 1
184208
fi
185209

210+
if [ -z "${CANARY_RUNS}" ]; then
211+
export CANARY_RUNS=1
212+
fi
213+
186214
# If provided, execute the specified function.
187215
if [ ! -z "$1" ]; then
188216
$1

test/e2e/framework/framework.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"flag"
2121
"fmt"
2222
"os"
23+
"strconv"
2324
"strings"
2425
"time"
2526

@@ -51,6 +52,7 @@ var (
5152
kubeconfig string // path to kubeconfig file
5253
deleteNamespace bool // whether or not to delete test namespaces
5354
cloudConfigFile string // path to cloud provider config file
55+
nodePortTest bool // whether or not to test the connectivity of node ports.
5456
ccmSeclistID string // The ocid of the loadbalancer subnet seclist. Optional.
5557
k8sSeclistID string // The ocid of the k8s worker subnet seclist. Optional.
5658
)
@@ -59,6 +61,7 @@ func init() {
5961
flag.StringVar(&kubeconfig, "kubeconfig", "", "Path to Kubeconfig file with authorization and master location information.")
6062
flag.BoolVar(&deleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.")
6163
flag.StringVar(&cloudConfigFile, "cloud-config", "", "The path to the cloud provider configuration file. Empty string for no configuration file.")
64+
flag.BoolVar(&nodePortTest, "nodeport-test", false, "If true test will include 'nodePort' connectectivity tests.")
6265
flag.StringVar(&ccmSeclistID, "ccm-seclist-id", "", "The ocid of the loadbalancer subnet seclist. Enables additional seclist rule tests. If specified the 'k8s-seclist-id parameter' is also required.")
6366
flag.StringVar(&k8sSeclistID, "k8s-seclist-id", "", "The ocid of the k8s worker subnet seclist. Enables additional seclist rule tests. If specified the 'ccm-seclist-id parameter' is also required.")
6467
}
@@ -75,6 +78,7 @@ type Framework struct {
7578

7679
CloudProviderConfig *oci.Config // If specified, the CloudProviderConfig. This provides information on the configuration of the test cluster.
7780
Client client.Interface // An OCI client for checking the state of any provisioned OCI infrastructure during testing.
81+
NodePortTest bool // An optional configuration for E2E testing. If set to true, then will run additional E2E nodePort connectivity checks during testing.
7882
CCMSecListID string // An optional configuration for E2E testing. If present can be used to run additional checks against seclist during testing.
7983
K8SSecListID string // An optional configuration for E2E testing. If present can be used to run additional checks against seclist during testing.
8084

@@ -111,6 +115,13 @@ func NewFramework(baseName string, client clientset.Interface) *Framework {
111115
BaseName: baseName,
112116
ClientSet: client,
113117
}
118+
// Dev/CI only configuration. Enable NodePort tests.
119+
npt, err := strconv.ParseBool(os.Getenv("NODEPORT_TEST"))
120+
if err != nil {
121+
f.NodePortTest = false
122+
} else {
123+
f.NodePortTest = npt
124+
}
114125
// Dev/CI only configuration. The seclist for CCM load-balancer routes.
115126
f.CCMSecListID = os.Getenv("CCM_SECLIST_ID")
116127
if ccmSeclistID != "" {
@@ -202,7 +213,8 @@ func (f *Framework) BeforeEach() {
202213
// https://github.com/onsi/ginkgo/issues/222
203214
f.cleanupHandle = AddCleanupAction(f.AfterEach)
204215

205-
if f.Client == nil {
216+
// Create an OCI client if the cloudConfig has been specified.
217+
if cloudConfigFile != "" && f.Client == nil {
206218
By("Creating OCI client")
207219
cloudProviderConfig, err := createCloudProviderConfig(cloudConfigFile)
208220
Expect(err).NotTo(HaveOccurred())

test/e2e/framework/seclist_util.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ func CountSinglePortSecListRules(oci client.Interface, egressSecListID, ingressS
3434

3535
// CountEgressSinglePortRules counts the number of 'single port' (non-ranged)
3636
// egress rules for the specified seclist and port.
37-
// If no seclist is provided, then 0 is returned.
37+
// If no client or seclist is provided, then 0 is returned.
3838
func CountEgressSinglePortRules(oci client.Interface, seclistID string, port int) int {
3939
count := 0
40-
if seclistID != "" {
40+
if oci != nil && seclistID != "" {
4141
secList, err := oci.Networking().GetSecurityList(context.Background(), seclistID)
4242
if err != nil {
4343
Failf("Could not obtain security list: %v", err)
@@ -89,10 +89,10 @@ func WaitForSinglePortEgressRulesAfterPortChangeOrFail(oci client.Interface, sec
8989

9090
// CountIngressSinglePortRules counts the number of 'single port' (non-ranged)
9191
// ingress rules for the specified seclist and port.
92-
// If no seclist is provided, then 0 is returned.
92+
// If no client or seclist is provided, then 0 is returned.
9393
func CountIngressSinglePortRules(oci client.Interface, seclistID string, port int) int {
9494
count := 0
95-
if seclistID != "" {
95+
if oci != nil && seclistID != "" {
9696
secList, err := oci.Networking().GetSecurityList(context.Background(), seclistID)
9797
if err != nil {
9898
Failf("Could not obtain security list: %v", err)

0 commit comments

Comments
 (0)