Skip to content

Commit fc1fce7

Browse files
marinakog and rjeberhard
authored and committed
Added more checks for OKE cluster connectivity failures
1 parent af194fb commit fc1fce7

File tree

2 files changed

+55
-4
lines changed

2 files changed

+55
-4
lines changed

Jenkinsfile.oke

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ EOF
422422

423423
export OCI_CLI_CONFIG_FILE=${jenkins_home_directory}/.oci/config
424424
export OCI_CLI_PROFILE=${oci_profile}
425-
425+
oci setup repair-file-permissions --file ${jenkins_home_directory}/.oci/config
426426
echo 'Create a OKE cluster ${CLUSTER_NAME}'
427427

428428
cp -rf ${terraform_script_dir_name}/* ${WORKSPACE}/terraform/.

integration-tests/src/test/resources/oke/terraform/okemodule/oke.create.sh

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,10 +93,40 @@ checkKubernetesCliConnection() {
9393
exit 1
9494
fi
9595
echo "clusterPublicIP: ###$clusterPublicIP###"
96-
unset NO_PROXY
97-
export NO_PROXY=localhost,127.0.0.1,10.244.0.0/16,10.101.0.0/16,10.196.0.0/16,$clusterPublicIP
96+
echo " NO_PROXY=#$NO_PROXY# "
97+
export NO_PROXY=$NO_PROXY,localhost,127.0.0.1,$clusterPublicIP
9898
echo "export NO_PROXY=:$NO_PROXY"
9999

100+
# Maximum number of retries
101+
max_retries=10
102+
103+
# Initial retry count
104+
retry_count=0
105+
106+
# Retry cluster-info until it succeeds or the retries are exhausted
107+
while [[ $retry_count -lt $max_retries ]]; do
108+
echo "Attempt $((retry_count+1)) of $max_retries to connect to Kubernetes cluster..."
109+
110+
# Try to execute kubectl cluster-info
111+
${KUBERNETES_CLI:-kubectl} cluster-info
112+
if [[ $? -eq 0 ]]; then
113+
echo "Connected to Kubernetes cluster successfully!"
114+
break
115+
else
116+
echo "Connection refused or failed, retrying..."
117+
retry_count=$((retry_count + 1))
118+
sleep 5 # Wait 5 seconds before retrying
119+
fi
120+
done
121+
122+
# Check if retries were exhausted
123+
if [[ $retry_count -eq $max_retries ]]; then
124+
echo "Failed to connect to Kubernetes cluster after $max_retries attempts."
125+
cd "${terraformVarDir}"
126+
terraform destroy -auto-approve -var-file="${terraformVarDir}/${clusterTFVarsFile}.tfvars"
127+
createCluster
128+
fi
129+
100130
local myline_output=$(${KUBERNETES_CLI:-kubectl} get nodes -o wide 2>&1)
101131

102132
if echo "$myline_output" | grep -q "Unable to connect to the server: net/http: TLS handshake timeout"; then
@@ -124,6 +154,26 @@ checkKubernetesCliConnection() {
124154
}
125155

126156
checkClusterRunning() {
157+
kubeconfig_file=${terraformVarDir}/${okeclustername}_kubeconfig
158+
export KUBECONFIG=${terraformVarDir}/${okeclustername}_kubeconfig
159+
echo "Kubeconfig file : $KUBECONFIG"
160+
ls -al $KUBECONFIG
161+
162+
if [ -f "$kubeconfig_file" ] && [ -s "$kubeconfig_file" ]; then
163+
echo "Kubeconfig file exists and is not empty."
164+
else
165+
if [ ! -f "$kubeconfig_file" ]; then
166+
echo "Kubeconfig file does not exist."
167+
cd "${terraformVarDir}"
168+
terraform destroy -auto-approve -var-file="${terraformVarDir}/${clusterTFVarsFile}.tfvars"
169+
createCluster
170+
else
171+
echo "Kubeconfig file exists but is empty."
172+
cd "${terraformVarDir}"
173+
terraform destroy -auto-approve -var-file="${terraformVarDir}/${clusterTFVarsFile}.tfvars"
174+
createCluster
175+
fi
176+
fi
127177
checkKubernetesCliConnection
128178

129179
local privateIP=${vcn_cidr_prefix}
@@ -210,7 +260,8 @@ setupTerraform
210260
deleteOlderVersionTerraformOCIProvider
211261

212262
chmod 600 ${ocipk_path}
213-
263+
sudo yum reinstall ca-certificates -y
264+
sudo iptables -A OUTPUT -p tcp --dport 6443 -j ACCEPT
214265
# Run terraform init, plan, and apply to create the OKE cluster based on the provided tfvars file ${clusterTFVarsFile}.tfvars
215266
createCluster
216267
# Check the status of the OKE cluster nodes; destroy if they cannot be accessed

0 commit comments

Comments
 (0)