
Commit eb019cb

fix: fix intermittent sync issues impacting confirm_network_healthy script execution
1 parent 921190e commit eb019cb

File tree: 5 files changed (+30, -5 lines)

examples/fscloud/main.tf

Lines changed: 1 addition & 1 deletion
@@ -238,7 +238,7 @@ module "ocp_fscloud" {
   cluster_name = var.prefix
   ibmcloud_api_key = var.ibmcloud_api_key
   resource_group_id = module.resource_group.resource_group_id
-  region = "us-south"
+  region = var.region
   force_delete_storage = true
   vpc_id = module.vpc.vpc_id
   vpc_subnets = local.cluster_vpc_subnets
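
This drops the hardcoded us-south so the example's region is configurable at plan time; the corresponding `region` variable declaration is presumably defined in examples/fscloud/variables.tf, outside this hunk.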

examples/fscloud/variables.tf

Lines changed: 2 additions & 2 deletions
@@ -44,12 +44,12 @@ variable "hpcs_instance_guid" {
 }

 variable "hpcs_key_crn_cluster" {
-  description = "CRN of the Hyper Protect Crypto service to use to encrypt the cluster boot volume"
+  description = "CRN of the Hyper Protect Crypto service key to use to encrypt the cluster boot volume"
   type = string
 }

 variable "hpcs_key_crn_worker_pool" {
-  description = "CRN of the Hyper Protect Crypto service to use to encrypt the worker pool boot volumes"
+  description = "CRN of the Hyper Protect Crypto service key to use to encrypt the worker pool boot volumes"
   type = string
 }

main.tf

Lines changed: 1 addition & 0 deletions
@@ -256,6 +256,7 @@ data "ibm_container_cluster_config" "cluster_config" {
   count = var.verify_worker_network_readiness || lookup(local.addons_list, "cluster-autoscaler", null) != null ? 1 : 0
   cluster_name_id = local.cluster_id
   config_dir = "${path.module}/kubeconfig"
+  admin = true # workaround for https://github.com/terraform-ibm-modules/terraform-ibm-base-ocp-vpc/issues/374
   resource_group_id = var.resource_group_id
   endpoint_type = var.cluster_config_endpoint_type != "default" ? var.cluster_config_endpoint_type : null # null value represents default
 }
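
The new `admin = true` argument makes the data source fetch the cluster's admin kubeconfig rather than a per-user one, as a workaround for the intermittent sync failures tracked in issue #374. For orientation only (not part of this commit), the roughly equivalent manual step with the IBM Cloud CLI would be:

# Sketch: fetch the admin kubeconfig by hand; "my-cluster" is a placeholder.
ibmcloud ks cluster config --cluster my-cluster --admin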

scripts/confirm_network_healthy.sh

Lines changed: 24 additions & 2 deletions
@@ -7,9 +7,31 @@ function run_checks() {
   last_attempt=$1
   namespace=calico-system

-  # Get list of calico-node pods (There will be 1 pod per worker node)
+  MAX_ATTEMPTS=10
+  attempt=0
   PODS=()
-  while IFS='' read -r line; do PODS+=("$line"); done < <(kubectl get pods -n "${namespace}" | grep calico-node | cut -f1 -d ' ')
+  while [ $attempt -lt $MAX_ATTEMPTS ]; do
+    # Get list of calico-node pods (There will be 1 pod per worker node)
+    if while IFS='' read -r line; do PODS+=("$line"); done < <(kubectl get pods -n "${namespace}" | grep calico-node | cut -f1 -d ' '); then
+      if [ ${#PODS[@]} -eq 0 ]; then
+        echo "No calico-node pods found. Retrying in 10s. (Attempt $((attempt+1)) / $MAX_ATTEMPTS)"
+        sleep 10
+        ((attempt=attempt+1))
+      else
+        # Pods found, break out of loop
+        break
+      fi
+    else
+      echo "Error getting calico-node pods. Retrying in 10s. (Attempt $((attempt+1)) / $MAX_ATTEMPTS)"
+      sleep 10
+      ((attempt=attempt+1))
+    fi
+  done
+
+  if [ ${#PODS[@]} -eq 0 ]; then
+    echo "No calico-node pods found after $MAX_ATTEMPTS attempts. Exiting."
+    exit 1
+  fi

   # Iterate through pods to check health
   healthy=true
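
A note on the construct above: `if while ...; done < <(kubectl ...); then` uses the whole read loop as the `if` condition, so its exit status reflects the loop body rather than kubectl itself. A minimal standalone sketch of the same bounded-retry idea, with kubectl's exit status checked directly (placeholder names, not from this commit):

#!/bin/bash
# Sketch of a bounded retry around a kubectl query (assumptions: kubectl is
# configured and the pods live in calico-system, as in the script above).
MAX_ATTEMPTS=10
attempt=0
PODS=()
while [ "$attempt" -lt "$MAX_ATTEMPTS" ]; do
  # Capture kubectl's own exit status before filtering for calico-node rows.
  if output=$(kubectl get pods -n calico-system --no-headers 2>/dev/null); then
    mapfile -t PODS < <(printf '%s\n' "$output" | awk '/calico-node/ {print $1}')
    [ "${#PODS[@]}" -gt 0 ] && break
  fi
  attempt=$((attempt + 1))
  echo "calico-node pods not listed yet (attempt $attempt/$MAX_ATTEMPTS); retrying in 10s"
  sleep 10
done
if [ "${#PODS[@]}" -eq 0 ]; then
  echo "No calico-node pods found after $MAX_ATTEMPTS attempts."
  exit 1
fi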

scripts/get_config_map_status.sh

Lines changed: 2 additions & 0 deletions
@@ -15,6 +15,8 @@ done

 if [[ $COUNTER -eq $MAX_ATTEMPTS ]]; then
   echo "ConfigMap '$CONFIGMAP_NAME' did not become available within $MAX_ATTEMPTS attempts."
+  # Output for debugging
+  kubectl get configmaps -n $NAMESPACE
   exit 1
 else
   echo "ConfigMap '$CONFIGMAP_NAME' is now available." >&2
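
A minor hardening observation (not part of the commit): `$NAMESPACE` is expanded unquoted in the new debug line; quoting it is slightly safer if the variable is ever empty or contains whitespace:

# Defensive variant of the added debug line (assumption: NAMESPACE is set earlier in the script).
kubectl get configmaps -n "$NAMESPACE"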
