Skip to content

Commit bf20f67

Browse files
committed
G2P-4380 DNS issue fix for base
1 parent ed3e64e commit bf20f67

File tree

2 files changed

+94
-49
lines changed

2 files changed

+94
-49
lines changed

automation/lib/env-phase1.sh

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,46 @@ env_phase1_step7_keycloak_secret() {
525525
mark_step_done "$step_id"
526526
}
527527

528+
# ─────────────────────────────────────────────────────────────────────────────
529+
# Step 1.8: CA certificate ConfigMap (local mode only)
530+
# ─────────────────────────────────────────────────────────────────────────────
531+
# In local mode, services inside pods need to trust our self-signed CA
532+
# when talking to https://keycloak.openg2p.test. We create a ConfigMap
533+
# with the CA cert so it can be mounted into pods and added to trust stores.
534+
# Publishes the CA certificate as ConfigMap 'openg2p-ca-cert' in the
# environment namespace (name read from cfg "environment"). Does nothing
# unless cfg "domain_mode" is "local".
#
# Arguments: $1 - optional path to the CA certificate
#                 (default: /etc/openg2p/ca/ca.crt)
# Returns:   0 on success or when skipped (non-local mode);
#            1 if ensure_kubeconfig fails, the certificate file is missing,
#            or creating/updating the ConfigMap fails.
env_phase1_step8_ca_configmap() {
    local domain_mode=$(cfg "domain_mode" "custom")
    [[ "$domain_mode" == "local" ]] || return 0

    local env_name=$(cfg "environment")
    log_step "E1.8" "Creating CA certificate ConfigMap in namespace '${env_name}'"

    ensure_kubeconfig || return 1

    local ca_cert="${1:-/etc/openg2p/ca/ca.crt}"
    if [[ ! -f "$ca_cert" ]]; then
        log_error "CA certificate not found at ${ca_cert}" \
            "The infra script should have created the CA" \
            "Re-run openg2p-infra.sh phase 1"
        return 1
    fi

    if kubectl -n "$env_name" get configmap openg2p-ca-cert &>/dev/null; then
        log_info "ConfigMap 'openg2p-ca-cert' already exists — updating..."
        # Render the desired ConfigMap client-side and apply it over the
        # existing one. The namespace is passed to apply explicitly so the
        # update does not depend on the rendered manifest carrying it.
        # A failed update must not be reported as success, so check the
        # pipeline status instead of discarding it.
        kubectl -n "$env_name" create configmap openg2p-ca-cert \
            --from-file=ca.crt="$ca_cert" --dry-run=client -o yaml | \
            kubectl -n "$env_name" apply -f - > /dev/null 2>&1 || {
            log_error "Failed to update CA cert ConfigMap" \
                "kubectl apply failed"
            return 1
        }
    else
        kubectl -n "$env_name" create configmap openg2p-ca-cert \
            --from-file=ca.crt="$ca_cert" || {
            log_error "Failed to create CA cert ConfigMap" \
                "kubectl create configmap failed"
            return 1
        }
    fi

    log_success "ConfigMap 'openg2p-ca-cert' created with CA certificate."
}
567+
528568
# ─────────────────────────────────────────────────────────────────────────────
529569
# Run all Phase 1 steps
530570
# ─────────────────────────────────────────────────────────────────────────────
@@ -540,6 +580,7 @@ run_env_phase1() {
540580
env_phase1_step5_rancher_project
541581
env_phase1_step6_istio_gateway
542582
env_phase1_step7_keycloak_secret
583+
env_phase1_step8_ca_configmap
543584

544585
log_success "Phase 1 complete — environment infrastructure for '${env_name}' is ready."
545586
}

automation/lib/phase1.sh

Lines changed: 53 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -728,81 +728,85 @@ EOF
728728

729729
log_success "dnsmasq configured. All *.${local_domain} resolves to ${node_ip}."
730730

731-
# Configure CoreDNS to forward local domain queries to dnsmasq.
732-
# Pods use CoreDNS (kube-system/rke2-coredns-rke2-coredns) for DNS, which
733-
# doesn't know about our local domains. We add a custom server block that
734-
# forwards *.openg2p.test to dnsmasq on the node IP.
735-
# This runs after dnsmasq is up but before K8s is necessarily ready,
736-
# so we save the config and apply it after RKE2 starts (in run_phase1).
737-
log_info "Preparing CoreDNS custom config for ${local_domain} -> dnsmasq..."
731+
# Save local domain and node IP for the CoreDNS patching step (step 7b).
732+
# CoreDNS patching requires kubectl, which is available after RKE2 starts.
738733
mkdir -p /var/lib/openg2p/deploy-state
739-
cat > /var/lib/openg2p/deploy-state/coredns-custom.yaml <<DNSEOF
740-
apiVersion: v1
741-
kind: ConfigMap
742-
metadata:
743-
name: rke2-coredns-rke2-coredns-custom
744-
namespace: kube-system
745-
data:
746-
openg2p.server: |
747-
${local_domain}:53 {
748-
errors
749-
cache 30
750-
forward . ${node_ip}
751-
}
752-
DNSEOF
753-
log_info "CoreDNS custom config saved. Will be applied after RKE2 starts."
734+
echo "${local_domain}" > /var/lib/openg2p/deploy-state/coredns-local-domain
735+
echo "${node_ip}" > /var/lib/openg2p/deploy-state/coredns-node-ip
754736

755737
mark_step_done "$step_id"
756738
}
757739

758740
# ─────────────────────────────────────────────────────────────────────────────
759-
# Step 7b: Apply CoreDNS custom config (local mode only)
741+
# Step 7b: Patch CoreDNS Corefile for local domain forwarding
760742
# ─────────────────────────────────────────────────────────────────────────────
761-
# In local mode, pods inside the cluster need to resolve *.openg2p.test.
762-
# CoreDNS only knows about cluster.local — we add a custom server block
763-
# that forwards local domain queries to dnsmasq on the node.
764-
# RKE2's CoreDNS watches the ConfigMap rke2-coredns-rke2-coredns-custom
765-
# and auto-reloads when it changes.
743+
# In local mode, pods use CoreDNS which only knows about cluster.local.
744+
# We patch the main CoreDNS Corefile to add a server block that forwards
745+
# *.openg2p.test queries to dnsmasq on the node IP.
746+
#
747+
# Note: RKE2's CoreDNS does NOT mount a custom ConfigMap volume by default,
748+
# so the "import custom/*.server" approach doesn't work. We inject the
749+
# server block directly into the main rke2-coredns-rke2-coredns ConfigMap.
766750
phase1_step7b_coredns_custom() {
767751
local domain_mode=$(cfg "domain_mode" "custom")
768752
[[ "$domain_mode" == "local" ]] || return 0
769753

770754
local step_id="phase1.coredns_custom"
771-
skip_if_done "$step_id" "CoreDNS custom config" && return 0
755+
skip_if_done "$step_id" "CoreDNS local domain forwarding" && return 0
772756

773-
local coredns_file="/var/lib/openg2p/deploy-state/coredns-custom.yaml"
774-
if [[ ! -f "$coredns_file" ]]; then
775-
log_info "No CoreDNS custom config found — skipping."
776-
return 0
777-
fi
757+
local local_domain=$(cfg "local_domain" "openg2p.test")
758+
local node_ip=$(cfg "node_ip")
778759

779-
log_info "Applying CoreDNS custom config for local domain forwarding..."
760+
log_info "Patching CoreDNS Corefile to forward ${local_domain} -> dnsmasq (${node_ip})..."
780761
ensure_kubeconfig || return 1
781762

782-
kubectl apply -f "$coredns_file" || {
783-
log_error "Failed to apply CoreDNS custom config" \
784-
"kubectl apply failed" \
785-
"Check CoreDNS ConfigMap" \
786-
"kubectl -n kube-system get configmap rke2-coredns-rke2-coredns-custom -o yaml"
763+
# Check if the Corefile already has the local domain block
764+
local current_corefile
765+
current_corefile=$(kubectl -n kube-system get configmap rke2-coredns-rke2-coredns \
766+
-o jsonpath='{.data.Corefile}' 2>/dev/null || true)
767+
768+
if [[ -z "$current_corefile" ]]; then
769+
log_error "CoreDNS ConfigMap not found" \
770+
"rke2-coredns-rke2-coredns ConfigMap missing in kube-system" \
771+
"Check RKE2 CoreDNS deployment" \
772+
"kubectl -n kube-system get configmap"
787773
return 1
788-
}
774+
fi
789775

790-
# Restart CoreDNS to pick up the change immediately
791-
kubectl -n kube-system rollout restart deployment rke2-coredns-rke2-coredns > /dev/null 2>&1 || true
792-
sleep 5
776+
if echo "$current_corefile" | grep -q "${local_domain}:53"; then
777+
log_info "CoreDNS Corefile already contains ${local_domain} server block — skipping."
778+
else
779+
log_info "Injecting ${local_domain} server block into CoreDNS Corefile..."
780+
kubectl -n kube-system get configmap rke2-coredns-rke2-coredns -o json | \
781+
jq --arg domain "$local_domain" --arg ip "$node_ip" '
782+
.data.Corefile = $domain + ":53 {\n errors\n cache 30\n forward . " + $ip + "\n}\n" + .data.Corefile
783+
' | kubectl apply -f - || {
784+
log_error "Failed to patch CoreDNS Corefile" \
785+
"jq/kubectl pipeline failed" \
786+
"Check CoreDNS ConfigMap" \
787+
"kubectl -n kube-system get configmap rke2-coredns-rke2-coredns -o yaml"
788+
return 1
789+
}
793790

794-
# Verify: resolve the local domain from inside CoreDNS
795-
local local_domain=$(cfg "local_domain" "openg2p.test")
796-
local node_ip=$(cfg "node_ip")
791+
# Restart CoreDNS to pick up the change
792+
kubectl -n kube-system rollout restart deployment rke2-coredns-rke2-coredns > /dev/null 2>&1 || true
793+
log_info "CoreDNS restarting..."
794+
sleep 10
795+
fi
796+
797+
# Verify: resolve the local domain from inside a pod
798+
log_info "Verifying DNS resolution from inside a pod..."
797799
local test_ip
798800
test_ip=$(kubectl run dns-test --rm -i --restart=Never --image=busybox:1.36 \
799-
-- nslookup "keycloak.${local_domain}" 2>/dev/null | grep -A1 "Name:" | tail -1 | awk '{print $2}' || true)
801+
-- nslookup "keycloak.${local_domain}" 2>/dev/null | \
802+
grep -A1 "Name:" | tail -1 | awk '{print $2}' || true)
800803

801804
if [[ "$test_ip" == "$node_ip" ]]; then
802805
log_success "CoreDNS resolves keycloak.${local_domain} -> ${node_ip} from inside pods."
803806
else
804807
log_warn "CoreDNS verification returned '${test_ip}' (expected ${node_ip})."
805-
log_warn "CoreDNS may still be reloading. Pods should resolve after a few seconds."
808+
log_warn "Pods may need a few more seconds. Check manually:"
809+
log_warn " kubectl run dns-test --rm -it --restart=Never --image=busybox:1.36 -- nslookup keycloak.${local_domain}"
806810
fi
807811

808812
mark_step_done "$step_id"

0 commit comments

Comments
 (0)