Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions parts/linux/cloud-init/artifacts/localdns.sh
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,36 @@ add_iptable_rules_to_skip_conntrack_from_pods(){
done
}

# Wait for localdns IP to be removed from resolv.conf after networkctl reload.
# Globals:
#   RESOLV_CONF (read)               - path of the resolv.conf file that is polled.
#   LOCALDNS_NODE_LISTENER_IP (read) - localdns IP that must no longer be listed as a nameserver.
# Arguments:
#   $1: max_wait_seconds - Maximum time to wait for the change, in whole seconds (default: 5).
# Outputs:
#   Progress, success and timeout messages are written to stdout.
# Returns:
#   0 as soon as the localdns IP is not listed as a nameserver (a missing or empty
#   resolv.conf yields an empty nameserver list and therefore counts as removed),
#   1 if the IP is still listed once the wait has expired.
wait_for_localdns_removed_from_resolv_conf() {
    local max_wait_seconds="${1:-5}"
    local sleep_interval=0.25
    local max_sleeps=$((max_wait_seconds * 4)) # 4 sleeps of 0.25s per second of waiting
    local sleeps_done=0
    local current_dns

    echo "Waiting for localdns (${LOCALDNS_NODE_LISTENER_IP}) to be removed from resolv.conf..."

    # Check first, sleep afterwards: resolv.conf is always inspected at least once
    # (even with a timeout of 0) and once more after the final sleep, so the state
    # at the deadline itself is never missed.
    while true; do
        # awk errors (e.g. file not present yet) are deliberately discarded; the
        # resulting empty list is handled like any other list without the localdns IP.
        current_dns=$(awk '/^nameserver/ {print $2}' "$RESOLV_CONF" 2>/dev/null | paste -sd' ')

        # Use word boundary matching (-w) with fixed string (-F) to avoid partial IP matches.
        if ! echo "$current_dns" | grep -qwF -- "$LOCALDNS_NODE_LISTENER_IP"; then
            echo "DNS configuration refreshed successfully. Current DNS: ${current_dns}"
            return 0
        fi

        if [ "$sleeps_done" -ge "$max_sleeps" ]; then
            break
        fi

        sleep "$sleep_interval"
        sleeps_done=$((sleeps_done + 1))
    done

    echo "Timed out waiting for localdns to be removed from resolv.conf after ${max_wait_seconds} seconds."
    # current_dns holds the list sampled by the final check above.
    echo "Current DNS: ${current_dns}"
    return 1
}

# Disable DNS provided by DHCP and point the system at localdns.
disable_dhcp_use_clusterlistener() {
mkdir -p "${NETWORK_DROPIN_DIR}"
Expand Down Expand Up @@ -621,6 +651,19 @@ initialize_network_variables || exit $ERR_LOCALDNS_FAIL
# ---------------------------------------------------------------------------------------------------------------------
cleanup_iptables_and_dns || exit $ERR_LOCALDNS_FAIL

# During startup, wait for the DNS configuration to be fully refreshed.
# This ensures systemd-resolved has removed localdns from resolv.conf before we read upstream DNS servers.
# The wait is necessary because networkctl reload is async - there's a delay before systemd-resolved
# updates /run/systemd/resolve/resolv.conf. The next step (replace_azurednsip_in_corefile) reads
# resolv.conf to get upstream DNS servers. Without this wait, we might still see 169.254.10.10
# (localdns IP) as a nameserver, which would create a circular dependency in the corefile.
# Note: the shutdown path does not need this wait because it doesn't read from resolv.conf afterward -
# it just cleans up and exits, so systemd-resolved can complete the update asynchronously.
# Abort startup when localdns is still listed in resolv.conf after the 5 second wait.
wait_for_localdns_removed_from_resolv_conf 5 || {
    echo "Error: DNS configuration was not refreshed within timeout."
    exit $ERR_LOCALDNS_FAIL
}

# Replace AzureDNSIP in corefile with VNET DNS ServerIPs.
# ---------------------------------------------------------------------------------------------------------------------
replace_azurednsip_in_corefile || exit $ERR_LOCALDNS_FAIL
Expand Down
151 changes: 151 additions & 0 deletions spec/parts/linux/cloud-init/artifacts/localdns_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1110,4 +1110,155 @@ EOF
The status should be success
End
End


# This section tests - wait_for_localdns_removed_from_resolv_conf
# This function is defined in parts/linux/cloud-init/artifacts/localdns.sh file.
#------------------------------------------------------------------------------------------------------------------------------------
Describe 'wait_for_localdns_removed_from_resolv_conf'
setup() {
Include "./parts/linux/cloud-init/artifacts/localdns.sh"
TEST_DIR="/tmp/localdnstest-$$"
RESOLV_CONF="${TEST_DIR}/run/systemd/resolve/resolv.conf"
mkdir -p "$(dirname "$RESOLV_CONF")"
}
cleanup() {
rm -rf "$TEST_DIR"
}
BeforeEach 'setup'
AfterEach 'cleanup'

#------------------------- wait_for_localdns_removed_from_resolv_conf ------------------------------------------
It 'should return success immediately if localdns IP is absent'
cat > "$RESOLV_CONF" <<EOF
nameserver 10.0.0.1
nameserver 10.0.0.2
EOF
When run wait_for_localdns_removed_from_resolv_conf 5
The status should be success
The stdout should include "DNS configuration refreshed successfully"
The stdout should include "Current DNS: 10.0.0.1 10.0.0.2"
End

It 'should timeout if localdns IP is still present'
cat > "$RESOLV_CONF" <<EOF
nameserver 169.254.10.10
nameserver 10.0.0.1
EOF
When run wait_for_localdns_removed_from_resolv_conf 2
The status should be failure
The stdout should include "Timed out waiting for localdns to be removed from resolv.conf after 2 seconds"
The stdout should include "Current DNS:"
End

It 'should return success if resolv.conf is empty'
> "$RESOLV_CONF"
When run wait_for_localdns_removed_from_resolv_conf 5
The status should be success
The stdout should include "DNS configuration refreshed successfully"
End

It 'should use default timeout of 5 seconds when not specified'
cat > "$RESOLV_CONF" <<EOF
nameserver 10.0.0.1
EOF
When run wait_for_localdns_removed_from_resolv_conf
The status should be success
The stdout should include "DNS configuration refreshed successfully"
End

It 'should handle resolv.conf not existing gracefully'
rm -f "$RESOLV_CONF"
When run wait_for_localdns_removed_from_resolv_conf 2
The status should be success
The stdout should include "DNS configuration refreshed successfully"
End

It 'should not match partial IP addresses'
cat > "$RESOLV_CONF" <<EOF
nameserver 169.254.10.100
EOF
# 169.254.10.100 should NOT match 169.254.10.10
When run wait_for_localdns_removed_from_resolv_conf 2
The status should be success
The stdout should include "DNS configuration refreshed successfully"
End

It 'should detect localdns IP among multiple nameservers'
cat > "$RESOLV_CONF" <<EOF
nameserver 10.0.0.1
nameserver 169.254.10.10
nameserver 10.0.0.2
EOF
When run wait_for_localdns_removed_from_resolv_conf 2
The status should be failure
The stdout should include "Timed out waiting for localdns to be removed"
End

It 'should succeed when localdns IP is removed during wait (async removal)'
# Start with localdns IP present
cat > "$RESOLV_CONF" <<EOF
nameserver 169.254.10.10
nameserver 10.0.0.1
EOF
# Create background process that removes localdns IP after 2 seconds
(sleep 2 && echo "nameserver 10.0.0.1" > "$RESOLV_CONF") &
When run wait_for_localdns_removed_from_resolv_conf 5
The status should be success
The stdout should include "DNS configuration refreshed successfully"
End

It 'should ignore commented lines in resolv.conf'
cat > "$RESOLV_CONF" <<EOF
# nameserver 169.254.10.10
nameserver 10.0.0.1
# This is a comment
nameserver 10.0.0.2
EOF
When run wait_for_localdns_removed_from_resolv_conf 2
The status should be success
The stdout should include "DNS configuration refreshed successfully"
End

It 'should timeout when only localdns IP is present'
cat > "$RESOLV_CONF" <<EOF
nameserver 169.254.10.10
EOF
When run wait_for_localdns_removed_from_resolv_conf 2
The status should be failure
The stdout should include "Timed out waiting for localdns to be removed"
End

It 'should handle IPv6 nameservers mixed with IPv4'
cat > "$RESOLV_CONF" <<EOF
nameserver 10.0.0.1
nameserver 2001:4860:4860::8888
nameserver 10.0.0.2
EOF
When run wait_for_localdns_removed_from_resolv_conf 2
The status should be success
The stdout should include "DNS configuration refreshed successfully"
End

It 'should handle resolv.conf with search and options directives'
cat > "$RESOLV_CONF" <<EOF
search example.com local
nameserver 10.0.0.1
nameserver 10.0.0.2
options timeout:2 attempts:3
EOF
When run wait_for_localdns_removed_from_resolv_conf 2
The status should be success
The stdout should include "DNS configuration refreshed successfully"
The stdout should include "Current DNS: 10.0.0.1 10.0.0.2"
End

It 'should handle whitespace variations in resolv.conf'
# Use tabs and extra spaces
printf "nameserver\t10.0.0.1\nnameserver 10.0.0.2\n" > "$RESOLV_CONF"
When run wait_for_localdns_removed_from_resolv_conf 2
The status should be success
The stdout should include "DNS configuration refreshed successfully"
End
End
End
Loading