Skip to content

Commit fc4164a

Browse files
mugeshsp, Copilot, Copilot, and awesomenix authored
feat: add udev rule to set default RX Buffer as 2048 on systems with 4 or more cores (#7665)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
Co-authored-by: awesomenix <1703110+awesomenix@users.noreply.github.com>
Signed-off-by: Devin Wong <wongsiosun@outlook.com>
1 parent 8fa7647 commit fc4164a

File tree

84 files changed

+1333
-127
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+1333
-127
lines changed

e2e/validation.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ func ValidateCommonLinux(ctx context.Context, s *Scenario) {
4343
ValidateDiskQueueService(ctx, s)
4444
ValidateLeakedSecrets(ctx, s)
4545
ValidateIPTablesCompatibleWithCiliumEBPF(ctx, s)
46+
ValidateRxBufferDefault(ctx, s)
4647

4748
ValidateSysctlConfig(ctx, s, map[string]string{
4849
"net.ipv4.tcp_retries2": "8",

e2e/validators.go

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"net"
1212
"os"
1313
"regexp"
14+
"strconv"
1415
"strings"
1516
"testing"
1617
"time"
@@ -254,6 +255,90 @@ func ValidateSysctlConfig(ctx context.Context, s *Scenario, customSysctls map[st
254255
}
255256
}
256257

258+
// ValidateNetworkInterfaceConfig validates network interface configuration settings using ethtool.
259+
// It identifies network interfaces with slot names matching the enP* pattern (same logic as the udev rule),
260+
// then verifies that each interface has the expected configuration settings (e.g., rx buffer size).
261+
// The nicConfig map specifies the ethtool settings to validate (key: setting name, value: expected value).
262+
func ValidateNetworkInterfaceConfig(ctx context.Context, s *Scenario, nicConfig map[string]string) {
263+
s.T.Helper()
264+
265+
// Get list of NICs using udevadm (same logic as udev rule)
266+
getNicsCommand := []string{
267+
"#!/usr/bin/env bash",
268+
"set -euo pipefail",
269+
"echo '=== NICs to Configure ==='",
270+
"enp_ifaces=()",
271+
"for dev in /sys/class/net/*; do",
272+
" iface=\"$(basename \"$dev\")\"",
273+
" slot=\"$(udevadm info -q property -p \"$dev\" 2>/dev/null | awk -F= '$1==\"ID_NET_NAME_SLOT\"{print $2; exit}')\"",
274+
" [[ \"$slot\" == enP* ]] && enp_ifaces+=(\"$iface\")",
275+
"done",
276+
"IFS=,; echo \"${enp_ifaces[*]}\"",
277+
}
278+
nicsResult := execScriptOnVMForScenarioValidateExitCode(ctx, s, strings.Join(getNicsCommand, "\n"), 0, "could not get nics to configure")
279+
s.T.Logf("NICs to configure:\n%s", nicsResult.stdout)
280+
281+
// Parse NIC output - it may be multi-line with header
282+
lines := strings.Split(strings.TrimSpace(nicsResult.stdout), "\n")
283+
nicsOutput := ""
284+
for _, line := range lines {
285+
line = strings.TrimSpace(line)
286+
// Skip header lines
287+
if strings.Contains(line, "===") || line == "" {
288+
continue
289+
}
290+
nicsOutput = line
291+
break
292+
}
293+
294+
nics := strings.Split(nicsOutput, ",")
295+
296+
s.T.Logf("Parsed NICs list: %v (count: %d)", nics, len(nics))
297+
298+
if len(nics) == 0 || (len(nics) == 1 && strings.TrimSpace(nics[0]) == "") {
299+
s.T.Fatalf("no nics found to validate network interface config")
300+
return
301+
}
302+
303+
for _, nic := range nics {
304+
// Skip empty entries
305+
nic = strings.TrimSpace(nic)
306+
if nic == "" {
307+
continue
308+
}
309+
310+
s.T.Logf("Validating network interface config for NIC: %s", nic)
311+
312+
for setting, expectedValue := range nicConfig {
313+
// Get full ethtool output for debugging
314+
debugCommand := []string{
315+
"set -ex",
316+
fmt.Sprintf("echo '=== Full ethtool output for %s ==='", nic),
317+
fmt.Sprintf("sudo ethtool -g %s", nic),
318+
}
319+
debugResult := execScriptOnVMForScenario(ctx, s, strings.Join(debugCommand, "\n"))
320+
s.T.Logf("Full ethtool output for %s:\n%s", nic, debugResult.stdout)
321+
322+
command := []string{
323+
"set -ex",
324+
fmt.Sprintf("sudo ethtool -g %s | grep -A 5 'Current hardware settings' | grep -i %s: | awk '{print $2}'", nic, setting),
325+
}
326+
execResult := execScriptOnVMForScenarioValidateExitCode(ctx, s, strings.Join(command, "\n"), 0, "could not get ethtool config")
327+
actualValue := strings.TrimSpace(execResult.stdout)
328+
s.T.Logf("Ethtool setting %s for NIC %s: expected=%s, actual=%s", setting, nic, expectedValue, actualValue)
329+
require.Equal(s.T, expectedValue, actualValue, "expected %s to be %s on nic %s, but got %s.\nFull ethtool output:\n%s", setting, expectedValue, nic, actualValue, debugResult.stdout)
330+
}
331+
}
332+
}
333+
334+
// ValidateAzureNetworkFiles checks that udev rules files exist.
335+
func ValidateAzureNetworkFiles(ctx context.Context, s *Scenario) {
336+
s.T.Helper()
337+
338+
ValidateFileExists(ctx, s, "/opt/azure-network/configure-azure-network.sh")
339+
ValidateFileExists(ctx, s, "/etc/udev/rules.d/99-azure-network.rules")
340+
}
341+
257342
func ValidateNvidiaSMINotInstalled(ctx context.Context, s *Scenario) {
258343
s.T.Helper()
259344
command := []string{
@@ -1623,3 +1708,35 @@ func ValidateNodeHasLabel(ctx context.Context, s *Scenario, labelKey, expectedVa
16231708
require.True(s.T, exists, "expected node %q to have label %q, but it was not found", s.Runtime.VM.KubeName, labelKey)
16241709
require.Equal(s.T, expectedValue, actualValue, "expected node %q label %q to have value %q, but got %q", s.Runtime.VM.KubeName, labelKey, expectedValue, actualValue)
16251710
}
1711+
1712+
// ValidateRxBufferDefault validates rx buffer config using default values based on VM's CPU count
1713+
func ValidateRxBufferDefault(ctx context.Context, s *Scenario) {
1714+
s.T.Helper()
1715+
1716+
// Query the VM's actual CPU count using nproc
1717+
cpuCountCmd := "nproc"
1718+
result := execScriptOnVMForScenarioValidateExitCode(ctx, s, cpuCountCmd, 0, "could not get CPU count from VM")
1719+
vmCPUCount := strings.TrimSpace(result.stdout)
1720+
1721+
// Parse CPU count
1722+
cpuCount, err := strconv.Atoi(vmCPUCount)
1723+
require.NoError(s.T, err, "failed to parse CPU count: %s", vmCPUCount)
1724+
1725+
// Determine expected rx based on VM's CPU count (matching configure-azure-network.sh logic)
1726+
expectedRx := "1024"
1727+
if cpuCount >= 4 {
1728+
expectedRx = "2048"
1729+
}
1730+
1731+
s.T.Logf("VM has %d CPUs, expecting rx buffer size: %s", cpuCount, expectedRx)
1732+
1733+
customNicConfig := map[string]string{
1734+
"rx": expectedRx,
1735+
}
1736+
1737+
// Validate files exist
1738+
ValidateAzureNetworkFiles(ctx, s)
1739+
1740+
// Validate network interface settings match expected default
1741+
ValidateNetworkInterfaceConfig(ctx, s, customNicConfig)
1742+
}
parts/linux/cloud-init/artifacts/99-azure-network.rules

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# udev rule for AKS network tuning.
# Runs configure-azure-network.sh with the kernel device name (%k) whenever a network
# interface whose ID_NET_NAME_SLOT matches enP* is added or changed; the script adjusts
# the interface's RX ring buffer.
SUBSYSTEM=="net", ACTION=="add|change", ENV{ID_NET_NAME_SLOT}=="enP*", RUN+="/opt/azure-network/configure-azure-network.sh %k"
parts/linux/cloud-init/artifacts/configure-azure-network.sh

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/bin/bash

# Configures the RX ring buffer size of an Azure NIC.
# Called by udev with the interface name as the only argument.
#
# The target size depends on the CPU count: 2048 with 4 or more CPUs, 1024 otherwise.
# The buffer is only changed while its current size is 1024; any other current size is
# left untouched. Every path exits 0; problems are only reported on stdout.

INTERFACE="$1"

# Exit if no interface provided
if [ -z "$INTERFACE" ]; then
    echo "No interface provided, exiting"
    exit 0
fi

# Check if interface exists
if [ ! -d "/sys/class/net/$INTERFACE" ]; then
    echo "NIC $INTERFACE does not exist. Skipping."
    exit 0
fi

# Determine default RX buffer size based on number of CPUs
NUM_CPUS=$(nproc)
if [ "$NUM_CPUS" -ge 4 ]; then
    DEFAULT_RX_BUFFER_SIZE=2048
else
    DEFAULT_RX_BUFFER_SIZE=1024
fi

# Get current RX buffer size (the RX line of the "Current hardware settings" section,
# not the pre-set maximum printed before it)
CURRENT_RX=$(ethtool -g "$INTERFACE" 2>/dev/null | grep -A4 "Current hardware settings" | grep "^RX:" | awk '{print $2}')

# Only proceed if current RX is 1024
if [ "$CURRENT_RX" != "1024" ]; then
    echo "Current RX buffer size is $CURRENT_RX (not 1024), skipping configuration for $INTERFACE"
    exit 0
fi

# The CPU-based default is used as is; this script reads no override.
RX_SIZE=$DEFAULT_RX_BUFFER_SIZE

# Nothing to do when the interface already has the target size (the case with fewer than
# 4 CPUs). ethtool can report an error when asked to apply an unchanged value, which would
# only produce a misleading failure message below.
if [ "$CURRENT_RX" = "$RX_SIZE" ]; then
    echo "Detected $NUM_CPUS CPUs, RX buffer size of $INTERFACE is already $RX_SIZE, nothing to configure"
    exit 0
fi

echo "Detected $NUM_CPUS CPUs, current RX is 1024, configuring $INTERFACE with rx=$RX_SIZE"
# Best effort: a failure is reported but does not change the exit status.
ethtool -G "$INTERFACE" rx "$RX_SIZE" || echo "Failed to set ring parameters for $INTERFACE"

parts/linux/cloud-init/artifacts/cse_config.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,18 @@ ensureSysctl() {
817817
retrycmd_if_failure 24 5 25 sysctl --system
818818
}
819819

820+
ensureAzureNetworkConfig() {
    local net_subsystem="net"
    local settle_timeout_seconds=10

    # Make udev re-read its rules so the azure-network rules are picked up.
    udevadm control --reload-rules

    # Request "add" events for the devices of the net subsystem.
    echo "Triggering udev for network devices..."
    udevadm trigger --subsystem-match="${net_subsystem}" --action=add

    # Wait, up to the timeout, for udev to finish handling the queued events.
    udevadm settle --timeout="${settle_timeout_seconds}"
}
831+
820832
ensureK8sControlPlane() {
821833
if $REBOOTREQUIRED || [ "$NO_OUTBOUND" = "true" ]; then
822834
return

parts/linux/cloud-init/artifacts/cse_main.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,9 @@ function nodePrep {
340340
touch /opt/azure/outbound-check-skipped
341341
fi
342342

343+
# Configure Azure network settings (udev rules for NIC configuration)
344+
logs_to_events "AKS.CSE.ensureAzureNetworkConfig" ensureAzureNetworkConfig
345+
343346
# Determine if GPU driver installation should be skipped
344347
export -f should_skip_nvidia_drivers
345348
skip_nvidia_driver_install=$(should_skip_nvidia_drivers)

parts/linux/cloud-init/nodecustomdata.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,20 @@ write_files:
321321
content: !!binary |
322322
{{GetVariableProperty "cloudInitData" "validateKubeletCredentialsScript"}}
323323

324+
- path: /opt/azure-network/configure-azure-network.sh
325+
permissions: "0755"
326+
encoding: gzip
327+
owner: root
328+
content: !!binary |
329+
{{GetVariableProperty "cloudInitData" "configureAzureNetworkScript"}}
330+
331+
- path: /etc/udev/rules.d/99-azure-network.rules
332+
permissions: "0644"
333+
encoding: gzip
334+
owner: root
335+
content: !!binary |
336+
{{GetVariableProperty "cloudInitData" "azureNetworkUdevRule"}}
337+
324338
- path: /etc/kubernetes/certs/ca.crt
325339
permissions: "0600"
326340
encoding: base64

pkg/agent/const.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ const (
9696
ensureNoDupEbtablesScript = "linux/cloud-init/artifacts/ensure-no-dup.sh"
9797
ensureNoDupEbtablesService = "linux/cloud-init/artifacts/ensure-no-dup.service"
9898

99+
// Azure network configuration files.
100+
configureAzureNetworkScript = "linux/cloud-init/artifacts/configure-azure-network.sh"
101+
azureNetworkUdevRule = "linux/cloud-init/artifacts/99-azure-network.rules"
102+
99103
componentManifestFile = "linux/cloud-init/artifacts/manifest.json"
100104
)
101105

0 commit comments

Comments
 (0)