-
Notifications
You must be signed in to change notification settings - Fork 538
Script to identify network issues. As of now, it looks for DNS issues. It can be extended to capture other network issues as well. #578
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
princepereira
wants to merge
1
commit into
microsoft:master
Choose a base branch
from
princepereira:ppereira-networkdiagnostics
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,255 @@ | ||
| # Script parameters. | ||
| #   DnsOnly   - when $true, the $All flag below is cleared. | ||
| #   DnsPktCap - when $true, a DNS packet capture is taken if the DNS checks report no issue. | ||
| param ( | ||
| [Parameter(Mandatory=$false)] [bool]$DnsOnly = $false, | ||
| [Parameter(Mandatory=$false)] [bool]$DnsPktCap = $false | ||
| ) | ||
|
|
||
| # Every available check is requested unless the caller narrows the scope. | ||
| $Global:All = $true | ||
| # NOTE(review): the assignment below is not scope-qualified, unlike the $Global:All initialisation above - confirm that is intended. | ||
| if ($DnsOnly) { $All = $false } | ||
|
|
||
|
|
||
| # Counts how many blocks of $portRangeSize consecutive ports are still free inside the dynamic (ephemeral) port range. | ||
| # Parameters: | ||
| #   protocol - protocol name handed to the netsh excludedportrange and dynamicportrange queries (defaults to "TCP"). | ||
| # Returns: a [uint32] that sums, over every free gap found, Floor(gap size / $portRangeSize). | ||
| # NOTE(review): ports in use are always read with Get-NetTCPConnection, even when $protocol is not TCP - confirm this is intended for UDP. | ||
| function CountAvailableEphemeralPorts([string]$protocol = "TCP") { | ||
|
|
||
| # Size of one block of ports; the result is expressed in blocks of this size, not in single ports. | ||
| [uint32]$portRangeSize = 64 | ||
| # First, strip all the decoration (plain text, table headers, dashes, empty lines, ...) from the netsh output | ||
| $tcpRanges = (netsh int ipv4 sh excludedportrange $protocol) -replace "[^0-9,\ ]", '' | ? { $_.trim() -ne "" } | ||
|
|
||
| # Then, remove any extra space characters. Only capture the numbers representing the beginning and end of each range. | ||
| # The resulting objects are read further down through their P1 (range start) and P2 (range end) properties. | ||
| $tcpRangesArray = $tcpRanges -replace "\s+(\d+)\s+(\d+)\s+", '$1,$2' | ConvertFrom-String -Delimiter "," | ||
| # Convert from PSCustomObject to Object[] type so that += can append to it below | ||
| $tcpRangesArray = @($tcpRangesArray) | ||
|
|
||
| # Extract the ephemeral port range: element [0] is used as the start port and element [1] as the number of ports | ||
| $EphemeralPortRange = (netsh int ipv4 sh dynamicportrange $protocol) -replace "[^0-9]", '' | ? { $_.trim() -ne "" } | ||
| $EphemeralPortStart = [Convert]::ToUInt32($EphemeralPortRange[0]) | ||
| $EphemeralPortEnd = $EphemeralPortStart + [Convert]::ToUInt32($EphemeralPortRange[1]) - 1 | ||
|
|
||
| # Find the external interface: the first route entry returned for the 0.0.0.0/0 prefix | ||
| $externalInterfaceIdx = (Get-NetRoute -DestinationPrefix "0.0.0.0/0")[0].InterfaceIndex | ||
| $hostIP = (Get-NetIPConfiguration -ifIndex $externalInterfaceIdx).IPv4Address.IPAddress | ||
|
|
||
| # Extract the used TCP ports from the external interface | ||
| $usedTcpPorts = (Get-NetTCPConnection -LocalAddress $hostIP -ErrorAction Ignore).LocalPort | ||
| # Each used port is appended as a one-port range (P1 equals P2) | ||
| $usedTcpPorts | % { $tcpRangesArray += [pscustomobject]@{P1 = $_; P2 = $_ } } | ||
|
|
||
| # Extract the used TCP ports from the 0.0.0.0 interface | ||
| $usedTcpGlobalPorts = (Get-NetTCPConnection -LocalAddress "0.0.0.0" -ErrorAction Ignore).LocalPort | ||
| $usedTcpGlobalPorts | % { $tcpRangesArray += [pscustomobject]@{P1 = $_; P2 = $_ } } | ||
| # Sort the list by range start and remove duplicates | ||
| $tcpRangesArray = ($tcpRangesArray | Sort-Object { $_.P1 } -Unique) | ||
|
|
||
| # Copy into an ArrayList so that entries can be removed in place | ||
| $tcpRangesList = New-Object System.Collections.ArrayList($null) | ||
| $tcpRangesList.AddRange($tcpRangesArray) | ||
|
|
||
| # Remove overlapping ranges: walking from the end, when a range ends after the next one starts, the next one is dropped. | ||
| # The $i++ cancels the loop decrement so the same index is compared again with its new neighbour. | ||
| # NOTE(review): the loop condition is $i -gt 0, so index 0 is never compared with index 1 - confirm this is intended. | ||
| for ($i = $tcpRangesList.P1.Length - 2; $i -gt 0 ; $i--) { | ||
| if ($tcpRangesList[$i].P2 -gt $tcpRangesList[$i + 1].P1 ) { | ||
| $tcpRangesList.Remove($tcpRangesList[$i + 1]) | ||
| $i++ | ||
| } | ||
| } | ||
|
|
||
| # Remove the non-ephemeral port reservations from the list: keep only ranges lying entirely inside the ephemeral range | ||
| $filteredTcpRangeArray = $tcpRangesList | ? { $_.P1 -ge $EphemeralPortStart } | ||
| $filteredTcpRangeArray = $filteredTcpRangeArray | ? { $_.P2 -le $EphemeralPortEnd } | ||
|
|
||
| if ($null -eq $filteredTcpRangeArray) { | ||
| # Nothing is reserved or in use inside the ephemeral range: the whole port count is one free range | ||
| $freeRanges = @($EphemeralPortRange[1]) | ||
| } | ||
| else { | ||
| $freeRanges = @() | ||
| # The first free range goes from $EphemeralPortStart to the beginning of the first reserved range | ||
| $freeRanges += ([Convert]::ToUInt32($filteredTcpRangeArray[0].P1) - $EphemeralPortStart) | ||
|
|
||
| for ($i = 1; $i -lt $filteredTcpRangeArray.length; $i++) { | ||
| # Subsequent free ranges go from the end of the previous reserved range to the beginning of the current reserved range | ||
| $freeRanges += ([Convert]::ToUInt32($filteredTcpRangeArray[$i].P1) - [Convert]::ToUInt32($filteredTcpRangeArray[$i - 1].P2) - 1) | ||
| } | ||
|
|
||
| # The last free range goes from the end of the last reserved range to $EphemeralPortEnd | ||
| $freeRanges += ($EphemeralPortEnd - [Convert]::ToUInt32($filteredTcpRangeArray[$filteredTcpRangeArray.length - 1].P2)) | ||
| } | ||
|
|
||
| # Count the number of available free ranges: each gap contributes the number of whole $portRangeSize blocks it can hold | ||
| [uint32]$freeRangesCount = 0 | ||
| ($freeRanges | % { $freeRangesCount += [Math]::Floor($_ / $portRangeSize) } ) | ||
|
|
||
| return $freeRangesCount | ||
| } | ||
|
|
||
|
|
||
| # Checks that HNS holds the expected number of policies whose internal and external ports are both 53. | ||
| # Returns $true when fewer than the expected number are present, $false otherwise. | ||
| function CheckHnsDnsRuleMissing { | ||
| Write-Host "Checking HNS DNS Rule missing" | ||
| # Minimum number of port-53 policies considered healthy. | ||
| $minDnsRules = 2 | ||
| $dnsRuleCount = ((Get-HnsPolicyList).Policies | Where-Object { ($_.InternalPort -eq 53) -and ($_.ExternalPort -eq 53) }).Count | ||
| # Healthy path first: enough rules are present. | ||
| if ($dnsRuleCount -ge $minDnsRules) { | ||
| Write-Host "HNS DNS rule count is $dnsRuleCount. No DNS issue due to missing HNS DNS rules." -ForegroundColor Green | ||
| return $false | ||
| } | ||
| Write-Host "HNS DNS rule count is $dnsRuleCount. DNS issue for sure." -ForegroundColor Red | ||
| Write-Host "Resolution: Upgrade to 1.24.10+, 1.25.6+, 1.26.1+, 1.27.0+" -ForegroundColor Red | ||
| Write-Host "Mitigation : Restart-Service -f kubeproxy" -ForegroundColor Red | ||
| return $true | ||
| } | ||
|
|
||
| # Uses the thread count of the HNS service process as a deadlock indicator. | ||
| # Returns $true when the count reaches the threshold, $false otherwise. | ||
| function CheckHnsDeadlock { | ||
| Write-Host "Checking HNS Deadlock." | ||
| $hnsThreadThrshold = 100 | ||
| # Resolve the service to its process, then read that process's thread collection. | ||
| $hnsService = Get-WmiObject -Class Win32_Service -Filter "Name LIKE 'Hns'" | ||
| $hnsProcess = Get-Process -Id $hnsService.ProcessId | ||
| $threadCount = $hnsProcess.Threads.Count | ||
| # Healthy path first: the thread count is below the threshold. | ||
| if ($threadCount -lt $hnsThreadThrshold) { | ||
| Write-Host "HNS thread count is $threadCount . No chances of deadlock." -ForegroundColor Green | ||
| return $false | ||
| } | ||
| Write-Host "HNS thread count is $threadCount which is greater than expected $hnsThreadThrshold. There are chances of deadlock." -ForegroundColor Red | ||
| Write-Host "Resolution: Upgrade to Windows 2022" -ForegroundColor Red | ||
| Write-Host "Mitigation : Restart-Service -f hns , Start-Sleep -Seconds 10 ; Restart-Service -f KubeProxy " -ForegroundColor Red | ||
| return $true | ||
| } | ||
|
|
||
| # Looks in the System event log for Service Control Manager events reporting an unexpected HNS termination. | ||
| # Returns $true when at least one such event exists, $false otherwise. | ||
| function CheckHnsCrash { | ||
| Write-Host "Checking HNS crash" | ||
| $scmEventFilter = @{logname = 'System'; ProviderName = 'Service Control Manager' } | ||
| $crashPattern = "*The Host Network Service terminated unexpectedly*" | ||
| $hnsCrashCount = (Get-WinEvent -FilterHashtable $scmEventFilter | Select-Object -Property TimeCreated, Id, LevelDisplayName, Message | Where-Object { $_.Message -like $crashPattern }).Count | ||
| # Healthy path first: no matching event was found. | ||
| if ($hnsCrashCount -le 0) { | ||
| Write-Host "HNS crash count is $hnsCrashCount. No issue reported with HNS crash." -ForegroundColor Green | ||
| return $false | ||
| } | ||
| Write-Host "HNS crash count is $hnsCrashCount. There are chances of issues." -ForegroundColor Red | ||
| Write-Host "Resolution: Upgrade to 1.24.10+, 1.25.6+, 1.26.1+, 1.27.0+" -ForegroundColor Red | ||
| Write-Host "Mitigation : Restart-Service -f KubeProxy " -ForegroundColor Red | ||
| return $true | ||
| } | ||
|
|
||
| # Reports whether the node appears to be running out of ephemeral ports. | ||
| # Asks CountAvailableEphemeralPorts for the TCP figure and then the UDP figure, and flags exhaustion when either is below the threshold. | ||
| # Returns $true when exhaustion is suspected, $false otherwise. | ||
| function CheckPortExhaustion { | ||
| Write-Host "Checking Port Exhaustion" | ||
| # Smallest figure from CountAvailableEphemeralPorts that is still treated as healthy. | ||
| $minAvailable = 10 | ||
| $avTcpPorts = CountAvailableEphemeralPorts -protocol TCP | ||
| if($avTcpPorts -lt $minAvailable) { | ||
| Write-Host "Available TCP ports are $avTcpPorts. Port exhaustion suspected." -ForegroundColor Red | ||
| return $true | ||
| } | ||
| $avUdpPorts = CountAvailableEphemeralPorts -protocol UDP | ||
| # Fix: this branch previously re-tested $avTcpPorts, which had already passed the check above, so UDP exhaustion could never be reported. | ||
| if($avUdpPorts -lt $minAvailable) { | ||
| Write-Host "Available UDP ports are $avUdpPorts. Port exhaustion suspected." -ForegroundColor Red | ||
| return $true | ||
| } | ||
| Write-Host "Available TCP Ports : $avTcpPorts , UDP Ports : $avUdpPorts . No port exhaustion suspected." -ForegroundColor Green | ||
| return $false | ||
| } | ||
|
|
||
| # Polls the kubeproxy service once per second, ten times, and reports whether it was ever seen in the Stopped state. | ||
| # Returns $true as soon as a Stopped status is observed, $false if none is observed during the polling window. | ||
| function CheckKubeProxyCrash { | ||
| Write-Host "Checking KubeProxy restart" | ||
| $pollCount = 10 | ||
| foreach ($attempt in 1..$pollCount) { | ||
| $status = (Get-Service kubeproxy).Status | ||
| if($status -ne "Stopped") { | ||
| # Not stopped on this poll: report the remaining polls and wait before trying again. | ||
| $waitTime = ($pollCount - $attempt) | ||
| Write-Host "Checking KubeProxy restart. Wait time : $waitTime seconds" | ||
| Start-Sleep -Seconds 1 | ||
| continue | ||
| } | ||
| Write-Host "KubeProxy is restarting. There are chances of issues." -ForegroundColor Red | ||
| Write-Host "Resolution: Upgrade to v1.24.12+, v1.25.8, v1.26.3+, v1.27.0+" -ForegroundColor Red | ||
| Write-Host "Mitigation : Restart the node or drain to a new node " -ForegroundColor Red | ||
| return $true | ||
| } | ||
| Write-Host "KubeProxy service state is $status . No issues identified with KubeProxy restart." -ForegroundColor Green | ||
| return $false | ||
| } | ||
|
|
||
| # For every HNS endpoint that is not a remote endpoint, lists the LB_DSR_IPv4_OUT rules of its VFP port | ||
| # and checks that a rule matching the TCP pattern (53_53_6) and one matching the UDP pattern (53_53_17) exist. | ||
| # Returns $true when any endpoint lacks one of them, $false otherwise. | ||
| function CheckVfpDnsRuleMissing { | ||
| Write-Host "Checking VFP DNS Rule missing" | ||
| $vfpDnsRuleMissing = $false | ||
| # Remote endpoints are left out up front; only the remaining endpoints are inspected. | ||
| $localEndpoints = Get-HnsEndpoint | Where-Object { -not ($_.IsRemoteEndpoint -eq $true) } | ||
| foreach($ep in $localEndpoints) { | ||
| # The VFP port is taken from the endpoint's first allocator entry. | ||
| $portID = $ep.Resources.Allocators[0].EndpointPortGuid | ||
| $epID = $ep.ID | ||
| $epMac = $ep.MacAddress | ||
| $epIpAddress = $ep.IPAddress | ||
|
|
||
| $tcpRule = vfpctrl.exe /port $portID /layer LB_DSR /group LB_DSR_IPv4_OUT /list-rule | Select-String -Pattern "RULE.*53_53_6" | ||
| if($tcpRule.Count -lt 1) { | ||
| Write-Host "VFP DNS TCP Rule missing for VFP Port : $portID . Endpoint ID : $epID , Mac : $epMac , IP Address : $epIpAddress" -ForegroundColor Red | ||
| $vfpDnsRuleMissing = $true | ||
| } | ||
|
|
||
| $udpRule = vfpctrl.exe /port $portID /layer LB_DSR /group LB_DSR_IPv4_OUT /list-rule | Select-String -Pattern "RULE.*53_53_17" | ||
| if($udpRule.Count -lt 1) { | ||
| Write-Host "VFP DNS UDP Rule missing for VFP Port : $portID . Endpoint ID : $epID , Mac : $epMac , IP Address : $epIpAddress" -ForegroundColor Red | ||
| $vfpDnsRuleMissing = $true | ||
| } | ||
| } | ||
|
|
||
| # Healthy path first: no endpoint was flagged. | ||
| if(-not $vfpDnsRuleMissing){ | ||
| Write-Host "No issues identified with VFP DNS Rule Missing for local endpoints." -ForegroundColor Green | ||
| return $false | ||
| } | ||
|
|
||
| Write-Host "Mitigation : Restart-Service -f hns " -ForegroundColor Red | ||
| return $true | ||
| } | ||
|
|
||
| # Captures packets on port 53 with pktmon for $captureTime seconds and leaves the logs in $pktmonLogs. | ||
| # Any running capture is stopped first, existing pktmon filters are removed and the log directory is recreated. | ||
| # The capture is started from inside the log directory; the caller's working directory is restored afterwards. | ||
| function DnsPktCapture { | ||
| $pktmonLogs = "C:\k\pktmonLogs" | ||
| $captureTime = 15 | ||
| pktmon stop | ||
| Write-Host "Starting DNS Packet Capture" | ||
| Write-Host "Removing all pktmon filters if anything existing..." | ||
| pktmon filter remove | ||
| Write-Host "Create DNS Port filter..." | ||
| pktmon filter add DNSFilter -p 53 | ||
| Write-Host "Create a directory for pktmon logs..." | ||
| remove-item -Recurse -Force $pktmonLogs -ErrorAction Ignore | ||
| mkdir $pktmonLogs | ||
| # Push-Location instead of Set-Location so the caller's working directory can be restored in the finally block. | ||
| Push-Location $pktmonLogs | ||
| try { | ||
| Write-Host "Start pktmon. Command : [pktmon start -c --comp all --pkt-size 0 -m multi-file] ..." | ||
| pktmon start -c --comp all --pkt-size 0 -m multi-file | ||
| Write-Host "Waiting for $captureTime seconds." | ||
| Start-Sleep -Seconds $captureTime | ||
| } | ||
| finally { | ||
| # Runs even when the wait is interrupted, so the capture is not left running. | ||
| pktmon stop | ||
| Pop-Location | ||
| } | ||
| Write-Host "Logs will be available in $pktmonLogs" | ||
| Write-Host "DNS Packet Capture Completed" | ||
| } | ||
|
|
||
| # Runs the DNS-related checks in a fixed order and stops at the first one that reports a problem. | ||
| # Returns $true when a check reports an issue, $false when every check passes. | ||
| function ValidateDns { | ||
| Write-Host "Checking DNS Issue." | ||
| # Order matters: the checks run in this sequence and later ones are skipped once an issue is found. | ||
| $dnsChecks = @( | ||
| 'CheckHnsDnsRuleMissing', | ||
| 'CheckHnsDeadlock', | ||
| 'CheckHnsCrash', | ||
| 'CheckPortExhaustion', | ||
| 'CheckKubeProxyCrash', | ||
| 'CheckVfpDnsRuleMissing' | ||
| ) | ||
| foreach ($check in $dnsChecks) { | ||
| if(& $check) { | ||
| Write-Host "DNS Issue Found." -ForegroundColor Red | ||
| return $true | ||
| } | ||
| } | ||
| Write-Host "No DNS Issues identified as per current test." -ForegroundColor Green | ||
| # Explicit result for the no-issue path, matching the individual checks; previously nothing was returned here. | ||
| return $false | ||
| } | ||
|
|
||
|
|
||
|
|
||
| # Entry point: run the DNS validation; when -DnsPktCap was requested and no issue was reported, capture DNS packets. | ||
| if ($All -or $DnsOnly) { | ||
| $dnsIssueFound = ValidateDns | ||
| if($DnsPktCap) { | ||
| if(-not $dnsIssueFound) { | ||
| DnsPktCapture | ||
| } | ||
| } | ||
| } | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.