Skip to content

Commit f513f97

Browse files
Working on better ping
1 parent b881808 commit f513f97

File tree

2 files changed

+126
-15
lines changed

2 files changed

+126
-15
lines changed

main.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,23 +154,27 @@ func main() {
154154
flag.StringVar(&pingIntervalStr, "ping-interval", "3s", "Interval for pinging the server (default 3s)")
155155
}
156156
if pingTimeoutStr == "" {
157-
flag.StringVar(&pingTimeoutStr, "ping-timeout", "3s", " Timeout for each ping (default 3s)")
157+
flag.StringVar(&pingTimeoutStr, "ping-timeout", "5s", " Timeout for each ping (default 3s)")
158158
}
159159

160160
if pingIntervalStr != "" {
161161
pingInterval, err = time.ParseDuration(pingIntervalStr)
162162
if err != nil {
163-
fmt.Printf("Invalid PING_INTERVAL value: %s, using default 1 second\n", pingIntervalStr)
163+
fmt.Printf("Invalid PING_INTERVAL value: %s, using default 3 seconds\n", pingIntervalStr)
164164
pingInterval = 3 * time.Second
165165
}
166+
} else {
167+
pingInterval = 3 * time.Second
166168
}
167169

168170
if pingTimeoutStr != "" {
169171
pingTimeout, err = time.ParseDuration(pingTimeoutStr)
170172
if err != nil {
171-
fmt.Printf("Invalid PING_TIMEOUT value: %s, using default 2 seconds\n", pingTimeoutStr)
172-
pingTimeout = 3 * time.Second
173+
fmt.Printf("Invalid PING_TIMEOUT value: %s, using default 5 seconds\n", pingTimeoutStr)
174+
pingTimeout = 5 * time.Second
173175
}
176+
} else {
177+
pingTimeout = 5 * time.Second
174178
}
175179

176180
if dockerEnforceNetworkValidation == "" {
@@ -386,6 +390,15 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
386390
close(pingWithRetryStopChan)
387391
pingWithRetryStopChan = nil
388392
}
393+
// Use reliable ping for initial connection test
394+
logger.Debug("Testing initial connection with reliable ping...")
395+
_, err = reliablePing(tnet, wgData.ServerIP, pingTimeout, 5)
396+
if err != nil {
397+
logger.Warn("Initial reliable ping failed, but continuing: %v", err)
398+
} else {
399+
logger.Info("Initial connection test successful!")
400+
}
401+
389402
pingWithRetryStopChan, _ = pingWithRetry(tnet, wgData.ServerIP, pingTimeout)
390403

391404
// Always mark as connected and start the proxy manager regardless of initial ping result

util.go

Lines changed: 109 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,17 @@ func ping(tnet *netstack.Net, dst string, timeout time.Duration) (time.Duration,
4545
}
4646
defer socket.Close()
4747

48+
// Set socket buffer sizes to handle high bandwidth scenarios
49+
if tcpConn, ok := socket.(interface{ SetReadBuffer(int) error }); ok {
50+
tcpConn.SetReadBuffer(64 * 1024)
51+
}
52+
if tcpConn, ok := socket.(interface{ SetWriteBuffer(int) error }); ok {
53+
tcpConn.SetWriteBuffer(64 * 1024)
54+
}
55+
4856
requestPing := icmp.Echo{
4957
Seq: rand.Intn(1 << 16),
50-
Data: []byte("f"),
58+
Data: []byte("newtping"),
5159
}
5260

5361
icmpBytes, err := (&icmp.Message{Type: ipv4.ICMPTypeEcho, Code: 0, Body: &requestPing}).Marshal(nil)
@@ -65,12 +73,14 @@ func ping(tnet *netstack.Net, dst string, timeout time.Duration) (time.Duration,
6573
return 0, fmt.Errorf("failed to write ICMP packet: %w", err)
6674
}
6775

68-
n, err := socket.Read(icmpBytes[:])
76+
// Use larger buffer for reading to handle potential network congestion
77+
readBuffer := make([]byte, 1500)
78+
n, err := socket.Read(readBuffer)
6979
if err != nil {
7080
return 0, fmt.Errorf("failed to read ICMP packet: %w", err)
7181
}
7282

73-
replyPacket, err := icmp.ParseMessage(1, icmpBytes[:n])
83+
replyPacket, err := icmp.ParseMessage(1, readBuffer[:n])
7484
if err != nil {
7585
return 0, fmt.Errorf("failed to parse ICMP packet: %w", err)
7686
}
@@ -92,6 +102,51 @@ func ping(tnet *netstack.Net, dst string, timeout time.Duration) (time.Duration,
92102
return latency, nil
93103
}
94104

105+
// reliablePing performs multiple ping attempts with adaptive timeout
106+
func reliablePing(tnet *netstack.Net, dst string, baseTimeout time.Duration, maxAttempts int) (time.Duration, error) {
107+
var lastErr error
108+
var totalLatency time.Duration
109+
successCount := 0
110+
111+
for attempt := 1; attempt <= maxAttempts; attempt++ {
112+
// Adaptive timeout: increase timeout for later attempts
113+
timeout := baseTimeout + time.Duration(attempt-1)*500*time.Millisecond
114+
115+
// Add jitter to prevent thundering herd
116+
jitter := time.Duration(rand.Intn(100)) * time.Millisecond
117+
timeout += jitter
118+
119+
latency, err := ping(tnet, dst, timeout)
120+
if err != nil {
121+
lastErr = err
122+
logger.Debug("Ping attempt %d/%d failed: %v", attempt, maxAttempts, err)
123+
124+
// Brief pause between attempts with exponential backoff
125+
if attempt < maxAttempts {
126+
backoff := time.Duration(attempt) * 50 * time.Millisecond
127+
time.Sleep(backoff)
128+
}
129+
continue
130+
}
131+
132+
totalLatency += latency
133+
successCount++
134+
135+
// If we get at least one success, we can return early for health checks
136+
if successCount > 0 {
137+
avgLatency := totalLatency / time.Duration(successCount)
138+
logger.Debug("Reliable ping succeeded after %d attempts, avg latency: %v", attempt, avgLatency)
139+
return avgLatency, nil
140+
}
141+
}
142+
143+
if successCount == 0 {
144+
return 0, fmt.Errorf("all %d ping attempts failed, last error: %v", maxAttempts, lastErr)
145+
}
146+
147+
return totalLatency / time.Duration(successCount), nil
148+
}
149+
95150
func pingWithRetry(tnet *netstack.Net, dst string, timeout time.Duration) (stopChan chan struct{}, err error) {
96151

97152
if healthFile != "" {
@@ -180,6 +235,9 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
180235
consecutiveFailures := 0
181236
connectionLost := false
182237

238+
// Track recent latencies for adaptive timeout calculation
239+
recentLatencies := make([]time.Duration, 0, 10)
240+
183241
pingStopChan := make(chan struct{})
184242

185243
go func() {
@@ -188,18 +246,52 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
188246
for {
189247
select {
190248
case <-ticker.C:
191-
_, err := ping(tnet, serverIP, pingTimeout)
249+
// Calculate adaptive timeout based on recent latencies
250+
adaptiveTimeout := pingTimeout
251+
if len(recentLatencies) > 0 {
252+
var sum time.Duration
253+
for _, lat := range recentLatencies {
254+
sum += lat
255+
}
256+
avgLatency := sum / time.Duration(len(recentLatencies))
257+
// Use 3x average latency as timeout, with minimum of pingTimeout
258+
adaptiveTimeout = avgLatency * 3
259+
if adaptiveTimeout < pingTimeout {
260+
adaptiveTimeout = pingTimeout
261+
}
262+
if adaptiveTimeout > 15*time.Second {
263+
adaptiveTimeout = 15 * time.Second
264+
}
265+
}
266+
267+
// Use reliable ping with multiple attempts
268+
maxAttempts := 2
269+
if consecutiveFailures > 4 {
270+
maxAttempts = 4 // More attempts when connection is unstable
271+
}
272+
273+
latency, err := reliablePing(tnet, serverIP, adaptiveTimeout, maxAttempts)
192274
if err != nil {
193275
consecutiveFailures++
194-
if consecutiveFailures < 4 {
276+
277+
// Track recent latencies (add a high value for failures)
278+
recentLatencies = append(recentLatencies, adaptiveTimeout)
279+
if len(recentLatencies) > 10 {
280+
recentLatencies = recentLatencies[1:]
281+
}
282+
283+
if consecutiveFailures < 2 {
195284
logger.Debug("Periodic ping failed (%d consecutive failures): %v", consecutiveFailures, err)
196285
} else {
197286
logger.Warn("Periodic ping failed (%d consecutive failures): %v", consecutiveFailures, err)
198287
}
199-
if consecutiveFailures >= 8 && currentInterval < maxInterval {
288+
289+
// More lenient threshold for declaring connection lost under load
290+
failureThreshold := 4
291+
if consecutiveFailures >= failureThreshold && currentInterval < maxInterval {
200292
if !connectionLost {
201293
connectionLost = true
202-
logger.Warn("Connection to server lost. Continuous reconnection attempts will be made.")
294+
logger.Warn("Connection to server lost after %d failures. Continuous reconnection attempts will be made.", consecutiveFailures)
203295
stopFunc = client.SendMessageInterval("newt/ping/request", map[string]interface{}{}, 3*time.Second)
204296
// Send registration message to the server for backward compatibility
205297
err := client.SendMessage("newt/wg/register", map[string]interface{}{
@@ -216,17 +308,23 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
216308
}
217309
}
218310
}
219-
currentInterval = time.Duration(float64(currentInterval) * 1.5)
311+
currentInterval = time.Duration(float64(currentInterval) * 1.3) // Slower increase
220312
if currentInterval > maxInterval {
221313
currentInterval = maxInterval
222314
}
223315
ticker.Reset(currentInterval)
224316
logger.Debug("Increased ping check interval to %v due to consecutive failures", currentInterval)
225317
}
226318
} else {
319+
// Track recent latencies
320+
recentLatencies = append(recentLatencies, latency)
321+
if len(recentLatencies) > 10 {
322+
recentLatencies = recentLatencies[1:]
323+
}
324+
227325
if connectionLost {
228326
connectionLost = false
229-
logger.Info("Connection to server restored!")
327+
logger.Info("Connection to server restored after %d failures!", consecutiveFailures)
230328
if healthFile != "" {
231329
err := os.WriteFile(healthFile, []byte("ok"), 0644)
232330
if err != nil {
@@ -235,12 +333,12 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
235333
}
236334
}
237335
if currentInterval > pingInterval {
238-
currentInterval = time.Duration(float64(currentInterval) * 0.8)
336+
currentInterval = time.Duration(float64(currentInterval) * 0.9) // Slower decrease
239337
if currentInterval < pingInterval {
240338
currentInterval = pingInterval
241339
}
242340
ticker.Reset(currentInterval)
243-
logger.Info("Decreased ping check interval to %v after successful ping", currentInterval)
341+
logger.Debug("Decreased ping check interval to %v after successful ping", currentInterval)
244342
}
245343
consecutiveFailures = 0
246344
}

0 commit comments

Comments
 (0)