@@ -45,9 +45,17 @@ func ping(tnet *netstack.Net, dst string, timeout time.Duration) (time.Duration,
4545 }
4646 defer socket .Close ()
4747
48+ // Set socket buffer sizes to handle high bandwidth scenarios
49+ if tcpConn , ok := socket .(interface { SetReadBuffer (int ) error }); ok {
50+ tcpConn .SetReadBuffer (64 * 1024 )
51+ }
52+ if tcpConn , ok := socket .(interface { SetWriteBuffer (int ) error }); ok {
53+ tcpConn .SetWriteBuffer (64 * 1024 )
54+ }
55+
4856 requestPing := icmp.Echo {
4957 Seq : rand .Intn (1 << 16 ),
50- Data : []byte ("f " ),
58+ Data : []byte ("newtping " ),
5159 }
5260
5361 icmpBytes , err := (& icmp.Message {Type : ipv4 .ICMPTypeEcho , Code : 0 , Body : & requestPing }).Marshal (nil )
@@ -65,12 +73,14 @@ func ping(tnet *netstack.Net, dst string, timeout time.Duration) (time.Duration,
6573 return 0 , fmt .Errorf ("failed to write ICMP packet: %w" , err )
6674 }
6775
68- n , err := socket .Read (icmpBytes [:])
76+ // Use larger buffer for reading to handle potential network congestion
77+ readBuffer := make ([]byte , 1500 )
78+ n , err := socket .Read (readBuffer )
6979 if err != nil {
7080 return 0 , fmt .Errorf ("failed to read ICMP packet: %w" , err )
7181 }
7282
73- replyPacket , err := icmp .ParseMessage (1 , icmpBytes [:n ])
83+ replyPacket , err := icmp .ParseMessage (1 , readBuffer [:n ])
7484 if err != nil {
7585 return 0 , fmt .Errorf ("failed to parse ICMP packet: %w" , err )
7686 }
@@ -92,6 +102,51 @@ func ping(tnet *netstack.Net, dst string, timeout time.Duration) (time.Duration,
92102 return latency , nil
93103}
94104
105+ // reliablePing performs multiple ping attempts with adaptive timeout
106+ func reliablePing (tnet * netstack.Net , dst string , baseTimeout time.Duration , maxAttempts int ) (time.Duration , error ) {
107+ var lastErr error
108+ var totalLatency time.Duration
109+ successCount := 0
110+
111+ for attempt := 1 ; attempt <= maxAttempts ; attempt ++ {
112+ // Adaptive timeout: increase timeout for later attempts
113+ timeout := baseTimeout + time .Duration (attempt - 1 )* 500 * time .Millisecond
114+
115+ // Add jitter to prevent thundering herd
116+ jitter := time .Duration (rand .Intn (100 )) * time .Millisecond
117+ timeout += jitter
118+
119+ latency , err := ping (tnet , dst , timeout )
120+ if err != nil {
121+ lastErr = err
122+ logger .Debug ("Ping attempt %d/%d failed: %v" , attempt , maxAttempts , err )
123+
124+ // Brief pause between attempts with exponential backoff
125+ if attempt < maxAttempts {
126+ backoff := time .Duration (attempt ) * 50 * time .Millisecond
127+ time .Sleep (backoff )
128+ }
129+ continue
130+ }
131+
132+ totalLatency += latency
133+ successCount ++
134+
135+ // If we get at least one success, we can return early for health checks
136+ if successCount > 0 {
137+ avgLatency := totalLatency / time .Duration (successCount )
138+ logger .Debug ("Reliable ping succeeded after %d attempts, avg latency: %v" , attempt , avgLatency )
139+ return avgLatency , nil
140+ }
141+ }
142+
143+ if successCount == 0 {
144+ return 0 , fmt .Errorf ("all %d ping attempts failed, last error: %v" , maxAttempts , lastErr )
145+ }
146+
147+ return totalLatency / time .Duration (successCount ), nil
148+ }
149+
95150func pingWithRetry (tnet * netstack.Net , dst string , timeout time.Duration ) (stopChan chan struct {}, err error ) {
96151
97152 if healthFile != "" {
@@ -180,6 +235,9 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
180235 consecutiveFailures := 0
181236 connectionLost := false
182237
238+ // Track recent latencies for adaptive timeout calculation
239+ recentLatencies := make ([]time.Duration , 0 , 10 )
240+
183241 pingStopChan := make (chan struct {})
184242
185243 go func () {
@@ -188,18 +246,52 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
188246 for {
189247 select {
190248 case <- ticker .C :
191- _ , err := ping (tnet , serverIP , pingTimeout )
249+ // Calculate adaptive timeout based on recent latencies
250+ adaptiveTimeout := pingTimeout
251+ if len (recentLatencies ) > 0 {
252+ var sum time.Duration
253+ for _ , lat := range recentLatencies {
254+ sum += lat
255+ }
256+ avgLatency := sum / time .Duration (len (recentLatencies ))
257+ // Use 3x average latency as timeout, with minimum of pingTimeout
258+ adaptiveTimeout = avgLatency * 3
259+ if adaptiveTimeout < pingTimeout {
260+ adaptiveTimeout = pingTimeout
261+ }
262+ if adaptiveTimeout > 15 * time .Second {
263+ adaptiveTimeout = 15 * time .Second
264+ }
265+ }
266+
267+ // Use reliable ping with multiple attempts
268+ maxAttempts := 2
269+ if consecutiveFailures > 4 {
270+ maxAttempts = 4 // More attempts when connection is unstable
271+ }
272+
273+ latency , err := reliablePing (tnet , serverIP , adaptiveTimeout , maxAttempts )
192274 if err != nil {
193275 consecutiveFailures ++
194- if consecutiveFailures < 4 {
276+
277+ // Track recent latencies (add a high value for failures)
278+ recentLatencies = append (recentLatencies , adaptiveTimeout )
279+ if len (recentLatencies ) > 10 {
280+ recentLatencies = recentLatencies [1 :]
281+ }
282+
283+ if consecutiveFailures < 2 {
195284 logger .Debug ("Periodic ping failed (%d consecutive failures): %v" , consecutiveFailures , err )
196285 } else {
197286 logger .Warn ("Periodic ping failed (%d consecutive failures): %v" , consecutiveFailures , err )
198287 }
199- if consecutiveFailures >= 8 && currentInterval < maxInterval {
288+
289+ // More lenient threshold for declaring connection lost under load
290+ failureThreshold := 4
291+ if consecutiveFailures >= failureThreshold && currentInterval < maxInterval {
200292 if ! connectionLost {
201293 connectionLost = true
202- logger .Warn ("Connection to server lost. Continuous reconnection attempts will be made." )
294+ logger .Warn ("Connection to server lost after %d failures . Continuous reconnection attempts will be made." , consecutiveFailures )
203295 stopFunc = client .SendMessageInterval ("newt/ping/request" , map [string ]interface {}{}, 3 * time .Second )
204296 // Send registration message to the server for backward compatibility
205297 err := client .SendMessage ("newt/wg/register" , map [string ]interface {}{
@@ -216,17 +308,23 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
216308 }
217309 }
218310 }
219- currentInterval = time .Duration (float64 (currentInterval ) * 1.5 )
311+ currentInterval = time .Duration (float64 (currentInterval ) * 1.3 ) // Slower increase
220312 if currentInterval > maxInterval {
221313 currentInterval = maxInterval
222314 }
223315 ticker .Reset (currentInterval )
224316 logger .Debug ("Increased ping check interval to %v due to consecutive failures" , currentInterval )
225317 }
226318 } else {
319+ // Track recent latencies
320+ recentLatencies = append (recentLatencies , latency )
321+ if len (recentLatencies ) > 10 {
322+ recentLatencies = recentLatencies [1 :]
323+ }
324+
227325 if connectionLost {
228326 connectionLost = false
229- logger .Info ("Connection to server restored!" )
327+ logger .Info ("Connection to server restored after %d failures!" , consecutiveFailures )
230328 if healthFile != "" {
231329 err := os .WriteFile (healthFile , []byte ("ok" ), 0644 )
232330 if err != nil {
@@ -235,12 +333,12 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
235333 }
236334 }
237335 if currentInterval > pingInterval {
238- currentInterval = time .Duration (float64 (currentInterval ) * 0.8 )
336+ currentInterval = time .Duration (float64 (currentInterval ) * 0.9 ) // Slower decrease
239337 if currentInterval < pingInterval {
240338 currentInterval = pingInterval
241339 }
242340 ticker .Reset (currentInterval )
243- logger .Info ("Decreased ping check interval to %v after successful ping" , currentInterval )
341+ logger .Debug ("Decreased ping check interval to %v after successful ping" , currentInterval )
244342 }
245343 consecutiveFailures = 0
246344 }
0 commit comments