Skip to content

Commit 8161723

Browse files
refactor: enhance tunnel client health check and control connection management
- Update health check mechanism in TunnelClient to send a KEEPALIVE message instead of an empty write, improving connection stability.
- Modify control connection handling in the server to acknowledge KEEPALIVE messages and respond to PING requests with PONG, ensuring better communication and idle timeout management.
- Improve logging for control connection status to provide clearer feedback on tunnel activity and client reconnections.
1 parent 44b8d73 commit 8161723

File tree

2 files changed

+77
-40
lines changed

2 files changed

+77
-40
lines changed

client/internal/tunnel/client.go

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -258,10 +258,11 @@ func (tc *TunnelClient) isHealthy() bool {
258258
return false
259259
}
260260

261-
// Try to write a simple ping (this is a basic health check)
262-
// In a more sophisticated implementation, you might have a proper ping/pong protocol
261+
// Send KEEPALIVE message to keep connection alive
262+
// The server will respond but we don't wait for it here to avoid
263+
// conflicts with handleTunnelConnections which is also reading
263264
conn.SetWriteDeadline(time.Now().Add(5 * time.Second))
264-
_, err := conn.Write([]byte{}) // Empty write to test connection
265+
_, err := fmt.Fprintf(conn, "KEEPALIVE\n")
265266
conn.SetWriteDeadline(time.Time{})
266267

267268
return err == nil
@@ -311,7 +312,7 @@ func (tc *TunnelClient) handleTunnelConnections() {
311312
case <-tc.stopSignal:
312313
return
313314
default:
314-
// Read connection request from server
315+
// Read messages from server
315316
line, err := reader.ReadString('\n')
316317
if err != nil {
317318
if !strings.Contains(err.Error(), "use of closed network connection") {
@@ -342,6 +343,9 @@ func (tc *TunnelClient) handleTunnelConnections() {
342343
// Handle this connection in a separate goroutine
343344
tc.wg.Add(1)
344345
go tc.handleDataConnection(connID)
346+
} else if line == "PONG" {
347+
// Server keepalive response - connection is healthy
348+
// Just ignore it, we already know we're connected
345349
}
346350
}
347351
}

server/internal/server/server.go

Lines changed: 69 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -322,20 +322,53 @@ func (s *Server) handleControlConnection(conn net.Conn) {
322322
// Start handling tunnel connections
323323
go tunnel.handleTunnel()
324324

325-
// Monitor control connection
325+
// Monitor control connection - but don't kill tunnel on errors
326+
// Tunnels should only die on explicit DISCONNECT, not on idle timeouts
326327
for {
328+
// Set a read deadline to prevent indefinite blocking
329+
conn.SetReadDeadline(time.Now().Add(5 * time.Minute))
327330
line, err := reader.ReadString('\n')
331+
328332
if err != nil {
329-
log.Printf("Control connection closed for tunnel %s: %v", tunnel.ID, err)
330-
s.stopTunnel(tunnel)
331-
break
333+
// Check if it's just a timeout (expected for idle connections)
334+
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
335+
// Timeout is normal - client is idle but connected
336+
// Log periodically but don't stop tunnel
337+
log.Printf("💤 Control connection idle for tunnel %s (tunnel still active)", tunnel.ID)
338+
continue
339+
}
340+
341+
// For other errors (connection closed, etc), close control conn but keep tunnel alive
342+
log.Printf("⚠️ Control connection lost for tunnel %s: %v (tunnel infrastructure remains active for reconnection)", tunnel.ID, err)
343+
conn.Close()
344+
345+
// Mark tunnel client as nil so it can be reconnected
346+
s.mu.Lock()
347+
if tunnel.Client == conn {
348+
tunnel.Client = nil
349+
log.Printf("🔄 Tunnel %s ready for client reconnection", tunnel.ID)
350+
}
351+
s.mu.Unlock()
352+
return
332353
}
354+
355+
// Clear deadline after successful read
356+
conn.SetReadDeadline(time.Time{})
357+
333358
line = strings.TrimSpace(line)
334-
if line == "DISCONNECT" {
335-
log.Printf("🚪 Client requested disconnect for tunnel %s", tunnel.ID)
359+
switch line {
360+
case "DISCONNECT":
361+
log.Printf("🚪 Client explicitly disconnected from tunnel %s", tunnel.ID)
336362
s.stopTunnel(tunnel)
337-
break
363+
return
364+
case "PING":
365+
// Respond to PING with PONG
366+
fmt.Fprintf(conn, "PONG\n")
367+
case "KEEPALIVE":
368+
// Just acknowledge keepalive, no response needed
369+
// This keeps the connection alive and prevents idle timeout
338370
}
371+
// Ignore other messages
339372
}
340373
}
341374

@@ -1027,38 +1060,38 @@ func (t *Tunnel) acceptRestoredConnections(s *Server) {
10271060
log.Printf("🌐 External connection attempt to restored port %s from %s:%d",
10281061
t.RemotePort, clientAddr.IP.String(), clientAddr.Port)
10291062

1030-
// Check if the tunnel now has an active client
1031-
t.wg.Add(1)
1032-
go func(c net.Conn) {
1033-
// Check if client is available (with a short wait)
1034-
maxWaitTime := 2 * time.Second
1035-
checkInterval := 100 * time.Millisecond
1036-
waited := time.Duration(0)
1037-
1038-
for waited < maxWaitTime {
1039-
if t.Client != nil {
1040-
// Client is now connected, handle this connection normally
1041-
log.Printf("✅ Client reconnected for restored port %s, handling connection", t.RemotePort)
1042-
t.handleConnection(c)
1043-
// handleConnection will call t.wg.Done() and close the connection
1044-
return
1063+
// Check if the tunnel now has an active client
1064+
t.wg.Add(1)
1065+
go func(c net.Conn) {
1066+
// Check if client is available (with a short wait)
1067+
maxWaitTime := 2 * time.Second
1068+
checkInterval := 100 * time.Millisecond
1069+
waited := time.Duration(0)
1070+
1071+
for waited < maxWaitTime {
1072+
if t.Client != nil {
1073+
// Client is now connected, handle this connection normally
1074+
log.Printf("✅ Client reconnected for restored port %s, handling connection", t.RemotePort)
1075+
t.handleConnection(c)
1076+
// handleConnection will call t.wg.Done() and close the connection
1077+
return
1078+
}
1079+
time.Sleep(checkInterval)
1080+
waited += checkInterval
10451081
}
1046-
time.Sleep(checkInterval)
1047-
waited += checkInterval
1048-
}
10491082

1050-
// Client still not available, close connection gracefully
1051-
// We need to call Done() and Close() here since handleConnection was not called
1052-
defer t.wg.Done()
1053-
defer c.Close()
1054-
1055-
log.Printf("⏰ Client not available for restored port %s, closing connection from %s:%d",
1056-
t.RemotePort, clientAddr.IP.String(), clientAddr.Port)
1083+
// Client still not available, close connection gracefully
1084+
// We need to call Done() and Close() here since handleConnection was not called
1085+
defer t.wg.Done()
1086+
defer c.Close()
1087+
1088+
log.Printf("⏰ Client not available for restored port %s, closing connection from %s:%d",
1089+
t.RemotePort, clientAddr.IP.String(), clientAddr.Port)
10571090

1058-
// Log the connection attempt
1059-
t.logConnectionAttempt(clientAddr.IP.String(), clientAddr.Port, "error",
1060-
"Tunnel client not connected")
1061-
}(conn)
1091+
// Log the connection attempt
1092+
t.logConnectionAttempt(clientAddr.IP.String(), clientAddr.Port, "error",
1093+
"Tunnel client not connected")
1094+
}(conn)
10621095
}
10631096
}
10641097
}

0 commit comments

Comments
 (0)