@@ -80,7 +80,7 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
8080 if err != nil {
8181 klog .ErrorS (err , "no tunnels available" )
8282 conn .Write ([]byte (fmt .Sprintf ("HTTP/1.1 500 Internal Server Error\r \n Content-Type: text/plain\r \n \r \n currently no tunnels available: %v" , err )))
83- // conn.Close() is handled by the top-level defer
83+ // The hijacked connection will be closed by the closeOnce defer.
8484 return
8585 }
8686 closed := make (chan struct {})
@@ -104,13 +104,27 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
104104 // This defer acts as a safeguard to ensure we clean up the pending dial
105105 // if the connection is never successfully established.
106106 established := false
107+ // Track if the frontend client explicitly closed the connection.
108+ var frontendDidClose bool
107109 defer func () {
108110 if ! established {
109- if t .Server .PendingDial .Remove (random ) != nil {
110- // This metric is observed only when the frontend closes the connection.
111- // Other failure reasons are observed elsewhere.
111+ // Always attempt to remove the pending dial.
112+ // This is safe even if another goroutine already removed it.
113+ removedConn := t .Server .PendingDial .Remove (random )
114+
115+ // If the frontend explicitly closed the connection, we record that failure reason.
116+ // This is more accurate than inferring the reason from whether Remove() found an entry.
117+ if frontendDidClose {
112118 metrics .Metrics .ObserveDialFailure (metrics .DialFailureFrontendClose )
119+ } else if removedConn != nil {
120+ // If the frontend did *not* close, but we *did* remove the pending dial,
121+ // it implies we hit a timeout or backend context cancellation before
122+ // the agent could respond. We can attribute this to a backend failure.
123+ metrics .Metrics .ObserveDialFailure (metrics .DialFailureBackendClose )
113124 }
125+ // If removedConn is nil and frontendDidClose is false, it means another
126+ // goroutine (e.g., processing a DIAL_RSP) already handled the cleanup,
127+ // so we don't need to do anything here.
114128 }
115129 }()
116130
@@ -143,6 +157,7 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
143157 klog .V (3 ).InfoS ("Connection established, sent 200 OK" , "host" , r .Host , "agentID" , connection .agentID , "connectionID" , connection .connectID )
144158
145159 case <- closed : // Connection was closed by the client before being established
160+ frontendDidClose = true
146161 klog .V (2 ).InfoS ("Frontend connection closed before being established" , "host" , r .Host , "dialID" , connection .dialID , "agentID" , connection .agentID )
147162 // The deferred cleanup will run when we return here.
148163 return
0 commit comments