Skip to content

Commit 61b8654

Browse files
committed
addressing feedback
this commit ensures that metrics are properly accounted for Signed-off-by: Imran Pochi <[email protected]>
1 parent 1ccdfbe commit 61b8654

File tree

1 file changed

+19
-4
lines changed

1 file changed

+19
-4
lines changed

pkg/server/tunnel.go

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
8080
if err != nil {
8181
klog.ErrorS(err, "no tunnels available")
8282
conn.Write([]byte(fmt.Sprintf("HTTP/1.1 500 Internal Server Error\r\nContent-Type: text/plain\r\n\r\ncurrently no tunnels available: %v", err)))
83-
// conn.Close() is handled by the top-level defer
83+
// The hijacked connection will be closed by the closeOnce defer.
8484
return
8585
}
8686
closed := make(chan struct{})
@@ -104,13 +104,27 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
104104
// This defer acts as a safeguard to ensure we clean up the pending dial
105105
// if the connection is never successfully established.
106106
established := false
107+
// Track if the frontend client explicitly closed the connection.
108+
var frontendDidClose bool
107109
defer func() {
108110
if !established {
109-
if t.Server.PendingDial.Remove(random) != nil {
110-
// This metric is observed only when the frontend closes the connection.
111-
// Other failure reasons are observed elsewhere.
111+
// Always attempt to remove the pending dial.
112+
// This is safe even if another goroutine already removed it.
113+
removedConn := t.Server.PendingDial.Remove(random)
114+
115+
// If the frontend explicitly closed the connection, we record that failure reason.
116+
// This is more accurate than inferring the reason from whether Remove() found an entry.
117+
if frontendDidClose {
112118
metrics.Metrics.ObserveDialFailure(metrics.DialFailureFrontendClose)
119+
} else if removedConn != nil {
120+
// If the frontend did *not* close, but we *did* remove the pending dial,
121+
// it implies we hit a timeout or backend context cancellation before
122+
// the agent could respond. We can attribute this to a backend failure.
123+
metrics.Metrics.ObserveDialFailure(metrics.DialFailureBackendClose)
113124
}
125+
// If removedConn is nil and frontendDidClose is false, it means another
126+
// goroutine (e.g., processing a DIAL_RSP) already handled the cleanup,
127+
// so we don't need to do anything here.
114128
}
115129
}()
116130

@@ -143,6 +157,7 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
143157
klog.V(3).InfoS("Connection established, sent 200 OK", "host", r.Host, "agentID", connection.agentID, "connectionID", connection.connectID)
144158

145159
case <-closed: // Connection was closed by the client before being established
160+
frontendDidClose = true
146161
klog.V(2).InfoS("Frontend connection closed before being established", "host", r.Host, "dialID", connection.dialID, "agentID", connection.agentID)
147162
// The deferred cleanup will run when we return here.
148163
return

0 commit comments

Comments
 (0)