@@ -4,8 +4,10 @@ import (
44 "context"
55 "encoding/json"
66 "errors"
7+ "fmt"
78 "io"
89 "io/ioutil"
10+ "os"
911 "reflect"
1012 "sync"
1113 "sync/atomic"
@@ -19,6 +21,8 @@ const wsCancel = "xrpc.cancel"
1921const chValue = "xrpc.ch.val"
2022const chClose = "xrpc.ch.close"
2123
24+ var debugTrace = os .Getenv ("JSONRPC_ENABLE_DEBUG_TRACE" ) == "1"
25+
2226type frame struct {
2327 // common
2428 Jsonrpc string `json:"jsonrpc"`
@@ -92,11 +96,7 @@ type wsConn struct {
9296
9397// nextMessage wait for one message and puts it to the incoming channel
9498func (c * wsConn ) nextMessage () {
95- if c .timeout > 0 {
96- if err := c .conn .SetReadDeadline (time .Now ().Add (c .timeout )); err != nil {
97- log .Error ("setting read deadline" , err )
98- }
99- }
99+ c .resetReadDeadline ()
100100 msgType , r , err := c .conn .NextReader ()
101101 if err != nil {
102102 c .incomingErr = err
@@ -135,6 +135,10 @@ func (c *wsConn) sendRequest(req request) error {
135135 c .writeLk .Lock ()
136136 defer c .writeLk .Unlock ()
137137
138+ if debugTrace {
139+ log .Debugw ("sendRequest" , "req" , req .Method , "id" , req .ID )
140+ }
141+
138142 if err := c .conn .WriteJSON (req ); err != nil {
139143 return err
140144 }
@@ -275,10 +279,11 @@ func (c *wsConn) handleChanOut(ch reflect.Value, req interface{}) error {
275279
276280// handleCtxAsync handles context lifetimes for client
277281// TODO: this should be aware of events going through chanHandlers, and quit
278- // when the related channel is closed.
279- // This should also probably be a single goroutine,
280- // Note that not doing this should be fine for now as long as we are using
281- // contexts correctly (cancelling when async functions are no longer is use)
282+ //
283+ // when the related channel is closed.
284+ // This should also probably be a single goroutine,
285+ // Note that not doing this should be fine for now as long as we are using
286+ // contexts correctly (cancelling when async functions are no longer is use)
282287func (c * wsConn ) handleCtxAsync (actx context.Context , id interface {}) {
283288 <- actx .Done ()
284289
@@ -486,6 +491,14 @@ func (c *wsConn) setupPings() func() {
486491 }
487492 return nil
488493 })
494+ c .conn .SetPingHandler (func (appData string ) error {
495+ // treat pings as pongs - this lets us register server activity even if it's too busy to respond to our pings
496+ select {
497+ case c .pongs <- struct {}{}:
498+ default :
499+ }
500+ return nil
501+ })
489502
490503 stop := make (chan struct {})
491504
@@ -600,8 +613,13 @@ func (c *wsConn) handleWsConn(ctx context.Context) {
600613 timeoutCh = timeoutTimer .C
601614 }
602615
616+ start := time .Now ()
617+ action := ""
618+
603619 select {
604620 case r , ok := <- c .incoming :
621+ action = "incoming"
622+
605623 err := c .incomingErr
606624
607625 if ok {
@@ -611,9 +629,11 @@ func (c *wsConn) handleWsConn(ctx context.Context) {
611629 var frame frame
612630 if err = json .NewDecoder (r ).Decode (& frame ); err == nil {
613631 if frame .ID , err = normalizeID (frame .ID ); err == nil {
632+ action = fmt .Sprintf ("incoming(%s,%v)" , frame .Method , frame .ID )
633+
614634 c .handleFrame (ctx , frame )
615635 go c .nextMessage ()
616- continue
636+ break
617637 }
618638 }
619639 }
@@ -628,6 +648,8 @@ func (c *wsConn) handleWsConn(ctx context.Context) {
628648 return // failed to reconnect
629649 }
630650 case req := <- c .requests :
651+ action = fmt .Sprintf ("send-request(%s,%v)" , req .req .Method , req .req .ID )
652+
631653 c .writeLk .Lock ()
632654 if req .req .ID != nil {
633655 if c .incomingErr != nil { // No conn?, immediate fail
@@ -649,11 +671,9 @@ func (c *wsConn) handleWsConn(ctx context.Context) {
649671 log .Errorf ("sendReqest failed (Handle me): %s" , err )
650672 }
651673 case <- c .pongs :
652- if c .timeout > 0 {
653- if err := c .conn .SetReadDeadline (time .Now ().Add (c .timeout )); err != nil {
654- log .Error ("setting read deadline" , err )
655- }
656- }
674+ action = "pong"
675+
676+ c .resetReadDeadline ()
657677 case <- timeoutCh :
658678 if c .pingInterval == 0 {
659679 // pings not running, this is perfectly normal
@@ -665,7 +685,7 @@ func (c *wsConn) handleWsConn(ctx context.Context) {
665685 log .Warnw ("timed-out websocket close error" , "error" , err )
666686 }
667687 c .writeLk .Unlock ()
668- log .Errorw ("Connection timeout" , "remote" , c .conn .RemoteAddr ())
688+ log .Errorw ("Connection timeout" , "remote" , c .conn .RemoteAddr (), "lastAction" , action )
669689 // The server side does not perform the reconnect operation, so need to exit
670690 if c .connFactory == nil {
671691 return
@@ -684,6 +704,21 @@ func (c *wsConn) handleWsConn(ctx context.Context) {
684704 c .writeLk .Unlock ()
685705 return
686706 }
707+
708+ if c .pingInterval > 0 && time .Since (start ) > c .pingInterval * 2 {
709+ log .Warnw ("websocket long time no response" , "lastAction" , action , "time" , time .Since (start ))
710+ }
711+ if debugTrace {
712+ log .Debugw ("websocket action" , "lastAction" , action , "time" , time .Since (start ))
713+ }
714+ }
715+ }
716+
717+ func (c * wsConn ) resetReadDeadline () {
718+ if c .timeout > 0 {
719+ if err := c .conn .SetReadDeadline (time .Now ().Add (c .timeout )); err != nil {
720+ log .Error ("setting read deadline" , err )
721+ }
687722 }
688723}
689724
0 commit comments