Skip to content

Commit dbc6264

Browse files
torcolvinCopilot
andauthored
CBG-4988 log panics in http (#7876)
Co-authored-by: Copilot <[email protected]>
1 parent eaab17f commit dbc6264

File tree

7 files changed

+95
-5
lines changed

7 files changed

+95
-5
lines changed

base/devmode.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,15 @@ func IsDevMode() bool {
2525
// The SG test harness will ensure AssertionFailCount is zero at the end of tests, even without devmode enabled.
2626
// Note: Callers MUST ensure code is safe to continue executing after the Assert (e.g. by returning an error) and MUST NOT be used like a panic that will halt.
2727
func AssertfCtx(ctx context.Context, format string, args ...any) {
28+
2829
SyncGatewayStats.GlobalStats.ResourceUtilization.AssertionFailCount.Add(1)
2930
assertLogFn(ctx, AssertionFailedPrefix+format, args...)
3031
}
32+
33+
// PanicRecoveryfCtx logs a warning message. This function is suitable for recovering from a panic in a location where
34+
// it is expected to continue operation, like HTTP handlers.
35+
// When compiled with the `cb_sg_devmode` build tag this function panics to fail the test harness for better dev-time visibility.
36+
// In all cases, the WarnCount stat is incremented.
37+
func PanicRecoveryfCtx(ctx context.Context, format string, args ...any) {
38+
panicRecoveryLogFn(ctx, format, args...)
39+
}

base/devmode_off.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,5 @@ package base
1414
const cbSGDevModeBuildTagSet = false
1515

1616
var assertLogFn logFn = ErrorfCtx
17+
18+
var panicRecoveryLogFn logFn = WarnfCtx

base/devmode_on.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@
1111

1212
package base
1313

14+
import "context"
15+
1416
const cbSGDevModeBuildTagSet = true
1517

1618
var assertLogFn logFn = PanicfCtx
19+
20+
func panicRecoveryLogFn(ctx context.Context, format string, args ...any) {
21+
// add a warn count since the devmode=off path also adds a warn count
22+
SyncGatewayStats.GlobalStats.ResourceUtilization.WarnCount.Add(1)
23+
PanicfCtx(ctx, format, args...)
24+
}

db/import_listener.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ func (il *importListener) ProcessFeedEvent(event sgbucket.FeedEvent) bool {
135135
docID := string(event.Key)
136136
defer func() {
137137
if r := recover(); r != nil {
138-
base.WarnfCtx(ctx, "[%s] Unexpected panic importing document %s - skipping import: \n %s", r, base.UD(docID), debug.Stack())
138+
base.PanicRecoveryfCtx(ctx, "[%s] Unexpected panic importing document %s - skipping import: \n %s", r, base.UD(docID), debug.Stack())
139139
if il.importStats != nil {
140140
il.importStats.ImportErrorCount.Add(1)
141141
}

db/import_listener_test.go

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,20 @@ func TestImportFeedEventRecover(t *testing.T) {
2525
},
2626
}
2727
startWarnCount := base.SyncGatewayStats.GlobalStats.ResourceUtilizationStats().WarnCount.Value()
28+
2829
// assert false to indicate that this checkpoint will not be incremented
29-
require.False(t, listener.ProcessFeedEvent(sgbucket.FeedEvent{
30-
Key: []byte("example-doc"),
31-
Opcode: sgbucket.FeedOpMutation,
32-
}))
30+
if base.IsDevMode() {
31+
require.Panics(t, func() {
32+
listener.ProcessFeedEvent(sgbucket.FeedEvent{
33+
Key: []byte("example-doc"),
34+
Opcode: sgbucket.FeedOpMutation,
35+
})
36+
})
37+
} else {
38+
require.False(t, listener.ProcessFeedEvent(sgbucket.FeedEvent{
39+
Key: []byte("example-doc"),
40+
Opcode: sgbucket.FeedOpMutation,
41+
}))
42+
}
3343
require.Equal(t, startWarnCount+1, base.SyncGatewayStats.GlobalStats.ResourceUtilizationStats().WarnCount.Value())
3444
}

rest/handler.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"net/http"
2323
"net/url"
2424
"regexp"
25+
"runtime/debug"
2526
"slices"
2627
"strconv"
2728
"strings"
@@ -127,8 +128,23 @@ type handlerMethod func(*handler) error
127128
// makeHandlerWithOptions creates an http.Handler that will run a handler with the given method handlerOptions
128129
func makeHandlerWithOptions(server *ServerContext, privs handlerPrivs, accessPermissions []Permission, responsePermissions []Permission, method handlerMethod, options handlerOptions) http.Handler {
129130
return http.HandlerFunc(func(r http.ResponseWriter, rq *http.Request) {
131+
ctx := rq.Context()
132+
var serialNumber string
133+
defer func() {
134+
// ErrAbortHandler is a sentinel panic used by http.ServeHTTP and should not be logged.
135+
if err := recover(); err != nil && err != http.ErrAbortHandler {
136+
// use leading spaces to match typical request logging
137+
base.PanicRecoveryfCtx(ctx, "HTTP %s: --> panicked:\n%s", serialNumber, debug.Stack())
138+
// repanic here so that http.ServeHTTP can catch the panic and perform usual close connection behavior
139+
panic(err)
140+
}
141+
}()
130142
serverType := getCtxServerType(rq.Context())
131143
h := newHandler(server, privs, serverType, r, rq, options)
144+
145+
// re-assign ctx and serial number for panic handler
146+
ctx = h.ctx()
147+
serialNumber = h.formatSerialNumber()
132148
err := h.invoke(method, accessPermissions, responsePermissions)
133149
h.writeError(err)
134150
if !options.skipLogDuration {

rest/handler_test.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ package rest
1212

1313
import (
1414
"fmt"
15+
"io"
1516
"net/http"
1617
"net/http/httptest"
1718
"testing"
1819

1920
"github.com/couchbase/sync_gateway/base"
21+
"github.com/gorilla/mux"
2022
"github.com/stretchr/testify/assert"
2123
"github.com/stretchr/testify/require"
2224
)
@@ -222,3 +224,46 @@ func TestShouldCheckAdminRBAC(t *testing.T) {
222224
})
223225
}
224226
}
227+
228+
func TestHandlerRecoverLog(t *testing.T) {
229+
base.SetUpTestLogging(t, base.LevelTrace, base.KeyAll)
230+
testCases := []struct {
231+
name string
232+
panicArg any
233+
shouldWarn bool
234+
}{
235+
{name: "string panic", panicArg: "test panic", shouldWarn: true},
236+
{name: "error panic", panicArg: fmt.Errorf("test error panic"), shouldWarn: true},
237+
{name: "ErrAbortHandler", panicArg: http.ErrAbortHandler, shouldWarn: false},
238+
}
239+
for _, tc := range testCases {
240+
t.Run(tc.name, func(t *testing.T) {
241+
config := BootstrapStartupConfigForTest(t) // share config between both servers in test to share a groupID
242+
sc, closeFn := StartServerWithConfig(t, &config)
243+
defer closeFn()
244+
245+
ctx := base.TestCtx(t)
246+
require.NoError(t, sc.WaitForRESTAPIs(ctx))
247+
startWarnCount := base.SyncGatewayStats.GlobalStats.ResourceUtilizationStats().WarnCount.Value()
248+
handler := makeHandlerWithOptions(sc, regularPrivs, nil, nil, func(_ *handler) error {
249+
panic(tc.panicArg)
250+
}, handlerOptions{})
251+
r := mux.NewRouter()
252+
r.Use(withServerType(publicServer))
253+
r.Handle("/", handler)
254+
server := httptest.NewServer(r)
255+
defer server.Close()
256+
257+
client := server.Client()
258+
resp, err := client.Get(server.URL)
259+
if tc.shouldWarn {
260+
require.ErrorIs(t, err, io.EOF)
261+
require.Equal(t, startWarnCount+1, base.SyncGatewayStats.GlobalStats.ResourceUtilizationStats().WarnCount.Value())
262+
} else {
263+
require.NoError(t, err)
264+
require.NoError(t, resp.Body.Close())
265+
require.Equal(t, startWarnCount, base.SyncGatewayStats.GlobalStats.ResourceUtilizationStats().WarnCount.Value())
266+
}
267+
})
268+
}
269+
}

0 commit comments

Comments
 (0)