Skip to content

Commit e3c5840

Browse files
committed
k8s.io/apimachinery: add HandleCrashWithLogger and HandleErrorWithLogger
There are situations where it makes more sense to pass a logger through a call chain, primarily because passing a context would imply that the call chain should honor cancellation even though there is a different shutdown mechanism. Using the *WithContext variants would cause additional overhead for klog.NewContext, which hurts in particular for HandleCrash because that function is typically a nop that doesn't actually need to log anything. HandleCrashWithLogger avoids that overhead. For HandleError that is less relevant because it always logs, but for the sake of symmetry it also gets added. Putting klog.Logger (= logr.Logger) into the public Kubernetes Go API is okay because it's no longer realistic that these packages can ever drop the klog dependency. Callers using slog as logger in their binary can use https://github.com/veqryn/slog-context to store a slog.Logger in a context and then call the *WithContext variants, klog.FromContext will be able to use it. This is probably very rare, so there's no need for *WithSlog variants. While at it, unit testing gets enhanced and logging in panic handlers gets improved such that they are guaranteed to get a saner location when not doing any caller skipping. Previously, this was undefined.
1 parent 95d71c4 commit e3c5840

File tree

2 files changed

+164
-9
lines changed

2 files changed

+164
-9
lines changed

staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime.go

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ var (
3636
)
3737

3838
// PanicHandlers is a list of functions which will be invoked when a panic happens.
39+
//
40+
// The code invoking these handlers prepares a contextual logger so that
41+
// klog.FromContext(ctx) already skips over the panic handler itself and
42+
// several other intermediate functions, ideally such that the log output
43+
// is attributed to the code which triggered the panic.
3944
var PanicHandlers = []func(context.Context, interface{}){logPanic}
4045

4146
// HandleCrash simply catches a crash and logs an error. Meant to be called via
@@ -45,7 +50,7 @@ var PanicHandlers = []func(context.Context, interface{}){logPanic}
4550
//
4651
// E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully.
4752
//
48-
// Contextual logging: HandleCrashWithContext should be used instead of HandleCrash in code which supports contextual logging.
53+
// Contextual logging: HandleCrashWithContext or HandleCrashWithLogger should be used instead of HandleCrash in code which supports contextual logging.
4954
func HandleCrash(additionalHandlers ...func(interface{})) {
5055
if r := recover(); r != nil {
5156
additionalHandlersWithContext := make([]func(context.Context, interface{}), len(additionalHandlers))
@@ -74,10 +79,30 @@ func HandleCrashWithContext(ctx context.Context, additionalHandlers ...func(cont
7479
}
7580
}
7681

77-
// handleCrash is the common implementation of HandleCrash and HandleCrash.
82+
// HandleCrashWithLogger simply catches a crash and logs an error. Meant to be called via
83+
// defer. Additional context-specific handlers can be provided, and will be
84+
// called in case of panic. HandleCrash actually crashes, after calling the
85+
// handlers and logging the panic message.
86+
//
87+
// E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully.
88+
func HandleCrashWithLogger(logger klog.Logger, additionalHandlers ...func(context.Context, interface{})) {
89+
if r := recover(); r != nil {
90+
ctx := klog.NewContext(context.Background(), logger)
91+
handleCrash(ctx, r, additionalHandlers...)
92+
}
93+
}
94+
95+
// handleCrash is the common implementation of the HandleCrash* variants.
7896
// Having those call a common implementation ensures that the stack depth
7997
// is the same regardless through which path the handlers get invoked.
8098
func handleCrash(ctx context.Context, r any, additionalHandlers ...func(context.Context, interface{})) {
99+
// We don't really know how many call frames to skip because the Go
100+
// panic handler is between us and the code where the panic occurred.
101+
// If it's one function (as in Go 1.21), then skipping four levels
102+
// gets us to the function which called the `defer HandleCrashWithontext(...)`.
103+
logger := klog.FromContext(ctx).WithCallDepth(4)
104+
ctx = klog.NewContext(ctx, logger)
105+
81106
for _, fn := range PanicHandlers {
82107
fn(ctx, r)
83108
}
@@ -106,11 +131,7 @@ func logPanic(ctx context.Context, r interface{}) {
106131
stacktrace := make([]byte, size)
107132
stacktrace = stacktrace[:runtime.Stack(stacktrace, false)]
108133

109-
// We don't really know how many call frames to skip because the Go
110-
// panic handler is between us and the code where the panic occurred.
111-
// If it's one function (as in Go 1.21), then skipping four levels
112-
// gets us to the function which called the `defer HandleCrashWithontext(...)`.
113-
logger := klog.FromContext(ctx).WithCallDepth(4)
134+
logger := klog.FromContext(ctx)
114135

115136
// For backwards compatibility, conversion to string
116137
// is handled here instead of defering to the logging
@@ -176,12 +197,19 @@ func HandleError(err error) {
176197
// and key/value pairs.
177198
//
178199
// This variant should be used instead of HandleError because it supports
179-
// structured, contextual logging.
200+
// structured, contextual logging. Alternatively, [HandleErrorWithLogger] can
201+
// be used if a logger is available instead of a context.
180202
func HandleErrorWithContext(ctx context.Context, err error, msg string, keysAndValues ...interface{}) {
181203
handleError(ctx, err, msg, keysAndValues...)
182204
}
183205

184-
// handleError is the common implementation of HandleError and HandleErrorWithContext.
206+
// HandleErrorWithLogger is an alternative to [HandlerErrorWithContext] which accepts
207+
// a logger for contextual logging.
208+
func HandleErrorWithLogger(logger klog.Logger, err error, msg string, keysAndValues ...interface{}) {
209+
handleError(klog.NewContext(context.Background(), logger), err, msg, keysAndValues...)
210+
}
211+
212+
// handleError is the common implementation of the HandleError* variants.
185213
// Using this common implementation ensures that the stack depth
186214
// is the same regardless through which path the handlers get invoked.
187215
func handleError(ctx context.Context, err error, msg string, keysAndValues ...interface{}) {

staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime_test.go

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,22 @@ package runtime
1919
import (
2020
"bytes"
2121
"context"
22+
"errors"
2223
"fmt"
2324
"io"
2425
"net/http"
2526
"os"
2627
"regexp"
28+
"runtime"
2729
"strings"
2830
"sync"
2931
"testing"
3032
"time"
33+
34+
"github.com/stretchr/testify/assert"
35+
36+
"k8s.io/klog/v2"
37+
"k8s.io/klog/v2/textlogger"
3138
)
3239

3340
func TestHandleCrash(t *testing.T) {
@@ -36,6 +43,7 @@ func TestHandleCrash(t *testing.T) {
3643
t.Errorf("Expected a panic to recover from")
3744
}
3845
}()
46+
//nolint:logcheck // Intentionally uses the old API.
3947
defer HandleCrash()
4048
panic("Test Panic")
4149
}
@@ -55,6 +63,7 @@ func TestCustomHandleCrash(t *testing.T) {
5563
t.Errorf("Expected a panic to recover from")
5664
}
5765
}()
66+
//nolint:logcheck // Intentionally uses the old API.
5867
defer HandleCrash()
5968
panic("test")
6069
}()
@@ -73,6 +82,7 @@ func TestCustomHandleError(t *testing.T) {
7382
},
7483
}
7584
err := fmt.Errorf("test")
85+
//nolint:logcheck // Intentionally uses the old API.
7686
HandleError(err)
7787
if result != err {
7888
t.Errorf("did not receive custom handler")
@@ -86,6 +96,7 @@ func TestHandleCrashLog(t *testing.T) {
8696
t.Fatalf("expected a panic to recover from")
8797
}
8898
}()
99+
//nolint:logcheck // Intentionally uses the old API.
89100
defer HandleCrash()
90101
panic("test panic")
91102
})
@@ -119,13 +130,80 @@ func TestHandleCrashLog(t *testing.T) {
119130
}
120131
}
121132

133+
func TestHandleCrashContextual(t *testing.T) {
134+
for name, handleCrash := range map[string]func(logger klog.Logger, trigger func(), additionalHandlers ...func(context.Context, interface{})){
135+
"WithLogger": func(logger klog.Logger, trigger func(), additionalHandlers ...func(context.Context, interface{})) {
136+
logger = logger.WithCallDepth(2) // This function *and* the trigger helper.
137+
defer HandleCrashWithLogger(logger, additionalHandlers...)
138+
trigger()
139+
},
140+
"WithContext": func(logger klog.Logger, trigger func(), additionalHandlers ...func(context.Context, interface{})) {
141+
logger = logger.WithCallDepth(2)
142+
defer HandleCrashWithContext(klog.NewContext(context.Background(), logger), additionalHandlers...)
143+
trigger()
144+
},
145+
} {
146+
t.Run(name, func(t *testing.T) {
147+
for name, tt := range map[string]struct {
148+
trigger func()
149+
expectPanic string
150+
}{
151+
"no-panic": {
152+
trigger: func() {},
153+
expectPanic: "",
154+
},
155+
"string-panic": {
156+
trigger: func() { panic("fake") },
157+
expectPanic: "fake",
158+
},
159+
"int-panic": {
160+
trigger: func() { panic(42) },
161+
expectPanic: "42",
162+
},
163+
} {
164+
t.Run(name, func(t *testing.T) {
165+
var buffer bytes.Buffer
166+
timeInUTC := time.Date(2009, 12, 1, 13, 30, 40, 42000, time.UTC)
167+
timeString := "1201 13:30:40.000042"
168+
logger := textlogger.NewLogger(textlogger.NewConfig(
169+
textlogger.FixedTime(timeInUTC),
170+
textlogger.Output(&buffer),
171+
))
172+
ReallyCrash = false
173+
defer func() { ReallyCrash = true }()
174+
175+
handler := func(ctx context.Context, r interface{}) {
176+
// Same formatting as in HandleCrash.
177+
str, ok := r.(string)
178+
if !ok {
179+
str = fmt.Sprintf("%v", r)
180+
}
181+
klog.FromContext(ctx).Info("handler called", "panic", str)
182+
}
183+
184+
_, _, line, _ := runtime.Caller(0)
185+
handleCrash(logger, tt.trigger, handler)
186+
if tt.expectPanic != "" {
187+
assert.Contains(t, buffer.String(), fmt.Sprintf(`E%s %7d runtime_test.go:%d] "Observed a panic" panic=%q`, timeString, os.Getpid(), line+1, tt.expectPanic))
188+
assert.Contains(t, buffer.String(), fmt.Sprintf(`I%s %7d runtime_test.go:%d] "handler called" panic=%q
189+
`, timeString, os.Getpid(), line+1, tt.expectPanic))
190+
} else {
191+
assert.Empty(t, buffer.String())
192+
}
193+
})
194+
}
195+
})
196+
}
197+
}
198+
122199
func TestHandleCrashLogSilenceHTTPErrAbortHandler(t *testing.T) {
123200
log, err := captureStderr(func() {
124201
defer func() {
125202
if r := recover(); r != http.ErrAbortHandler {
126203
t.Fatalf("expected to recover from http.ErrAbortHandler")
127204
}
128205
}()
206+
//nolint:logcheck // Intentionally uses the old API.
129207
defer HandleCrash()
130208
panic(http.ErrAbortHandler)
131209
})
@@ -184,3 +262,52 @@ func Test_rudimentaryErrorBackoff_OnError_ParallelSleep(t *testing.T) {
184262
t.Errorf("OnError slept for too long: %s", since)
185263
}
186264
}
265+
266+
func TestHandleError(t *testing.T) {
267+
for name, handleError := range map[string]func(logger klog.Logger, err error, msg string, keysAndValues ...interface{}){
268+
"WithLogger": func(logger klog.Logger, err error, msg string, keysAndValues ...interface{}) {
269+
helper, logger := logger.WithCallStackHelper()
270+
helper()
271+
HandleErrorWithLogger(logger, err, msg, keysAndValues...)
272+
},
273+
"WithContext": func(logger klog.Logger, err error, msg string, keysAndValues ...interface{}) {
274+
helper, logger := logger.WithCallStackHelper()
275+
helper()
276+
HandleErrorWithContext(klog.NewContext(context.Background(), logger), err, msg, keysAndValues...)
277+
},
278+
} {
279+
t.Run(name, func(t *testing.T) {
280+
for name, tc := range map[string]struct {
281+
err error
282+
msg string
283+
keysAndValues []interface{}
284+
expectLog string
285+
}{
286+
"no-error": {
287+
msg: "hello world",
288+
expectLog: `"hello world" logger="UnhandledError"`,
289+
},
290+
"complex": {
291+
err: errors.New("fake error"),
292+
msg: "ignore",
293+
keysAndValues: []interface{}{"a", 1, "b", "c"},
294+
expectLog: `"ignore" err="fake error" logger="UnhandledError" a=1 b="c"`,
295+
},
296+
} {
297+
t.Run(name, func(t *testing.T) {
298+
var buffer bytes.Buffer
299+
timeInUTC := time.Date(2009, 12, 1, 13, 30, 40, 42000, time.UTC)
300+
timeString := "1201 13:30:40.000042"
301+
logger := textlogger.NewLogger(textlogger.NewConfig(
302+
textlogger.FixedTime(timeInUTC),
303+
textlogger.Output(&buffer),
304+
))
305+
306+
_, _, line, _ := runtime.Caller(0)
307+
handleError(logger, tc.err, tc.msg, tc.keysAndValues...)
308+
assert.Equal(t, fmt.Sprintf("E%s %7d runtime_test.go:%d] %s\n", timeString, os.Getpid(), line+1, tc.expectLog), buffer.String())
309+
})
310+
}
311+
})
312+
}
313+
}

0 commit comments

Comments
 (0)