Skip to content

Commit 3b4d309

Browse files
committed
fix: re-enable async logging with proper shutdown flushing
1 parent 7519ee2 commit 3b4d309

File tree

8 files changed

+855
-9
lines changed

8 files changed

+855
-9
lines changed

cmd/spicedb/main.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package main
22

33
import (
44
"errors"
5+
"fmt"
56
"os"
67

78
"github.com/rs/zerolog"
@@ -18,11 +19,20 @@ import (
1819
)
1920

2021
func main() {
22+
os.Exit(run())
23+
}
24+
25+
func run() int {
2126
memoryprotection.InitDefaultMemoryUsageProvider()
2227

2328
// Set up root logger
2429
// This will typically be overwritten by the logging setup for a given command.
2530
log.SetGlobalLogger(zerolog.New(os.Stderr).Level(zerolog.InfoLevel))
31+
defer func() {
32+
if err := log.Close(); err != nil {
33+
fmt.Fprintf(os.Stderr, "warning: failed to flush logs: %v\n", err)
34+
}
35+
}()
2636

2737
// Enable Kubernetes gRPC resolver
2838
kuberesolver.RegisterInCluster()
@@ -33,7 +43,8 @@ func main() {
3343
// Build the complete command structure
3444
rootCmd, err := cmd.BuildRootCommand()
3545
if err != nil {
36-
log.Fatal().Err(err).Msg("failed to build root command")
46+
log.Error().Err(err).Msg("failed to build root command")
47+
return 1
3748
}
3849

3950
if err := rootCmd.Execute(); err != nil {
@@ -42,8 +53,10 @@ func main() {
4253
}
4354
var termErr spiceerrors.TerminationError
4455
if errors.As(err, &termErr) {
45-
os.Exit(termErr.ExitCode())
56+
return termErr.ExitCode()
4657
}
47-
os.Exit(1)
58+
return 1
4859
}
60+
61+
return 0
4962
}

internal/logging/logger.go

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,129 @@ package logging
22

33
import (
44
"context"
5+
"fmt"
6+
"io"
7+
"os"
8+
"sync"
9+
"time"
510

611
"github.com/go-logr/zerologr"
712
"github.com/rs/zerolog"
813
logf "sigs.k8s.io/controller-runtime/pkg/log"
914
)
1015

16+
// defaultCloseTimeout is the maximum time to wait for log flushing during shutdown.
17+
// If the underlying writer blocks longer than this, Close() will return with an error
18+
// rather than hanging indefinitely.
19+
const defaultCloseTimeout = 5 * time.Second
20+
1121
var Logger zerolog.Logger
1222

23+
type closerHolder struct {
24+
mu sync.Mutex
25+
closer io.Closer // GUARDED_BY(mu)
26+
// closed is permanent once set to true. This prevents double-close bugs where
27+
// multiple shutdown paths might try to close the same closer. Once closed,
28+
// any new closer passed to Set() is immediately closed to prevent resource leaks.
29+
closed bool // GUARDED_BY(mu)
30+
}
31+
32+
func (ch *closerHolder) Set(closer io.Closer) {
33+
var toClose io.Closer
34+
35+
ch.mu.Lock()
36+
if ch.closed {
37+
toClose = closer
38+
ch.mu.Unlock()
39+
if toClose != nil {
40+
_ = toClose.Close()
41+
}
42+
return
43+
}
44+
45+
// Don't close if the same closer is being set again
46+
if ch.closer != closer {
47+
toClose = ch.closer
48+
}
49+
ch.closer = closer
50+
ch.mu.Unlock()
51+
52+
if toClose != nil {
53+
_ = toClose.Close()
54+
}
55+
}
56+
57+
func (ch *closerHolder) Close() error {
58+
return ch.CloseWithTimeout(defaultCloseTimeout)
59+
}
60+
61+
// CloseWithTimeout closes the held closer with a timeout to prevent shutdown hangs.
62+
// If the closer doesn't complete within the timeout, an error is returned but the
63+
// close operation may still complete in the background.
64+
//
65+
// NOTE: On timeout, the goroutine performing the close continues running until the
66+
// underlying Close() completes. This is intentional for shutdown scenarios where we
67+
// prefer to return promptly rather than block indefinitely. The goroutine will not
68+
// leak permanently—it will terminate when Close() eventually returns (or when the
69+
// process exits).
70+
func (ch *closerHolder) CloseWithTimeout(timeout time.Duration) error {
71+
var toClose io.Closer
72+
73+
ch.mu.Lock()
74+
if ch.closed {
75+
ch.mu.Unlock()
76+
return nil
77+
}
78+
79+
ch.closed = true
80+
toClose = ch.closer
81+
ch.closer = nil
82+
ch.mu.Unlock()
83+
84+
if toClose == nil {
85+
return nil
86+
}
87+
88+
// For zero or negative timeout, close synchronously
89+
if timeout <= 0 {
90+
err := toClose.Close()
91+
if err != nil {
92+
fmt.Fprintf(os.Stderr, "warning: failed to flush logs: %v\n", err)
93+
}
94+
return err
95+
}
96+
97+
// Use a channel to wait for close with timeout
98+
done := make(chan error, 1)
99+
go func() {
100+
done <- toClose.Close()
101+
}()
102+
103+
select {
104+
case err := <-done:
105+
if err != nil {
106+
// Write directly to stderr since the logger may not be functional
107+
fmt.Fprintf(os.Stderr, "warning: failed to flush logs: %v\n", err)
108+
}
109+
return err
110+
case <-time.After(timeout):
111+
err := fmt.Errorf("timeout after %v waiting for log flush", timeout)
112+
fmt.Fprintf(os.Stderr, "warning: %v\n", err)
113+
return err
114+
}
115+
}
116+
117+
// reset clears the closer state without closing the current closer.
118+
// This is only for test use to restore state between test cases.
119+
func (ch *closerHolder) reset() {
120+
ch.mu.Lock()
121+
ch.closer = nil
122+
ch.closed = false
123+
ch.mu.Unlock()
124+
}
125+
126+
var globalCloser closerHolder
127+
13128
func init() {
14129
SetGlobalLogger(zerolog.Nop())
15130
logf.SetLogger(zerologr.New(&Logger))
@@ -20,6 +135,34 @@ func SetGlobalLogger(logger zerolog.Logger) {
20135
zerolog.DefaultContextLogger = &Logger
21136
}
22137

138+
func SetGlobalLoggerWithCloser(logger zerolog.Logger, closer io.Closer) {
139+
// Store the closer BEFORE activating the logger to avoid a race window where
140+
// the logger is active but Close() wouldn't flush it (if called between the two operations).
141+
globalCloser.Set(closer)
142+
SetGlobalLogger(logger)
143+
}
144+
145+
// Close flushes and releases resources owned by the globally configured logger.
146+
// It is safe to call multiple times.
147+
func Close() error { return globalCloser.Close() }
148+
149+
// ResetCloserForTesting resets the global closer state.
150+
//
151+
// WARNING: This is for TEST USE ONLY. Production code must NEVER call this function.
152+
// Calling this in production will silently discard any pending log closer, potentially
153+
// losing buffered log messages.
154+
//
155+
// This function exists because Close() is designed to be idempotent and final (sets
156+
// closed=true permanently), but tests need to reset state between test cases to ensure
157+
// isolation. Without this, a test that calls Close() would cause subsequent tests'
158+
// SetGlobalLoggerWithCloser() calls to immediately close their new closers.
159+
//
160+
// Go's package visibility constraints prevent test files in other packages from
161+
// accessing the unexported globalCloser directly, necessitating this exported helper.
162+
func ResetCloserForTesting() {
163+
globalCloser.reset()
164+
}
165+
23166
func With() zerolog.Context { return Logger.With() }
24167

25168
func Err(err error) *zerolog.Event { return Logger.Err(err) }

0 commit comments

Comments
 (0)