Skip to content
3 changes: 3 additions & 0 deletions base/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ const (

// FromConnStrWarningThreshold determines the amount of time it should take before we warn about parsing a connstr (mostly for DNS resolution)
FromConnStrWarningThreshold = 10 * time.Second

// StackFilePrefix is the prefix used when writing stack trace files
StackFilePrefix = "sg_goroutines_"
)

// SyncGatewayRawDocXattrs is a list of xattrs that Sync Gateway will fetch when reading a raw document.
Expand Down
70 changes: 70 additions & 0 deletions base/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,13 @@ import (
"net"
"net/http"
"net/url"
"os"
"path/filepath"
"reflect"
"regexp"
"runtime"
"runtime/debug"
"runtime/pprof"
"slices"
"sort"
"strconv"
Expand Down Expand Up @@ -1831,3 +1834,70 @@ func IsRevTreeID(s string) bool {
}
return false
}

// GetStackTrace will return goroutine stack traces for all goroutines in Sync Gateway.
func GetStackTrace() (bytes.Buffer, error) {
profBuf := bytes.Buffer{}
err := pprof.Lookup("goroutine").WriteTo(&profBuf, 2)
return profBuf, err
}

// RotateFilenamesIfNeeded will remove old files if there are more than
// 10 matching the given filename pattern.
func RotateFilenamesIfNeeded(filename string) error {
existingFiles, err := filepath.Glob(filename)
if err != nil {
return fmt.Errorf("Error listing existing profiles in %q: %w", filename, err)
}
if len(existingFiles) <= 10 {
return nil
}
slices.Reverse(existingFiles)
var multiErr *MultiError
for _, profile := range existingFiles[10:] {
err = os.Remove(profile)
if err != nil {
multiErr = multiErr.Append(fmt.Errorf("Error removing old profile %q: %w", profile, err))
}
}
return multiErr.ErrorOrNil()
}

func LogStackTraces(ctx context.Context, logDirectory string, stackTrace bytes.Buffer, timestamp string) {

// log to console
_, _ = fmt.Fprintf(os.Stderr, "Stack trace:\n%s\n", stackTrace.String())

err := writeStackTraceFile(ctx, logDirectory, timestamp, stackTrace)
if err != nil {
return
}

rotatePath := filepath.Join(logDirectory, StackFilePrefix+"*.log")
err = RotateFilenamesIfNeeded(rotatePath)
if err != nil {
WarnfCtx(ctx, "Error rotating stack trace files in path %s: %v", rotatePath, err)
}
}

func writeStackTraceFile(ctx context.Context, logDirectory, timestamp string, stackTrace bytes.Buffer) error {
filename := filepath.Join(logDirectory, StackFilePrefix+timestamp+".log")
file, err := os.Create(filename)
defer func() {
closeErr := file.Close()
if closeErr != nil {
WarnfCtx(ctx, "Error closing stack trace file %s: %v", filename, closeErr)
}
}()
if err != nil {
WarnfCtx(ctx, "Error opening stack trace file %s: %v", filename, err)
return err
}

_, err = file.WriteString(fmt.Sprintf("Stack trace:\n%s\n", stackTrace.String()))
if err != nil {
WarnfCtx(ctx, "Error writing stack trace to file %s: %v", filename, err)
return err
}
return nil
}
11 changes: 11 additions & 0 deletions rest/adminapitest/admin_api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2269,6 +2269,17 @@ func TestHandleSGCollect(t *testing.T) {
rest.RequireStatus(t, resp, http.StatusBadRequest)
}

func TestHandleGetStackTrace(t *testing.T) {
rt := rest.NewRestTester(t, nil)
defer rt.Close()

resp := rt.SendAdminRequest(http.MethodGet, "/_debug/pprof/goroutine?debug=2", "")
rest.RequireStatus(t, resp, http.StatusOK)
rawResponseStr := resp.Body.String()
assert.Contains(t, rawResponseStr, "goroutine")
assert.Contains(t, rawResponseStr, "handlePprofGoroutine")
}

func TestConfigRedaction(t *testing.T) {
base.LongRunningTest(t)

Expand Down
25 changes: 0 additions & 25 deletions rest/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,10 @@ import (
_ "net/http/pprof"
"net/url"
"os"
"os/signal"
"strconv"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"

"github.com/go-jose/go-jose/v4"
Expand Down Expand Up @@ -2315,29 +2313,6 @@ func HandleSighup(ctx context.Context) {
base.RotateLogfiles(ctx)
}

// RegisterSignalHandler invokes functions based on the given signals:
// - SIGHUP causes Sync Gateway to rotate log files.
// - SIGINT or SIGTERM causes Sync Gateway to exit cleanly.
// - SIGKILL cannot be handled by the application.
func RegisterSignalHandler(ctx context.Context) {
signalChannel := make(chan os.Signal, 1)
signal.Notify(signalChannel, syscall.SIGHUP, os.Interrupt, syscall.SIGTERM)

go func() {
for sig := range signalChannel {
base.InfofCtx(ctx, base.KeyAll, "Handling signal: %v", sig)
switch sig {
case syscall.SIGHUP:
HandleSighup(ctx)
default:
// Ensure log buffers are flushed before exiting.
base.FlushLogBuffers()
os.Exit(130) // 130 == exit code 128 + 2 (interrupt)
}
}
}()
}

// toDbLogConfig converts the stored logging in a DbConfig to a runtime DbLogConfig for evaluation at log time.
// This is required to turn the stored config (which does not have data stored in a O(1)-compatible format) into a data structure that has O(1) lookups for checking if we should log.
func (c *DbConfig) toDbLogConfig(ctx context.Context) *base.DbLogConfig {
Expand Down
16 changes: 13 additions & 3 deletions rest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ import (
"fmt"
"io"
"os"
"os/signal"
"path/filepath"
"runtime"
"strings"
"time"

Expand All @@ -37,7 +39,7 @@ func ServerMain() {

// TODO: Pass ctx down into HTTP servers so that serverMain can be stopped.
func serverMain(ctx context.Context, osArgs []string) error {
RegisterSignalHandler(ctx)
sigChan := RegisterSignalHandler(ctx, "")
defer base.FatalPanicHandler()

base.InitializeMemoryLoggers()
Expand All @@ -56,7 +58,7 @@ func serverMain(ctx context.Context, osArgs []string) error {
return legacyServerMain(ctx, osArgs, flagStartupConfig)
}

disablePersistentConfigFallback, err := serverMainPersistentConfig(ctx, fs, flagStartupConfig)
disablePersistentConfigFallback, err := serverMainPersistentConfig(ctx, fs, flagStartupConfig, sigChan)
if disablePersistentConfigFallback {
return legacyServerMain(ctx, osArgs, flagStartupConfig)
}
Expand All @@ -65,7 +67,7 @@ func serverMain(ctx context.Context, osArgs []string) error {
}

// serverMainPersistentConfig runs the Sync Gateway server with persistent config.
func serverMainPersistentConfig(ctx context.Context, fs *flag.FlagSet, flagStartupConfig *StartupConfig) (disablePersistentConfigFallback bool, err error) {
func serverMainPersistentConfig(ctx context.Context, fs *flag.FlagSet, flagStartupConfig *StartupConfig, sigChan chan os.Signal) (disablePersistentConfigFallback bool, err error) {

sc := DefaultStartupConfig(defaultLogFilePath)
base.TracefCtx(ctx, base.KeyAll, "default config: %#v", sc)
Expand Down Expand Up @@ -148,6 +150,14 @@ func serverMainPersistentConfig(ctx context.Context, fs *flag.FlagSet, flagStart
return false, err
}

if runtime.GOOS != "windows" {
// stop signal handlers and register for stack trace handling to be able to log to configured directory,
// this is not supported for windows environments
signal.Stop(sigChan)
close(sigChan)
RegisterSignalHandler(ctx, svrctx.Config.Logging.LogFilePath)
}

svrctx.initialStartupConfig = initialStartupConfig

svrctx.addLegacyPrincipals(ctx, legacyDbUsers, legacyDbRoles)
Expand Down
60 changes: 60 additions & 0 deletions rest/register_handler_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//go:build !windows
// +build !windows
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this line is actually unnecessary and will be flagged by govet in go 1.25, but not sure, we can leave this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I use go 1.25 locally and my linter didn't pick that up so should be okay fro now (I think)


/*
Copyright 2025-Present Couchbase, Inc.
Use of this software is governed by the Business Source License included in
the file licenses/BSL-Couchbase.txt. As of the Change Date specified in that
file, in accordance with the Business Source License, use of this software will
be governed by the Apache License, Version 2.0, included in the file
licenses/APL2.txt.
*/

package rest

import (
"context"
"os"
"os/signal"
"syscall"
"time"

"github.com/couchbase/sync_gateway/base"
)

// RegisterSignalHandler invokes functions based on the given signals for unix environments:
// - SIGHUP causes Sync Gateway to rotate log files.
// - SIGINT or SIGTERM causes Sync Gateway to exit cleanly.
// - SIGKILL cannot be handled by the application.
// - SIGUSR1 causes Sync Gateway to log stack traces for all goroutines.
func RegisterSignalHandler(ctx context.Context, logDirectory string) chan os.Signal {
signalChannel := make(chan os.Signal, 1)
signal.Notify(signalChannel, syscall.SIGHUP, os.Interrupt, syscall.SIGTERM, syscall.SIGUSR1)

go func() {
for sig := range signalChannel {
base.InfofCtx(ctx, base.KeyAll, "Handling signal: %v", sig)
switch sig {
case syscall.SIGHUP:
HandleSighup(ctx)
case syscall.SIGUSR1:
stackTrace, err := base.GetStackTrace()
if err != nil {
base.WarnfCtx(ctx, "Error collecting stack trace: %v", err)
} else {
base.InfofCtx(ctx, base.KeyAll, "Collecting stack trace for all goroutines")
}
// log to console and log to file in the log directory
currentTime := time.Now()
timestamp := currentTime.Format(time.RFC3339)
base.LogStackTraces(ctx, logDirectory, stackTrace, timestamp)
default:
// Ensure log buffers are flushed before exiting.
base.FlushLogBuffers()
os.Exit(130) // 130 == exit code 128 + 2 (interrupt)
}
}
}()
return signalChannel
}
47 changes: 47 additions & 0 deletions rest/register_handler_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//go:build windows
// +build windows

/*
Copyright 2025-Present Couchbase, Inc.

Use of this software is governed by the Business Source License included in
the file licenses/BSL-Couchbase.txt. As of the Change Date specified in that
file, in accordance with the Business Source License, use of this software will
be governed by the Apache License, Version 2.0, included in the file
licenses/APL2.txt.
*/

package rest

import (
"context"
"os"
"os/signal"
"syscall"

"github.com/couchbase/sync_gateway/base"
)

// RegisterSignalHandler invokes functions based on the given signals for windows environments:
// - SIGHUP causes Sync Gateway to rotate log files.
// - SIGINT or SIGTERM causes Sync Gateway to exit cleanly.
// - SIGKILL cannot be handled by the application.
func RegisterSignalHandler(ctx context.Context, logDirectory string) chan os.Signal {
signalChannel := make(chan os.Signal, 1)
signal.Notify(signalChannel, syscall.SIGHUP, os.Interrupt, syscall.SIGTERM)

go func() {
for sig := range signalChannel {
base.InfofCtx(ctx, base.KeyAll, "Handling signal: %v", sig)
switch sig {
case syscall.SIGHUP:
HandleSighup(ctx)
default:
// Ensure log buffers are flushed before exiting.
base.FlushLogBuffers()
os.Exit(130) // 130 == exit code 128 + 2 (interrupt)
}
}
}()
return signalChannel
}
32 changes: 32 additions & 0 deletions rest/server_context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"net/http"
"os"
"path/filepath"
"slices"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -1062,3 +1063,34 @@ func TestDatabaseCollectionDeletedErrorState(t *testing.T) {
allDbs = rt.ServerContext().allDatabaseSummaries()
require.Len(t, allDbs, 1)
}

func TestCollectStackTraceFile(t *testing.T) {
base.SetUpTestLogging(t, base.LevelInfo, base.KeyAll)

tempPath := t.TempDir()
serverConfig := DefaultStartupConfig(tempPath)
serverConfig.Logging.LogFilePath = tempPath
ctx := t.Context()
serverContext := NewServerContext(ctx, &serverConfig, false)
defer serverContext.Close(ctx)

timeStamp := "01"
stackTrace, err := base.GetStackTrace()
require.NoError(t, err)
base.LogStackTraces(ctx, serverConfig.Logging.LogFilePath, stackTrace, timeStamp)
require.Len(t, getFilenames(t, tempPath), 1)
assert.True(t, slices.Contains(getFilenames(t, tempPath), base.StackFilePrefix+timeStamp+".log"))

// trigger rotation and assert we don't go above 10 files
expectedFiles := make([]string, 0, 10)
for i := 2; i < 12; i++ {
timeStamp = fmt.Sprintf("%d", i+2)
trace, err := base.GetStackTrace()
require.NoError(t, err)
base.LogStackTraces(ctx, serverConfig.Logging.LogFilePath, trace, timeStamp)
expectedFiles = append(expectedFiles, base.StackFilePrefix+timeStamp+".log")
}
files := getFilenames(t, tempPath)
require.Len(t, files, 10)
require.ElementsMatch(t, files, expectedFiles)
}
21 changes: 4 additions & 17 deletions rest/stats_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"path/filepath"
"runtime"
"runtime/pprof"
"slices"
"sync/atomic"
"time"

Expand Down Expand Up @@ -407,20 +406,8 @@ func (statsContext *statsContext) collectMemoryProfile(ctx context.Context, outp
if err != nil {
return fmt.Errorf("Error writing memory profile to %q: %w", filename, err)
}
existingProfiles, err := filepath.Glob(filepath.Join(outputDir, pprofPrefix+"*.pb.gz"))
if err != nil {
return fmt.Errorf("Error listing existing memory profiles in %q: %w", outputDir, err)
}
if len(existingProfiles) <= 10 {
return nil
}
slices.Reverse(existingProfiles)
var multiErr *base.MultiError
for _, profile := range existingProfiles[10:] {
err = os.Remove(profile)
if err != nil {
multiErr = multiErr.Append(fmt.Errorf("Error removing old memory profile %q: %w", profile, err))
}
}
return multiErr.ErrorOrNil()

// rotate old profiles
path := filepath.Join(outputDir, pprofPrefix+"*.pb.gz")
return base.RotateFilenamesIfNeeded(path)
}
Loading
Loading