From c1bc6b87661b29788d84cd5c73ac775c99a7795f Mon Sep 17 00:00:00 2001
From: ita004
Date: Fri, 3 Oct 2025 23:31:56 +0530
Subject: [PATCH 1/2] fix(graceful): guard WaitGroup.Wait with sync.Once to avoid panic on repeated shutdowns

Fixes #35559

Signed-off-by: ita004
---
 modules/graceful/server.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/modules/graceful/server.go b/modules/graceful/server.go
index 2525a83e77520..c06496ffc1d7a 100644
--- a/modules/graceful/server.go
+++ b/modules/graceful/server.go
@@ -34,6 +34,7 @@ type Server struct {
 	address      string
 	listener     net.Listener
 	wg           sync.WaitGroup
+	shutdownOnce sync.Once // Ensures WaitGroup.Wait() is called only once to prevent panic on reuse
 	state        state
 	lock         *sync.RWMutex
 	BeforeBegin  func(network, address string)
@@ -154,7 +155,10 @@ func (srv *Server) Serve(serve ServeFunction) error {
 	GetManager().RegisterServer()
 	err := serve(srv.listener)
 	log.Debug("Waiting for connections to finish... (PID: %d)", syscall.Getpid())
-	srv.wg.Wait()
+	// Use sync.Once to ensure WaitGroup.Wait() is only called once, preventing "WaitGroup is reused" panic
+	srv.shutdownOnce.Do(func() {
+		srv.wg.Wait()
+	})
 	srv.setState(stateTerminate)
 	GetManager().ServerDone()
 	// use of closed means that the listeners are closed - i.e. we should be shutting down - return nil

From de392560b682fb90dacff0d94f2b3ee34dcf5787 Mon Sep 17 00:00:00 2001
From: ita004
Date: Sun, 5 Oct 2025 20:45:40 +0530
Subject: [PATCH 2/2] fix(graceful): use SafeWaitGroup to prevent WaitGroup reuse panic and reject new work during shutdown

---
 modules/graceful/manager.go        |  19 ++-
 modules/graceful/manager_common.go |   9 +-
 modules/graceful/server.go         |  16 +--
 modules/graceful/server_hooks.go   |  25 ++--
 modules/graceful/wg_safe.go        |  49 ++++++++
 modules/graceful/wg_safe_test.go   | 178 +++++++++++++++++++++++++++++
 6 files changed, 262 insertions(+), 34 deletions(-)
 create mode 100644 modules/graceful/wg_safe.go
 create mode 100644 modules/graceful/wg_safe_test.go

diff --git a/modules/graceful/manager.go b/modules/graceful/manager.go
index ee1872b9996aa..806c18b0c3845 100644
--- a/modules/graceful/manager.go
+++ b/modules/graceful/manager.go
@@ -72,7 +72,10 @@ func initManager(ctx context.Context) {
 // The Cancel function should stop the Run function in predictable time.
 func (g *Manager) RunWithCancel(rc RunCanceler) {
 	g.RunAtShutdown(context.Background(), rc.Cancel)
-	g.runningServerWaitGroup.Add(1)
+	if !g.runningServerWaitGroup.AddIfRunning() {
+		// Shutdown already in progress, do not start this server
+		return
+	}
 	defer g.runningServerWaitGroup.Done()
 	defer func() {
 		if err := recover(); err != nil {
@@ -87,7 +90,10 @@
 // After the provided context is Done(), the main function must return once shutdown is complete.
 // (Optionally the HammerContext may be obtained and waited for however, this should be avoided if possible.)
 func (g *Manager) RunWithShutdownContext(run func(context.Context)) {
-	g.runningServerWaitGroup.Add(1)
+	if !g.runningServerWaitGroup.AddIfRunning() {
+		// Shutdown already in progress, do not start this server
+		return
+	}
 	defer g.runningServerWaitGroup.Done()
 	defer func() {
 		if err := recover(); err != nil {
@@ -102,7 +108,10 @@ func (g *Manager) RunWithShutdownContext(run func(context.Context)) {
 
 // RunAtTerminate adds to the terminate wait group and creates a go-routine to run the provided function at termination
 func (g *Manager) RunAtTerminate(terminate func()) {
-	g.terminateWaitGroup.Add(1)
+	if !g.terminateWaitGroup.AddIfRunning() {
+		// Termination already in progress, do not add this function
+		return
+	}
 	g.lock.Lock()
 	defer g.lock.Unlock()
 	g.toRunAtTerminate = append(g.toRunAtTerminate,
@@ -155,12 +164,12 @@ func (g *Manager) doShutdown() {
 		go g.doHammerTime(setting.GracefulHammerTime)
 	}
 	go func() {
-		g.runningServerWaitGroup.Wait()
+		g.runningServerWaitGroup.Shutdown()
 		// Mop up any remaining unclosed events.
 		g.doHammerTime(0)
 		<-time.After(1 * time.Second)
 		g.doTerminate()
-		g.terminateWaitGroup.Wait()
+		g.terminateWaitGroup.Shutdown()
 		g.lock.Lock()
 		g.managerCtxCancel()
 		g.lock.Unlock()
diff --git a/modules/graceful/manager_common.go b/modules/graceful/manager_common.go
index 7cfbdfbeb0ead..5e79825f52cb9 100644
--- a/modules/graceful/manager_common.go
+++ b/modules/graceful/manager_common.go
@@ -43,8 +43,8 @@ type Manager struct {
 	hammerCtxCancel        context.CancelFunc
 	terminateCtxCancel     context.CancelFunc
 	managerCtxCancel       context.CancelFunc
-	runningServerWaitGroup sync.WaitGroup
-	terminateWaitGroup     sync.WaitGroup
+	runningServerWaitGroup SafeWaitGroup
+	terminateWaitGroup     SafeWaitGroup
 	createServerCond       sync.Cond
 	createdServer          int
 	shutdownRequested      chan struct{}
@@ -106,5 +106,8 @@ func (g *Manager) DoGracefulShutdown() {
 // Any call to RegisterServer must be matched by a call to ServerDone
 func (g *Manager) RegisterServer() {
 	KillParent()
-	g.runningServerWaitGroup.Add(1)
+	if !g.runningServerWaitGroup.AddIfRunning() {
+		// Shutdown already in progress, server should not start
+		return
+	}
 }
diff --git a/modules/graceful/server.go b/modules/graceful/server.go
index c06496ffc1d7a..5ad71e120a398 100644
--- a/modules/graceful/server.go
+++ b/modules/graceful/server.go
@@ -33,8 +33,7 @@ type Server struct {
 	network  string
 	address  string
 	listener net.Listener
-	wg           sync.WaitGroup
-	shutdownOnce sync.Once // Ensures WaitGroup.Wait() is called only once to prevent panic on reuse
+	wg SafeWaitGroup
 	state    state
 	lock     *sync.RWMutex
 	BeforeBegin func(network, address string)
@@ -51,7 +50,6 @@ func NewServer(network, address, name string) *Server {
 		log.Info("Starting new %s server: %s:%s on PID: %d", name, network, address, os.Getpid())
 	}
 	srv := &Server{
-		wg:      sync.WaitGroup{},
 		state:   stateInit,
 		lock:    &sync.RWMutex{},
 		network: network,
@@ -155,10 +153,8 @@ func (srv *Server) Serve(serve ServeFunction) error {
 	GetManager().RegisterServer()
 	err := serve(srv.listener)
 	log.Debug("Waiting for connections to finish... (PID: %d)", syscall.Getpid())
-	// Use sync.Once to ensure WaitGroup.Wait() is only called once, preventing "WaitGroup is reused" panic
-	srv.shutdownOnce.Do(func() {
-		srv.wg.Wait()
-	})
+	// Shutdown the waitgroup and wait for all connections to finish
+	srv.wg.Shutdown()
 	srv.setState(stateTerminate)
 	GetManager().ServerDone()
 	// use of closed means that the listeners are closed - i.e. we should be shutting down - return nil
@@ -228,7 +224,11 @@ func (wl *wrappedListener) Accept() (net.Conn, error) {
 		perWritePerKbTimeout: wl.server.PerWritePerKbTimeout,
 	}
 
-	wl.server.wg.Add(1)
+	if !wl.server.wg.AddIfRunning() {
+		// Server is shutting down, reject new connection
+		_ = c.Close()
+		return nil, net.ErrClosed
+	}
 	return c, nil
 }
 
diff --git a/modules/graceful/server_hooks.go b/modules/graceful/server_hooks.go
index 9b67589571c35..619d0baf659af 100644
--- a/modules/graceful/server_hooks.go
+++ b/modules/graceful/server_hooks.go
@@ -48,26 +48,15 @@ func (srv *Server) doShutdown() {
 }
 
 func (srv *Server) doHammer() {
-	defer func() {
-		// We call srv.wg.Done() until it panics.
-		// This happens if we call Done() when the WaitGroup counter is already at 0
-		// So if it panics -> we're done, Serve() will return and the
-		// parent will goroutine will exit.
-		if r := recover(); r != nil {
-			log.Error("WaitGroup at 0: Error: %v", r)
-		}
-	}()
 	if srv.getState() != stateShuttingDown {
 		return
 	}
 	log.Warn("Forcefully shutting down parent")
-	for {
-		if srv.getState() == stateTerminate {
-			break
-		}
-		srv.wg.Done()
-
-		// Give other goroutines a chance to finish before we forcibly stop them.
-		runtime.Gosched()
-	}
+
+	// Shutdown the waitgroup to prevent new connections
+	// and wait for existing connections to finish
+	srv.wg.Shutdown()
+
+	// Give other goroutines a chance to finish
+	runtime.Gosched()
 }
diff --git a/modules/graceful/wg_safe.go b/modules/graceful/wg_safe.go
new file mode 100644
index 0000000000000..49c147a32fa6c
--- /dev/null
+++ b/modules/graceful/wg_safe.go
@@ -0,0 +1,49 @@
+// Copyright 2025 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package graceful
+
+import (
+	"sync"
+)
+
+// SafeWaitGroup is a small wrapper around sync.WaitGroup that prevents
+// new Adds after Shutdown has been requested. It prevents the "WaitGroup
+// is reused before previous Wait has returned" panic by gating Add calls.
+type SafeWaitGroup struct {
+	mu       sync.Mutex
+	wg       sync.WaitGroup
+	shutting bool
+}
+
+// AddIfRunning attempts to add one to the waitgroup. It returns true if
+// the add succeeded; false if shutdown has already started and add was rejected.
+func (s *SafeWaitGroup) AddIfRunning() bool {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.shutting {
+		return false
+	}
+	s.wg.Add(1)
+	return true
+}
+
+// Done decrements the wait group counter.
+// Call only if AddIfRunning returned true previously.
+func (s *SafeWaitGroup) Done() {
+	s.wg.Done()
+}
+
+// Wait waits for the waitgroup to complete.
+func (s *SafeWaitGroup) Wait() {
+	s.wg.Wait()
+}
+
+// Shutdown marks the group as shutting and then waits for all existing
+// routines to finish. After Shutdown returns, AddIfRunning will return false.
+func (s *SafeWaitGroup) Shutdown() {
+	s.mu.Lock()
+	s.shutting = true
+	s.mu.Unlock()
+	s.wg.Wait()
+}
diff --git a/modules/graceful/wg_safe_test.go b/modules/graceful/wg_safe_test.go
new file mode 100644
index 0000000000000..dcffb9c13daa9
--- /dev/null
+++ b/modules/graceful/wg_safe_test.go
@@ -0,0 +1,178 @@
+// Copyright 2025 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package graceful
+
+import (
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestSafeWaitGroup_AddIfRunning(t *testing.T) {
+	var swg SafeWaitGroup
+
+	// Test that AddIfRunning succeeds before shutdown
+	assert.True(t, swg.AddIfRunning(), "AddIfRunning should succeed before shutdown")
+	swg.Done()
+
+	// Test that AddIfRunning fails after shutdown
+	swg.Shutdown()
+	assert.False(t, swg.AddIfRunning(), "AddIfRunning should fail after shutdown")
+}
+
+func TestSafeWaitGroup_Shutdown(t *testing.T) {
+	var swg SafeWaitGroup
+
+	// Add some work
+	assert.True(t, swg.AddIfRunning())
+	assert.True(t, swg.AddIfRunning())
+
+	// Complete work in goroutines
+	go func() {
+		time.Sleep(50 * time.Millisecond)
+		swg.Done()
+	}()
+	go func() {
+		time.Sleep(100 * time.Millisecond)
+		swg.Done()
+	}()
+
+	// Shutdown should wait for all work to complete
+	start := time.Now()
+	swg.Shutdown()
+	elapsed := time.Since(start)
+
+	assert.GreaterOrEqual(t, elapsed, 100*time.Millisecond, "Shutdown should wait for all work")
+}
+
+func TestSafeWaitGroup_ConcurrentAddAndShutdown(t *testing.T) {
+	var swg SafeWaitGroup
+	var addCount atomic.Int32
+	var successCount atomic.Int32
+
+	// Start many goroutines trying to add
+	const numGoroutines = 100
+	var wg sync.WaitGroup
+	wg.Add(numGoroutines)
+
+	for range numGoroutines {
+		go func() {
+			defer wg.Done()
+			if swg.AddIfRunning() {
+				addCount.Add(1)
+				time.Sleep(10 * time.Millisecond)
+				swg.Done()
+				successCount.Add(1)
+			}
+		}()
+	}
+
+	// Give some goroutines time to add
+	time.Sleep(5 * time.Millisecond)
+
+	// Now shutdown
+	swg.Shutdown()
+
+	// Wait for all goroutines to finish
+	wg.Wait()
+
+	// All adds that succeeded should have completed
+	assert.Equal(t, addCount.Load(), successCount.Load(), "All successful adds should complete")
+
+	// After shutdown, no new adds should succeed
+	assert.False(t, swg.AddIfRunning(), "No adds should succeed after shutdown")
+}
+
+func TestSafeWaitGroup_MultipleShutdowns(t *testing.T) {
+	var swg SafeWaitGroup
+
+	// First shutdown
+	swg.Shutdown()
+
+	// Second shutdown should not panic
+	assert.NotPanics(t, func() {
+		swg.Shutdown()
+	}, "Multiple shutdowns should not panic")
+}
+
+func TestSafeWaitGroup_WaitWithoutAdd(t *testing.T) {
+	var swg SafeWaitGroup
+
+	// Wait without any adds should not block
+	done := make(chan struct{})
+	go func() {
+		swg.Wait()
+		close(done)
+	}()
+
+	select {
+	case <-done:
+		// Success
+	case <-time.After(100 * time.Millisecond):
+		t.Fatal("Wait should not block when no work is added")
+	}
+}
+
+func TestSafeWaitGroup_PreventsPanic(t *testing.T) {
+	var swg SafeWaitGroup
+
+	// This pattern would cause a panic with sync.WaitGroup:
+	// Add -> Wait (in goroutine) -> Add (would panic)
+
+	assert.True(t, swg.AddIfRunning())
+
+	go func() {
+		time.Sleep(10 * time.Millisecond)
+		swg.Done()
+	}()
+
+	// Start shutdown which will wait
+	go swg.Shutdown()
+
+	// Give shutdown time to start
+	time.Sleep(5 * time.Millisecond)
+
+	// This should not panic, just return false
+	assert.NotPanics(t, func() {
+		result := swg.AddIfRunning()
+		assert.False(t, result, "Add should be rejected during shutdown")
+	}, "AddIfRunning should not panic during shutdown")
+}
+
+func TestSafeWaitGroup_RaceCondition(t *testing.T) {
+	// This test is designed to catch race conditions
+	// Run with: go test -race
+	var swg SafeWaitGroup
+	var wg sync.WaitGroup
+
+	const numWorkers = 50
+	wg.Add(numWorkers * 2) // workers + shutdown goroutines
+
+	// Start workers
+	for range numWorkers {
+		go func() {
+			defer wg.Done()
+			for range 10 {
+				if swg.AddIfRunning() {
+					time.Sleep(time.Millisecond)
+					swg.Done()
+				}
+			}
+		}()
+	}
+
+	// Start shutdown attempts
+	for range numWorkers {
+		go func() {
+			defer wg.Done()
+			time.Sleep(5 * time.Millisecond)
+			swg.Shutdown()
+		}()
+	}
+
+	wg.Wait()
+}
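
For reviewers: a minimal usage sketch of the SafeWaitGroup API added in wg_safe.go, showing the intended AddIfRunning/Done/Shutdown pairing. The example function and worker loop below are illustrative only and are not part of the patch; they assume they live in package graceful alongside the new type.

package graceful

import (
	"fmt"
	"time"
)

// ExampleSafeWaitGroup is a hypothetical testable example (not included in the patch).
func ExampleSafeWaitGroup() {
	var swg SafeWaitGroup

	// Start work only while the group still accepts it.
	for i := 0; i < 3; i++ {
		if !swg.AddIfRunning() {
			break // shutdown already requested; reject new work
		}
		go func() {
			defer swg.Done()
			time.Sleep(10 * time.Millisecond)
		}()
	}

	// Shutdown rejects any further AddIfRunning calls and waits for the goroutines above.
	swg.Shutdown()
	fmt.Println(swg.AddIfRunning())
	// Output: false
}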