Skip to content

Commit 8ceb472

Browse files
committed
Update.
1 parent a4c082d commit 8ceb472

File tree

2 files changed

+34
-14
lines changed

2 files changed

+34
-14
lines changed

service/rpc/server.go

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ var ServerHandlerSingleton *ServerHandler
9292
// goroutine 计数器,用于监控 RequestTask goroutine 数量
9393
var (
9494
activeRequestTaskGoroutines int64
95-
maxRequestTaskGoroutines int64 = 500 // 最大允许的 RequestTask goroutine 数量
95+
maxRequestTaskGoroutines int64 = 100 // 大幅降低最大允许的 RequestTask goroutine 数量
9696
)
9797

9898
type ServerHandler struct {
@@ -209,10 +209,18 @@ func (s *ServerHandler) RequestTask(h *pb.Host, stream pb.ServerService_RequestT
209209
current := activeRequestTaskGoroutines
210210
total := int64(runtime.NumGoroutine())
211211

212-
// 如果总 goroutine 数量过多,拒绝新连接
213-
if total > 1000 {
214-
log.Printf("警告:总 goroutine 数量过多 (%d),拒绝新的 RequestTask 连接", total)
215-
return -1
212+
// 如果总 goroutine 数量过多,先尝试强制清理
213+
if total > 250 {
214+
log.Printf("警告:总 goroutine 数量过多 (%d),尝试强制清理", total)
215+
cleaned := ForceCleanupStaleConnections()
216+
if cleaned > 0 {
217+
log.Printf("强制清理了 %d 个连接,当前 goroutine 数量: %d", cleaned, runtime.NumGoroutine())
218+
}
219+
// 清理后仍然过多,拒绝新连接
220+
if runtime.NumGoroutine() > 300 {
221+
log.Printf("清理后 goroutine 数量仍过多 (%d),拒绝新的 RequestTask 连接", runtime.NumGoroutine())
222+
return -1
223+
}
216224
}
217225

218226
// 如果 RequestTask goroutine 数量超过限制,拒绝新连接
@@ -262,7 +270,7 @@ func (s *ServerHandler) RequestTask(h *pb.Host, stream pb.ServerService_RequestT
262270
singleton.ServerLock.RUnlock()
263271

264272
// 创建一个带超时的上下文,大幅减少超时时间避免goroutine泄漏
265-
ctx, cancel := context.WithTimeout(stream.Context(), 2*time.Minute)
273+
ctx, cancel := context.WithTimeout(stream.Context(), 1*time.Minute) // 从2分钟缩短到1分钟
266274
defer cancel()
267275

268276
// 监听连接状态,当连接断开时自动清理
@@ -275,7 +283,7 @@ func (s *ServerHandler) RequestTask(h *pb.Host, stream pb.ServerService_RequestT
275283
}()
276284

277285
// 使用定时器避免无限等待,缩短检查间隔
278-
ticker := time.NewTicker(15 * time.Second)
286+
ticker := time.NewTicker(10 * time.Second) // 从15秒进一步减少到10秒
279287
defer ticker.Stop()
280288

281289
for {
@@ -333,7 +341,7 @@ func (s *ServerHandler) RequestTask(h *pb.Host, stream pb.ServerService_RequestT
333341
// 定期记录 goroutine 状态,帮助监控泄漏
334342
totalGoroutines := runtime.NumGoroutine()
335343
activeRequestTasks := activeRequestTaskGoroutines
336-
if totalGoroutines > 800 || activeRequestTasks > 400 {
344+
if totalGoroutines > 200 || activeRequestTasks > 80 {
337345
log.Printf("Goroutine 监控 - 总数: %d, RequestTask: %d, 服务器: %d",
338346
totalGoroutines, activeRequestTasks, clientID)
339347
}
@@ -357,7 +365,7 @@ func (s *ServerHandler) RequestTask(h *pb.Host, stream pb.ServerService_RequestT
357365
}()
358366

359367
// 等待连接关闭或超时,使用定时器避免无限等待,缩短检查间隔
360-
ticker := time.NewTicker(30 * time.Second)
368+
ticker := time.NewTicker(20 * time.Second) // 从30秒进一步减少到20秒
361369
defer ticker.Stop()
362370

363371
for {
@@ -399,7 +407,7 @@ func (s *ServerHandler) RequestTask(h *pb.Host, stream pb.ServerService_RequestT
399407

400408
// 检查 goroutine 泄漏情况,如果过多则强制退出
401409
totalGoroutines := runtime.NumGoroutine()
402-
if totalGoroutines > 1200 {
410+
if totalGoroutines > 400 {
403411
log.Printf("严重警告:goroutine 数量过多 (%d),强制断开服务器 %d 的连接以防止崩溃",
404412
totalGoroutines, clientID)
405413
return fmt.Errorf("goroutine 数量过多,强制断开连接")

service/singleton/singleton.go

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1841,13 +1841,24 @@ func GetGoroutineCount() int64 {
18411841
// cleanupStaleGoroutineConnections 清理僵尸 goroutine 连接
18421842
func cleanupStaleGoroutineConnections() {
18431843
cleaned := 0
1844+
totalGoroutines := runtime.NumGoroutine()
1845+
1846+
// 如果 goroutine 数量过多,更激进地清理
1847+
cleanupThreshold := 3 * time.Minute
1848+
if totalGoroutines > 200 {
1849+
cleanupThreshold = 2 * time.Minute // 更激进的清理
1850+
}
1851+
if totalGoroutines > 300 {
1852+
cleanupThreshold = 1 * time.Minute // 非常激进的清理
1853+
}
1854+
18441855
ServerLock.Lock()
18451856
defer ServerLock.Unlock()
18461857

1847-
for _, server := range ServerList {
1858+
for serverID, server := range ServerList {
18481859
if server != nil && server.TaskClose != nil {
1849-
// 检查连接是否长时间无活动(超过5分钟)
1850-
if time.Since(server.LastActive) > 5*time.Minute {
1860+
// 检查连接是否长时间无活动
1861+
if time.Since(server.LastActive) > cleanupThreshold {
18511862
server.TaskCloseLock.Lock()
18521863
if server.TaskClose != nil {
18531864
// 强制关闭僵尸连接
@@ -1858,14 +1869,15 @@ func cleanupStaleGoroutineConnections() {
18581869
server.TaskClose = nil
18591870
server.TaskStream = nil
18601871
cleaned++
1872+
log.Printf("清理服务器 %d 的僵尸连接(无活动时间: %v)", serverID, time.Since(server.LastActive))
18611873
}
18621874
server.TaskCloseLock.Unlock()
18631875
}
18641876
}
18651877
}
18661878

18671879
if cleaned > 0 {
1868-
log.Printf("内存清理:清理了 %d 个僵尸连接", cleaned)
1880+
log.Printf("内存清理:清理了 %d 个僵尸连接,当前 goroutine 数量: %d", cleaned, runtime.NumGoroutine())
18691881
}
18701882
}
18711883

0 commit comments

Comments
 (0)