Skip to content

Commit 4d29e90

Browse files
authored
PR 说明:修复 /requests 页面卡顿/卡死(WS 降载 + 前端重绘抑制 + 索引优化) (#246)
* 修复 前端/requests 卡成狗屎, api/admin/requests?limit=100 后端慢成狗
* 全访问优化性能
* 优化 WebSocket 广播逻辑,确保消息快照,添加 MySQL 重复索引错误处理,增加单元测试
* 优化 WebSocket 消息广播,确保数据快照;添加单元测试以验证行为
1 parent 3e71063 commit 4d29e90

File tree

15 files changed

+662
-165
lines changed

15 files changed

+662
-165
lines changed

internal/event/broadcaster.go

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,42 @@ type Broadcaster interface {
1414
// NopBroadcaster 空实现,用于测试或不需要广播的场景
1515
type NopBroadcaster struct{}
1616

17-
func (n *NopBroadcaster) BroadcastProxyRequest(req *domain.ProxyRequest) {}
17+
func (n *NopBroadcaster) BroadcastProxyRequest(req *domain.ProxyRequest) {}
1818
func (n *NopBroadcaster) BroadcastProxyUpstreamAttempt(attempt *domain.ProxyUpstreamAttempt) {}
19-
func (n *NopBroadcaster) BroadcastLog(message string) {}
20-
func (n *NopBroadcaster) BroadcastMessage(messageType string, data interface{}) {}
19+
func (n *NopBroadcaster) BroadcastLog(message string) {}
20+
func (n *NopBroadcaster) BroadcastMessage(messageType string, data interface{}) {}
21+
22+
// SanitizeProxyRequestForBroadcast 用于“实时广播”场景瘦身 payload:
23+
// 去掉 request/response 大字段,避免 WebSocket 消息动辄几十/几百 KB,导致前端 JSON.parse / GC 卡死。
24+
//
25+
// 说明:
26+
// - /requests 列表页只需要轻量字段(状态、耗时、tokens、成本等)。
27+
// - 详情页需要的大字段应通过 /admin/requests/{id} 与 /admin/requests/{id}/attempts 拉取。
28+
func SanitizeProxyRequestForBroadcast(req *domain.ProxyRequest) *domain.ProxyRequest {
29+
if req == nil {
30+
return nil
31+
}
32+
// 已经是瘦身后的对象,避免重复拷贝(高频场景会产生额外 GC 压力)
33+
if req.RequestInfo == nil && req.ResponseInfo == nil {
34+
return req
35+
}
36+
copied := *req
37+
copied.RequestInfo = nil
38+
copied.ResponseInfo = nil
39+
return &copied
40+
}
41+
42+
// SanitizeProxyUpstreamAttemptForBroadcast 用于“实时广播”场景瘦身 payload。
43+
func SanitizeProxyUpstreamAttemptForBroadcast(attempt *domain.ProxyUpstreamAttempt) *domain.ProxyUpstreamAttempt {
44+
if attempt == nil {
45+
return nil
46+
}
47+
// 已经是瘦身后的对象,避免重复拷贝(高频场景会产生额外 GC 压力)
48+
if attempt.RequestInfo == nil && attempt.ResponseInfo == nil {
49+
return attempt
50+
}
51+
copied := *attempt
52+
copied.RequestInfo = nil
53+
copied.ResponseInfo = nil
54+
return &copied
55+
}

internal/event/wails_broadcaster_desktop.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ func (w *WailsBroadcaster) emitWailsEvent(eventType string, data interface{}) {
4747

4848
// BroadcastProxyRequest broadcasts a proxy request update
4949
func (w *WailsBroadcaster) BroadcastProxyRequest(req *domain.ProxyRequest) {
50+
req = SanitizeProxyRequestForBroadcast(req)
5051
// Broadcast via inner broadcaster (WebSocket)
5152
if w.inner != nil {
5253
w.inner.BroadcastProxyRequest(req)
@@ -57,6 +58,7 @@ func (w *WailsBroadcaster) BroadcastProxyRequest(req *domain.ProxyRequest) {
5758

5859
// BroadcastProxyUpstreamAttempt broadcasts a proxy upstream attempt update
5960
func (w *WailsBroadcaster) BroadcastProxyUpstreamAttempt(attempt *domain.ProxyUpstreamAttempt) {
61+
attempt = SanitizeProxyUpstreamAttemptForBroadcast(attempt)
6062
if w.inner != nil {
6163
w.inner.BroadcastProxyUpstreamAttempt(attempt)
6264
}

internal/executor/executor.go

Lines changed: 61 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package executor
1+
package executor
22

33
import (
44
"context"
@@ -321,51 +321,71 @@ func (e *Executor) processAdapterEventsRealtime(eventChan domain.AdapterEventCha
321321
return
322322
}
323323

324-
for event := range eventChan {
325-
if event == nil {
326-
continue
327-
}
324+
// 事件节流:合并多个 adapter 事件为一次广播,避免在流式高并发下产生“广播风暴”
325+
const broadcastThrottle = 200 * time.Millisecond
326+
ticker := time.NewTicker(broadcastThrottle)
327+
defer ticker.Stop()
328328

329-
needsBroadcast := false
329+
dirty := false
330330

331-
switch event.Type {
332-
case domain.EventRequestInfo:
333-
if !e.shouldClearRequestDetail() && event.RequestInfo != nil {
334-
attempt.RequestInfo = event.RequestInfo
335-
needsBroadcast = true
336-
}
337-
case domain.EventResponseInfo:
338-
if !e.shouldClearRequestDetail() && event.ResponseInfo != nil {
339-
attempt.ResponseInfo = event.ResponseInfo
340-
needsBroadcast = true
341-
}
342-
case domain.EventMetrics:
343-
if event.Metrics != nil {
344-
attempt.InputTokenCount = event.Metrics.InputTokens
345-
attempt.OutputTokenCount = event.Metrics.OutputTokens
346-
attempt.CacheReadCount = event.Metrics.CacheReadCount
347-
attempt.CacheWriteCount = event.Metrics.CacheCreationCount
348-
attempt.Cache5mWriteCount = event.Metrics.Cache5mCreationCount
349-
attempt.Cache1hWriteCount = event.Metrics.Cache1hCreationCount
350-
needsBroadcast = true
351-
}
352-
case domain.EventResponseModel:
353-
if event.ResponseModel != "" {
354-
attempt.ResponseModel = event.ResponseModel
355-
needsBroadcast = true
331+
flush := func() {
332+
if !dirty || e.broadcaster == nil {
333+
dirty = false
334+
return
335+
}
336+
// 广播前做一次瘦身 + 快照,避免发送大字段、也避免指针被后续修改导致数据竞争
337+
snapshot := event.SanitizeProxyUpstreamAttemptForBroadcast(attempt)
338+
e.broadcaster.BroadcastProxyUpstreamAttempt(snapshot)
339+
dirty = false
340+
}
341+
342+
for {
343+
select {
344+
case ev, ok := <-eventChan:
345+
if !ok {
346+
flush()
347+
return
356348
}
357-
case domain.EventFirstToken:
358-
if event.FirstTokenTime > 0 {
359-
// Calculate TTFT as duration from start time to first token time
360-
firstTokenTime := time.UnixMilli(event.FirstTokenTime)
361-
attempt.TTFT = firstTokenTime.Sub(attempt.StartTime)
362-
needsBroadcast = true
349+
if ev == nil {
350+
continue
363351
}
364-
}
365352

366-
// Broadcast update immediately for real-time visibility
367-
if needsBroadcast && e.broadcaster != nil {
368-
e.broadcaster.BroadcastProxyUpstreamAttempt(attempt)
353+
switch ev.Type {
354+
case domain.EventRequestInfo:
355+
if !e.shouldClearRequestDetail() && ev.RequestInfo != nil {
356+
attempt.RequestInfo = ev.RequestInfo
357+
dirty = true
358+
}
359+
case domain.EventResponseInfo:
360+
if !e.shouldClearRequestDetail() && ev.ResponseInfo != nil {
361+
attempt.ResponseInfo = ev.ResponseInfo
362+
dirty = true
363+
}
364+
case domain.EventMetrics:
365+
if ev.Metrics != nil {
366+
attempt.InputTokenCount = ev.Metrics.InputTokens
367+
attempt.OutputTokenCount = ev.Metrics.OutputTokens
368+
attempt.CacheReadCount = ev.Metrics.CacheReadCount
369+
attempt.CacheWriteCount = ev.Metrics.CacheCreationCount
370+
attempt.Cache5mWriteCount = ev.Metrics.Cache5mCreationCount
371+
attempt.Cache1hWriteCount = ev.Metrics.Cache1hCreationCount
372+
dirty = true
373+
}
374+
case domain.EventResponseModel:
375+
if ev.ResponseModel != "" {
376+
attempt.ResponseModel = ev.ResponseModel
377+
dirty = true
378+
}
379+
case domain.EventFirstToken:
380+
if ev.FirstTokenTime > 0 {
381+
// Calculate TTFT as duration from start time to first token time
382+
firstTokenTime := time.UnixMilli(ev.FirstTokenTime)
383+
attempt.TTFT = firstTokenTime.Sub(attempt.StartTime)
384+
dirty = true
385+
}
386+
}
387+
case <-ticker.C:
388+
flush()
369389
}
370390
}
371391
}

internal/handler/websocket.go

Lines changed: 104 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,19 @@ package handler
22

33
import (
44
"bufio"
5+
"encoding/json"
56
"io"
67
"log"
78
"net/http"
89
"os"
10+
"strconv"
911
"strings"
1012
"sync"
13+
"sync/atomic"
14+
"time"
1115

1216
"github.com/awsl-project/maxx/internal/domain"
17+
"github.com/awsl-project/maxx/internal/event"
1318
"github.com/gorilla/websocket"
1419
)
1520

@@ -28,8 +33,13 @@ type WebSocketHub struct {
2833
clients map[*websocket.Conn]bool
2934
broadcast chan WSMessage
3035
mu sync.RWMutex
36+
37+
// broadcast channel 满时的丢弃计数(热路径:只做原子累加)
38+
broadcastDroppedTotal atomic.Uint64
3139
}
3240

41+
const websocketWriteTimeout = 5 * time.Second
42+
3343
func NewWebSocketHub() *WebSocketHub {
3444
hub := &WebSocketHub{
3545
clients: make(map[*websocket.Conn]bool),
@@ -41,15 +51,47 @@ func NewWebSocketHub() *WebSocketHub {
4151

4252
func (h *WebSocketHub) run() {
4353
for msg := range h.broadcast {
54+
// 避免在持锁状态下进行网络写入;同时修复 RLock 下 delete map 的数据竞争风险
4455
h.mu.RLock()
56+
clients := make([]*websocket.Conn, 0, len(h.clients))
4557
for client := range h.clients {
46-
err := client.WriteJSON(msg)
47-
if err != nil {
48-
client.Close()
58+
clients = append(clients, client)
59+
}
60+
h.mu.RUnlock()
61+
62+
var toRemove []*websocket.Conn
63+
for _, client := range clients {
64+
_ = client.SetWriteDeadline(time.Now().Add(websocketWriteTimeout))
65+
if err := client.WriteJSON(msg); err != nil {
66+
_ = client.Close()
67+
toRemove = append(toRemove, client)
68+
}
69+
}
70+
71+
if len(toRemove) > 0 {
72+
h.mu.Lock()
73+
for _, client := range toRemove {
4974
delete(h.clients, client)
5075
}
76+
h.mu.Unlock()
77+
}
78+
}
79+
}
80+
81+
func (h *WebSocketHub) tryEnqueueBroadcast(msg WSMessage, meta string) {
82+
select {
83+
case h.broadcast <- msg:
84+
default:
85+
dropped := h.broadcastDroppedTotal.Add(1)
86+
// 避免日志刷屏:首次 + 每100次打印一次,确保可观测性但不拖慢热路径。
87+
if dropped == 1 || dropped%100 == 0 {
88+
meta = strings.TrimSpace(meta)
89+
if meta != "" {
90+
log.Printf("[WebSocket] drop broadcast message type=%s %s dropped_total=%d", msg.Type, meta, dropped)
91+
} else {
92+
log.Printf("[WebSocket] drop broadcast message type=%s dropped_total=%d", msg.Type, dropped)
93+
}
5194
}
52-
h.mu.RUnlock()
5395
}
5496
}
5597

@@ -81,33 +123,84 @@ func (h *WebSocketHub) HandleWebSocket(w http.ResponseWriter, r *http.Request) {
81123
}
82124

83125
func (h *WebSocketHub) BroadcastProxyRequest(req *domain.ProxyRequest) {
84-
h.broadcast <- WSMessage{
126+
sanitized := event.SanitizeProxyRequestForBroadcast(req)
127+
var data interface{} = sanitized
128+
var meta string
129+
if sanitized != nil {
130+
// 无论 Sanitize 是否返回原指针,都强制做一次浅拷贝快照,避免异步消费者读到后续可变更的数据。
131+
snapshot := *sanitized
132+
data = snapshot
133+
meta = "requestID=" + snapshot.RequestID
134+
if snapshot.ID != 0 {
135+
meta += " requestDbID=" + strconv.FormatUint(snapshot.ID, 10)
136+
}
137+
}
138+
msg := WSMessage{
85139
Type: "proxy_request_update",
86-
Data: req,
140+
Data: data,
87141
}
142+
h.tryEnqueueBroadcast(msg, meta)
88143
}
89144

90145
func (h *WebSocketHub) BroadcastProxyUpstreamAttempt(attempt *domain.ProxyUpstreamAttempt) {
91-
h.broadcast <- WSMessage{
146+
sanitized := event.SanitizeProxyUpstreamAttemptForBroadcast(attempt)
147+
var data interface{} = sanitized
148+
var meta string
149+
if sanitized != nil {
150+
snapshot := *sanitized
151+
data = snapshot
152+
if snapshot.ProxyRequestID != 0 {
153+
meta = "proxyRequestID=" + strconv.FormatUint(snapshot.ProxyRequestID, 10)
154+
}
155+
if snapshot.ID != 0 {
156+
if meta != "" {
157+
meta += " "
158+
}
159+
meta += "attemptDbID=" + strconv.FormatUint(snapshot.ID, 10)
160+
}
161+
}
162+
msg := WSMessage{
92163
Type: "proxy_upstream_attempt_update",
93-
Data: attempt,
164+
Data: data,
94165
}
166+
h.tryEnqueueBroadcast(msg, meta)
95167
}
96168

97169
// BroadcastMessage sends a custom message with specified type to all connected clients
98170
func (h *WebSocketHub) BroadcastMessage(messageType string, data interface{}) {
99-
h.broadcast <- WSMessage{
171+
// 约定:BroadcastMessage 允许调用方传入 map/struct/指针等可变对象。
172+
//
173+
// 但由于实际发送是异步的(入队后由 run() 写到各连接),如果这里直接把可变指针放进 channel,
174+
// 调用方在入队后继续修改数据,会导致与 BroadcastProxyRequest 类似的数据竞态。
175+
//
176+
// 因此这里先把 data 预先序列化为 json.RawMessage,形成不可变快照;后续 WriteJSON 会直接写入该快照。
177+
var snapshot interface{} = data
178+
if data != nil {
179+
if raw, ok := data.(json.RawMessage); ok {
180+
snapshot = raw
181+
} else {
182+
b, err := json.Marshal(data)
183+
if err != nil {
184+
log.Printf("[WebSocket] drop broadcast message type=%s: marshal snapshot failed: %v", messageType, err)
185+
return
186+
}
187+
snapshot = json.RawMessage(b)
188+
}
189+
}
190+
msg := WSMessage{
100191
Type: messageType,
101-
Data: data,
192+
Data: snapshot,
102193
}
194+
h.tryEnqueueBroadcast(msg, "")
103195
}
104196

105197
// BroadcastLog sends a log message to all connected clients
106198
func (h *WebSocketHub) BroadcastLog(message string) {
107-
h.broadcast <- WSMessage{
199+
msg := WSMessage{
108200
Type: "log_message",
109201
Data: message,
110202
}
203+
h.tryEnqueueBroadcast(msg, "")
111204
}
112205

113206
// WebSocketLogWriter implements io.Writer to capture logs and broadcast via WebSocket

0 commit comments

Comments
 (0)