Skip to content

Commit c8ab861

Browse files
committed
fix: CheckLLMResponseStream QPS is too high
1 parent 04ca49e commit c8ab861

File tree

2 files changed

+21
-1
lines changed

2 files changed

+21
-1
lines changed

volcengine-java-sdk-wafruntime/src/main/java/com/volcengine/wafruntime/LLMStreamSession.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
* 表示一个流会话的类,包含流缓冲区、流发送长度和消息 ID 等信息。
66
*/
77
public class LLMStreamSession {
8+
// Initial send window; currentSendWindow starts from this value.
private static final int LLM_STREAM_SEND_BASE_WINDOW = 10 ;
9+
// Factor the current send window is multiplied by in growSendWindow().
private static final int LLM_STREAM_SEND_EXPONENT = 2 ;
10+
// Current send window for this session; initialised to the base window and changed by
// setCurrentSendWindow(int) and growSendWindow().
private int currentSendWindow = LLM_STREAM_SEND_BASE_WINDOW;
811
// 流缓冲区,用于存储流数据
912
private String streamBuf;
1013
// 流发送的长度
@@ -22,6 +25,7 @@ public LLMStreamSession() {
2225
this.streamSendLen = 0;
2326
this.msgID = "";
2427
this.defaultBody = null;
28+
this.currentSendWindow = LLM_STREAM_SEND_BASE_WINDOW;
2529
}
2630

2731
/**
@@ -37,6 +41,7 @@ public LLMStreamSession(String streamBuf, int streamSendLen, String msgID, Check
3741
this.streamSendLen = streamSendLen;
3842
this.msgID = msgID;
3943
this.defaultBody = defaultBody;
44+
this.currentSendWindow = LLM_STREAM_SEND_BASE_WINDOW;
4045
}
4146

4247
/**
@@ -123,5 +128,19 @@ void appendStreamBuf(String str) {
123128
this.streamSendLen += str.length();
124129
}
125130
}
131+
// 获取发送窗口长度
132+
public int getCurrentSendWindow() {
133+
return this.currentSendWindow;
134+
}
135+
136+
// 设置发送窗口长度
137+
public void setCurrentSendWindow(int currentSendWindow) {
138+
this.currentSendWindow = currentSendWindow;
139+
}
140+
// 窗口增长
141+
public void growSendWindow(){
142+
this.currentSendWindow = currentSendWindow * LLM_STREAM_SEND_EXPONENT;
143+
}
144+
126145

127146
}

volcengine-java-sdk-wafruntime/src/main/java/com/volcengine/wafruntime/WafApiRuntime.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,11 @@ public CheckLLMResponseStreamResponse checkLLMResponseStream(CheckLLMResponseStr
3939
session.appendStreamBuf(content);
4040

4141
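// Buffer the content (return the default body) while the pending send length is below the session's current send window, unless this is the first or the last chunk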
42-
if (session.getStreamSendLen() < 10 && body.getMsgID() != null && body.getUseStream() != 2) {
42+
if (session.getStreamSendLen() < session.getCurrentSendWindow() && body.getMsgID() != null && body.getUseStream() != 2) {
4343
return session.getDefaultBody();
4444
}
4545
session.setStreamSendLen(0);
46+
session.growSendWindow();
4647
body.setContent(session.getStreamBuf());
4748

4849
String msgID = session.getMsgID();

0 commit comments

Comments
 (0)