Skip to content

Commit ba97fe9

Browse files
Minor changes
1 parent 6c7486d commit ba97fe9

File tree

2 files changed

+11
-13
lines changed

2 files changed

+11
-13
lines changed

src/main/java/org/beehive/gpullama3/api/controller/ChatController.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,14 @@ public SseEmitter streamChat(@RequestBody ChatRequest request) {
5050
}
5151

5252
SseEmitter emitter = new SseEmitter(Long.MAX_VALUE);
53-
llmService.generateStreamingResponse(request.getMessage(), request.getSystemMessage(),
54-
emitter, maxTokens, temperature, topP);
53+
llmService.generateStreamingResponse(
54+
request.getMessage(),
55+
request.getSystemMessage(),
56+
emitter,
57+
maxTokens,
58+
temperature,
59+
topP,
60+
request.getSeed());
5561

5662
return emitter;
5763
}

src/main/java/org/beehive/gpullama3/api/service/LLMService.java

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -138,15 +138,6 @@ public String generateResponse(String message, String systemMessage, int maxToke
138138
}
139139
}
140140

141-
public void generateStreamingResponse(String message, String systemMessage, SseEmitter emitter) {
142-
generateStreamingResponse(message, systemMessage, emitter, 150, 0.7, 0.9);
143-
}
144-
145-
public void generateStreamingResponse(String message, String systemMessage, SseEmitter emitter,
146-
int maxTokens, double temperature, double topP) {
147-
generateStreamingResponse(message, systemMessage, emitter, maxTokens, temperature, topP, null);
148-
}
149-
150141
public void generateStreamingResponse(String message, String systemMessage, SseEmitter emitter,
151142
int maxTokens, double temperature, double topP, Long seed) {
152143
CompletableFuture.runAsync(() -> {
@@ -170,11 +161,12 @@ public void generateStreamingResponse(String message, String systemMessage, SseE
170161
promptTokens.addAll(chatFormat.encodeMessage(new ChatFormat.Message(ChatFormat.Role.USER, message)));
171162
promptTokens.addAll(chatFormat.encodeHeader(new ChatFormat.Message(ChatFormat.Role.ASSISTANT, "")));
172163

173-
// Handle reasoning tokens for streaming
164+
// Include reasoning for Deepseek-R1-Distill-Qwen
174165
if (model.shouldIncludeReasoning()) {
175166
List<Integer> thinkStartTokens = model.tokenizer().encode("<think>\n", model.tokenizer().getSpecialTokens().keySet());
176167
promptTokens.addAll(thinkStartTokens);
177-
emitter.send(SseEmitter.event().data("<think>\n")); // Output immediately
168+
// We are in streaming, immediately output the think start
169+
emitter.send(SseEmitter.event().data("<think>\n"));
178170
}
179171

180172
Set<Integer> stopTokens = chatFormat.getStopTokens();

This commit has 0 comments.

Comments (0)