@@ -138,15 +138,6 @@ public String generateResponse(String message, String systemMessage, int maxToke
138
138
}
139
139
}
140
140
141
- public void generateStreamingResponse (String message , String systemMessage , SseEmitter emitter ) {
142
- generateStreamingResponse (message , systemMessage , emitter , 150 , 0.7 , 0.9 );
143
- }
144
-
145
- public void generateStreamingResponse (String message , String systemMessage , SseEmitter emitter ,
146
- int maxTokens , double temperature , double topP ) {
147
- generateStreamingResponse (message , systemMessage , emitter , maxTokens , temperature , topP , null );
148
- }
149
-
150
141
public void generateStreamingResponse (String message , String systemMessage , SseEmitter emitter ,
151
142
int maxTokens , double temperature , double topP , Long seed ) {
152
143
CompletableFuture .runAsync (() -> {
@@ -170,11 +161,12 @@ public void generateStreamingResponse(String message, String systemMessage, SseE
170
161
promptTokens .addAll (chatFormat .encodeMessage (new ChatFormat .Message (ChatFormat .Role .USER , message )));
171
162
promptTokens .addAll (chatFormat .encodeHeader (new ChatFormat .Message (ChatFormat .Role .ASSISTANT , "" )));
172
163
173
- // Handle reasoning tokens for streaming
164
+ // For reasoning models (e.g. Deepseek-R1-Distill-Qwen), append the opening <think> tag to the prompt
174
165
if (model .shouldIncludeReasoning ()) {
175
166
List <Integer > thinkStartTokens = model .tokenizer ().encode ("<think>\n " , model .tokenizer ().getSpecialTokens ().keySet ());
176
167
promptTokens .addAll (thinkStartTokens );
177
- emitter .send (SseEmitter .event ().data ("<think>\n " )); // Output immediately
168
+ // Streaming mode: send the opening <think> tag to the client immediately
169
+ emitter .send (SseEmitter .event ().data ("<think>\n " ));
178
170
}
179
171
180
172
Set <Integer > stopTokens = chatFormat .getStopTokens ();
0 commit comments