@@ -194,21 +194,23 @@ struct ContentView: View {
194194 . background ( Color . clear)
195195 . cornerRadius ( 8 )
196196
197- Button ( action: {
198- thinkingMode. toggle ( )
199- showThinkingModeNotification = true
200- DispatchQueue . main. asyncAfter ( deadline: . now( ) + 3 ) {
201- showThinkingModeNotification = false
197+ if resourceManager. isModelValid && ModelType . fromPath ( resourceManager. modelPath) == . qwen3 {
198+ Button ( action: {
199+ thinkingMode. toggle ( )
200+ showThinkingModeNotification = true
201+ DispatchQueue . main. asyncAfter ( deadline: . now( ) + 3 ) {
202+ showThinkingModeNotification = false
203+ }
204+ } ) {
205+ Image ( systemName: " brain " )
206+ . resizable ( )
207+ . scaledToFit ( )
208+ . frame ( width: 24 , height: 24 )
209+ . foregroundColor ( thinkingMode ? . blue : . gray)
202210 }
203- } ) {
204- Image ( systemName: " brain " )
205- . resizable ( )
206- . scaledToFit ( )
207- . frame ( width: 24 , height: 24 )
208- . foregroundColor ( thinkingMode ? . blue : . gray)
211+ . background ( Color . clear)
212+ . cornerRadius ( 8 )
209213 }
210- . background ( Color . clear)
211- . cornerRadius ( 8 )
212214
213215 TextField ( placeholder, text: $prompt, axis: . vertical)
214216 . padding ( 8 )
@@ -464,7 +466,10 @@ struct ContentView: View {
464466 let prompt : String
465467 switch modelType {
466468 case . qwen3:
467- prompt = String ( format: Constants . qwen3PromptTemplate, text)
469+ let basePrompt = String ( format: Constants . qwen3PromptTemplate, text)
470+ // When thinking mode is enabled for Qwen3, strip the empty <think></think> block from the
471+ // prompt so that the thinking tokens are generated instead of being skipped.
472+ prompt = thinkingMode ? basePrompt. replacingOccurrences ( of: " <think> \n \n </think> \n \n \n " , with: " " ) : basePrompt
468473 case . llama:
469474 prompt = String ( format: Constants . llama3PromptTemplate, text)
470475 case . llava:
@@ -474,12 +479,45 @@ struct ContentView: View {
474479 try runnerHolder. runner? . generate ( prompt, sequenceLength: seq_len) { token in
475480
476481 if token != prompt {
477- // hack to fix the issue that extension/llm/runner/text_token_generator.h
478- // keeps generating after <|eot_id|>
479482 if token == " <|eot_id|> " {
483+ // hack to fix the issue that extension/llm/runner/text_token_generator.h
484+ // keeps generating after <|eot_id|>
480485 shouldStopShowingToken = true
486+ } else if token == " <|im_end|> " {
487+ // Qwen3 specific token.
488+ // Skip.
489+ } else if token == " <think> " {
490+ // Qwen3 specific token.
491+ let textToFlush = tokens. joined ( )
492+ let flushedTokenCount = tokens. count
493+ tokens = [ ]
494+ DispatchQueue . main. async {
495+ var message = messages. removeLast ( )
496+ message. text += textToFlush
497+ message. text += message. text. isEmpty ? " Thinking... \n \n " : " \n \n Thinking... \n \n "
498+ message. format = . italic
499+ message. tokenCount += flushedTokenCount + 1 // + 1 for the start thinking token.
500+ message. dateUpdated = Date ( )
501+ messages. append ( message)
502+ }
503+ } else if token == " </think> " {
504+ // Qwen3 specific token.
505+ let textToFlush = tokens. joined ( )
506+ let flushedTokenCount = tokens. count
507+ tokens = [ ]
508+ DispatchQueue . main. async {
509+ var message = messages. removeLast ( )
510+ message. text += textToFlush
511+ message. text += " \n \n Finished thinking. \n \n "
512+ message. format = . italic
513+ message. tokenCount += flushedTokenCount + 1 // + 1 for the end thinking token.
514+ message. dateUpdated = Date ( )
515+ messages. append ( message)
516+ }
481517 } else {
482518 tokens. append ( token. trimmingCharacters ( in: . newlines) )
519+ // Flush tokens in groups of 3 so that it's closer to whole words being generated
520+ // rather than parts of words (tokens).
483521 if tokens. count > 2 {
484522 let text = tokens. joined ( )
485523 let count = tokens. count
0 commit comments