
Commit fa62ed8

Make qwen ux better
1 parent 87b70b3 commit fa62ed8

File tree: 1 file changed (+54 −16 lines)

examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift

Lines changed: 54 additions & 16 deletions

```diff
@@ -194,21 +194,23 @@ struct ContentView: View {
             .background(Color.clear)
             .cornerRadius(8)
 
-            Button(action: {
-              thinkingMode.toggle()
-              showThinkingModeNotification = true
-              DispatchQueue.main.asyncAfter(deadline: .now() + 3) {
-                showThinkingModeNotification = false
+            if resourceManager.isModelValid && ModelType.fromPath(resourceManager.modelPath) == .qwen3 {
+              Button(action: {
+                thinkingMode.toggle()
+                showThinkingModeNotification = true
+                DispatchQueue.main.asyncAfter(deadline: .now() + 3) {
+                  showThinkingModeNotification = false
+                }
+              }) {
+                Image(systemName: "brain")
+                  .resizable()
+                  .scaledToFit()
+                  .frame(width: 24, height: 24)
+                  .foregroundColor(thinkingMode ? .blue : .gray)
               }
-            }) {
-              Image(systemName: "brain")
-                .resizable()
-                .scaledToFit()
-                .frame(width: 24, height: 24)
-                .foregroundColor(thinkingMode ? .blue : .gray)
+              .background(Color.clear)
+              .cornerRadius(8)
             }
-            .background(Color.clear)
-            .cornerRadius(8)
 
             TextField(placeholder, text: $prompt, axis: .vertical)
               .padding(8)
```
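
Note: the brain (thinking-mode) button is now shown only when a valid model is loaded and `ModelType.fromPath(resourceManager.modelPath)` resolves to `.qwen3`. A hypothetical sketch of such a path-based check follows; the real `ModelType` implementation lives elsewhere in the app and is not part of this commit.

```swift
import Foundation

// Hypothetical sketch only: the real ModelType.fromPath is defined elsewhere in the app.
// It simply illustrates a plausible filename-based model detection.
enum ModelType: Equatable {
  case llama
  case llava
  case qwen3
  case unsupported

  static func fromPath(_ path: String) -> ModelType {
    let name = (path as NSString).lastPathComponent.lowercased()
    if name.contains("qwen") { return .qwen3 }
    if name.contains("llava") { return .llava }
    if name.contains("llama") { return .llama }
    return .unsupported
  }
}

// Example: a Qwen 3 export would enable the thinking-mode button.
print(ModelType.fromPath("/models/qwen3-0.6b.pte") == .qwen3) // true
```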

```diff
@@ -464,7 +466,10 @@ struct ContentView: View {
       let prompt: String
       switch modelType {
       case .qwen3:
-        prompt = String(format: Constants.qwen3PromptTemplate, text)
+        let basePrompt = String(format: Constants.qwen3PromptTemplate, text)
+        // If thinking mode is enabled for Qwen, don't skip the <think></think> special tokens
+        // and have them be generated.
+        prompt = thinkingMode ? basePrompt.replacingOccurrences(of: "<think>\n\n</think>\n\n\n", with: "") : basePrompt
       case .llama:
         prompt = String(format: Constants.llama3PromptTemplate, text)
       case .llava:
```
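
Note: the Qwen 3 prompt template pre-fills an empty `<think>\n\n</think>` block, which suppresses thinking; removing that block lets the model emit its own thinking tokens. A minimal sketch of the substitution, assuming a template shape like the one below (the real `Constants.qwen3PromptTemplate` is defined elsewhere in the app):

```swift
import Foundation

// Assumed template shape; the real Constants.qwen3PromptTemplate lives elsewhere in the app.
let qwen3PromptTemplate =
  "<|im_start|>user\n%@<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n\n"

func qwen3Prompt(for text: String, thinkingMode: Bool) -> String {
  let basePrompt = String(format: qwen3PromptTemplate, text)
  // With thinking mode on, drop the pre-filled empty block so the model generates
  // its own <think> ... </think> span; with it off, keep the block to suppress thinking.
  return thinkingMode
    ? basePrompt.replacingOccurrences(of: "<think>\n\n</think>\n\n\n", with: "")
    : basePrompt
}

print(qwen3Prompt(for: "What is 2 + 2?", thinkingMode: true))
```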

```diff
@@ -474,12 +479,45 @@ struct ContentView: View {
       try runnerHolder.runner?.generate(prompt, sequenceLength: seq_len) { token in
 
         if token != prompt {
-          // hack to fix the issue that extension/llm/runner/text_token_generator.h
-          // keeps generating after <|eot_id|>
           if token == "<|eot_id|>" {
+            // hack to fix the issue that extension/llm/runner/text_token_generator.h
+            // keeps generating after <|eot_id|>
             shouldStopShowingToken = true
+          } else if token == "<|im_end|>" {
+            // Qwen3 specific token.
+            // Skip.
+          } else if token == "<think>" {
+            // Qwen3 specific token.
+            let textToFlush = tokens.joined()
+            let flushedTokenCount = tokens.count
+            tokens = []
+            DispatchQueue.main.async {
+              var message = messages.removeLast()
+              message.text += textToFlush
+              message.text += message.text.isEmpty ? "Thinking...\n\n" : "\n\nThinking...\n\n"
+              message.format = .italic
+              message.tokenCount += flushedTokenCount + 1 // + 1 for the start thinking token.
+              message.dateUpdated = Date()
+              messages.append(message)
+            }
+          } else if token == "</think>" {
+            // Qwen3 specific token.
+            let textToFlush = tokens.joined()
+            let flushedTokenCount = tokens.count
+            tokens = []
+            DispatchQueue.main.async {
+              var message = messages.removeLast()
+              message.text += textToFlush
+              message.text += "\n\nFinished thinking.\n\n"
+              message.format = .italic
+              message.tokenCount += flushedTokenCount + 1 // + 1 for the end thinking token.
+              message.dateUpdated = Date()
+              messages.append(message)
+            }
           } else {
             tokens.append(token.trimmingCharacters(in: .newlines))
+            // Flush tokens in groups of 3 so that it's closer to whole words being generated
+            // rather than parts of words (tokens).
             if tokens.count > 2 {
               let text = tokens.joined()
               let count = tokens.count
```
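
Note: the generate callback above buffers streamed tokens and flushes them to the last chat message in groups of three, intercepting `<|eot_id|>`, `<|im_end|>`, `<think>`, and `</think>` before they reach the UI. Below is a self-contained sketch of that buffering idea with SwiftUI and the `messages` state replaced by a plain callback; the type and names are illustrative and not part of the commit.

```swift
import Foundation

// Illustrative sketch of the streaming logic above, outside SwiftUI. The class and the
// onDisplayText callback are hypothetical stand-ins for the message-updating code.
final class TokenStreamBuffer {
  private var tokens: [String] = []
  private let onDisplayText: (String) -> Void

  init(onDisplayText: @escaping (String) -> Void) {
    self.onDisplayText = onDisplayText
  }

  func handle(_ token: String) {
    switch token {
    case "<|eot_id|>", "<|im_end|>":
      flush() // end-of-turn markers are never shown to the user
    case "<think>":
      flush()
      onDisplayText("\n\nThinking...\n\n")
    case "</think>":
      flush()
      onDisplayText("\n\nFinished thinking.\n\n")
    default:
      tokens.append(token.trimmingCharacters(in: .newlines))
      // Flush in groups of 3 so output lands closer to whole words than to sub-word tokens.
      if tokens.count > 2 { flush() }
    }
  }

  private func flush() {
    guard !tokens.isEmpty else { return }
    onDisplayText(tokens.joined())
    tokens = []
  }
}

// Usage: feed a streamed token sequence and print what would reach the chat view.
let buffer = TokenStreamBuffer { print($0, terminator: "") }
["<think>", "2", " +", " 2", " =", " 4", "</think>", "The", " answer", " is", " 4", ".", "<|im_end|>"]
  .forEach { buffer.handle($0) }
```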
