Skip to content

Commit 70ea661

Browse files
authored
Add Phi4 test and fix regex parsing.
Differential Revision: D83641294
Pull Request resolved: #14716
1 parent e652746 commit 70ea661

File tree

4 files changed

+59
-6
lines changed

4 files changed

+59
-6
lines changed

extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,23 @@ __attribute__((deprecated("This API is experimental.")))
2525
2626
@param modelPath File system path to the serialized model.
2727
@param tokenizerPath File system path to the tokenizer data.
28-
@param tokens An array of NSString special tokens to use during tokenization.
28+
@return An initialized ExecuTorchLLMTextRunner instance.
29+
*/
30+
- (instancetype)initWithModelPath:(NSString *)modelPath
31+
tokenizerPath:(NSString *)tokenizerPath;
32+
33+
/**
34+
Initializes a text LLM runner with the given model and tokenizer paths,
35+
and a list of special tokens to include in the tokenizer.
36+
37+
@param modelPath File system path to the serialized model.
38+
@param tokenizerPath File system path to the tokenizer data.
39+
@param specialTokens An array of NSString special tokens to use during tokenization.
2940
@return An initialized ExecuTorchLLMTextRunner instance.
3041
*/
3142
- (instancetype)initWithModelPath:(NSString *)modelPath
3243
tokenizerPath:(NSString *)tokenizerPath
33-
specialTokens:(NSArray<NSString *> *)tokens
44+
specialTokens:(NSArray<NSString *> *)specialTokens
3445
NS_DESIGNATED_INITIALIZER;
3546

3647
/**

extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,22 @@ @implementation ExecuTorchLLMTextRunner {
2828
std::unique_ptr<llm::TextLLMRunner> _runner;
2929
}
3030

31+
// Convenience initializer: forwards to
// -initWithModelPath:tokenizerPath:specialTokens: with an empty
// special-token array.
- (instancetype)initWithModelPath:(NSString*)modelPath
                    tokenizerPath:(NSString*)tokenizerPath {
  NSArray<NSString*>* const noSpecialTokens = @[];
  self = [self initWithModelPath:modelPath
                   tokenizerPath:tokenizerPath
                   specialTokens:noSpecialTokens];
  return self;
}
37+
3138
- (instancetype)initWithModelPath:(NSString*)modelPath
3239
tokenizerPath:(NSString*)tokenizerPath
33-
specialTokens:(NSArray<NSString*>*)tokens {
40+
specialTokens:(NSArray<NSString*>*)specialTokens {
3441
self = [super init];
3542
if (self) {
3643
_modelPath = [modelPath copy];
3744
_tokenizerPath = [tokenizerPath copy];
3845
_specialTokens = std::make_unique<std::vector<std::string>>();
39-
for (NSString *token in tokens) {
46+
for (NSString *token in specialTokens) {
4047
_specialTokens->emplace_back(token.UTF8String);
4148
}
4249
}

extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ class MultimodalRunnerTest: XCTestCase {
6060
let userPrompt = "What's on the picture?"
6161
let sequenceLength = 768
6262

63-
func test() {
63+
func testLLaVA() {
6464
let bundle = Bundle(for: type(of: self))
6565
guard let modelPath = bundle.path(forResource: "llava", ofType: "pte"),
6666
let tokenizerPath = bundle.path(forResource: "tokenizer", ofType: "bin"),

extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class TextRunnerTest: XCTestCase {
3939
let userPrompt = "The capital of France is called"
4040
let sequenceLength = 128
4141

42-
func test() {
42+
func testLLaMA() {
4343
let bundle = Bundle(for: type(of: self))
4444
guard let modelPath = bundle.path(forResource: "llama3_2-1B", ofType: "pte"),
4545
let tokenizerPath = bundle.path(forResource: "tokenizer", ofType: "model") else {
@@ -73,4 +73,39 @@ class TextRunnerTest: XCTestCase {
7373
}
7474
XCTAssertTrue(text.lowercased().contains("paris"))
7575
}
76+
77+
/// Generates a completion for `userPrompt` with the bundled "phi4-mini" model
/// twice: first on a newly created runner, then again after `reset()`.
/// Each pass must produce text containing "paris" (case-insensitive).
func testPhi4() {
  let resources = Bundle(for: type(of: self))
  guard
    let modelPath = resources.path(forResource: "phi4-mini", ofType: "pte"),
    let tokenizerPath = resources.path(forResource: "tokenizer", ofType: "json")
  else {
    XCTFail("Couldn't find model or tokenizer files")
    return
  }
  let runner = TextRunner(modelPath: modelPath, tokenizerPath: tokenizerPath)
  var text = ""

  // Pass 0 runs on the runner as constructed; pass 1 clears the collected
  // text and resets the runner before generating again.
  for pass in 0..<2 {
    if pass > 0 {
      text = ""
      runner.reset()
    }
    do {
      try runner.generate(userPrompt, Config {
        $0.sequenceLength = sequenceLength
      }) { token in
        text += token
      }
    } catch {
      // A generation failure is recorded, and the assertion below still runs.
      XCTFail("Failed to generate text with error \(error)")
    }
    XCTAssertTrue(text.lowercased().contains("paris"))
  }
}
76111
}

0 commit comments

Comments
 (0)