Skip to content

Commit c22a2b0

Browse files
authored
Objective-C wrapper for multimodal LLM runner. (#14075)
Summary: . Differential Revision: D81936485
1 parent 00b50b2 commit c22a2b0

File tree

12 files changed

+503
-23
lines changed

12 files changed

+503
-23
lines changed

docs/source/llm/run-on-ios.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import ExecuTorchLLM
2424

2525
### TextLLMRunner
2626

27-
The `ExecuTorchTextLLMRunner` class (bridged to Swift as `TextLLMRunner`) provides a simple Objective-C/Swift interface for loading a text-generation model, configuring its tokenizer with custom special tokens, generating token streams, and stopping execution.
27+
The `ExecuTorchLLMTextRunner` class (bridged to Swift as `TextLLMRunner`) provides a simple Objective-C/Swift interface for loading a text-generation model, configuring its tokenizer with custom special tokens, generating token streams, and stopping execution.
2828
This API is experimental and subject to change.
2929

3030
#### Initialization
@@ -38,7 +38,7 @@ NSString *modelPath = [[NSBundle mainBundle] pathForResource:@"llama-3.2-ins
3838
NSString *tokenizerPath = [[NSBundle mainBundle] pathForResource:@"tokenizer" ofType:@"model"];
3939
NSArray<NSString *> *specialTokens = @[ @"<|bos|>", @"<|eos|>" ];
4040

41-
ExecuTorchTextLLMRunner *runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath
41+
ExecuTorchLLMTextRunner *runner = [[ExecuTorchLLMTextRunner alloc] initWithModelPath:modelPath
4242
tokenizerPath:tokenizerPath
4343
specialTokens:specialTokens];
4444
```

examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
#import <ExecuTorch/ExecuTorchLog.h>
1212
#if BUILD_WITH_XCODE
13-
#import "ExecuTorchTextLLMRunner.h"
13+
#import "ExecuTorchLLMTextRunner.h"
1414
#else
1515
#import <ExecuTorchLLM/ExecuTorchLLM.h>
1616
#endif
@@ -20,7 +20,7 @@ @interface LLaMARunner ()<ExecuTorchLogSink>
2020
@end
2121

2222
@implementation LLaMARunner {
23-
ExecuTorchTextLLMRunner *_runner;
23+
ExecuTorchLLMTextRunner *_runner;
2424
}
2525

2626
- (instancetype)initWithModelPath:(NSString *)modelPath
@@ -33,7 +33,7 @@ - (instancetype)initWithModelPath:(NSString *)modelPath
3333
for (const auto &token : *tokens) {
3434
[specialTokens addObject:(NSString *)@(token.c_str())];
3535
}
36-
_runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath
36+
_runner = [[ExecuTorchLLMTextRunner alloc] initWithModelPath:modelPath
3737
tokenizerPath:tokenizerPath
3838
specialTokens:specialTokens];
3939
}

extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ @implementation LLaMATests
8787
for (NSUInteger index = 2; specialTokens.count < 256; ++index) {
8888
[specialTokens addObject:[NSString stringWithFormat:@"<|reserved_special_token_%zu|>", index]];
8989
}
90-
auto __block runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath
90+
auto __block runner = [[ExecuTorchLLMTextRunner alloc] initWithModelPath:modelPath
9191
tokenizerPath:tokenizerPath
9292
specialTokens:specialTokens];
9393
NSError *error;

extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,6 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#import "ExecuTorchTextLLMRunner.h"
9+
#import "ExecuTorchLLMError.h"
10+
#import "ExecuTorchLLMMultimodalRunner.h"
11+
#import "ExecuTorchLLMTextRunner.h"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#import <Foundation/Foundation.h>
10+
11+
NS_ASSUME_NONNULL_BEGIN
12+
13+
/**
 The NSError domain constant for the ExecuTorch LLM APIs.
 Imported into Swift under the name `ErrorDomain`.
 */
FOUNDATION_EXPORT NSErrorDomain const ExecuTorchLLMErrorDomain NS_SWIFT_NAME(ErrorDomain);
14+
15+
NS_ASSUME_NONNULL_END
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#import "ExecuTorchLLMError.h"
10+
11+
// Storage and string value of the LLM error-domain constant (reverse-DNS style identifier).
NSErrorDomain const ExecuTorchLLMErrorDomain = @"org.pytorch.executorch.llm.error";
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#import <Foundation/Foundation.h>
10+
11+
NS_ASSUME_NONNULL_BEGIN
12+
13+
/**
 Discriminator naming the kind of payload a multimodal input carries.

 The case order must be kept in sync with the C++ enum in
 llm/runner/multimodal_input.h. The raw values written out below are exactly
 the ones the compiler would otherwise assign implicitly.
 */
typedef NS_ENUM(NSInteger, ExecuTorchLLMMultimodalInputType) {
  /// Text payload.
  ExecuTorchLLMMultimodalInputTypeText = 0,
  /// Image payload.
  ExecuTorchLLMMultimodalInputTypeImage = 1,
  /// Audio payload.
  ExecuTorchLLMMultimodalInputTypeAudio = 2,
  /// Marker for an input kind that is not supported.
  ExecuTorchLLMMultimodalInputTypeUnsupported = 3,
} NS_SWIFT_NAME(MultimodalInputType);
23+
24+
/**
 Value object describing one image handed to the multimodal generation APIs:
 a byte buffer together with its width, height, and channel count.

 Imported into Swift as `Image`. All properties are read-only.
 */
NS_SWIFT_NAME(Image)
__attribute__((deprecated("This API is experimental.")))
@interface ExecuTorchLLMImage : NSObject<NSCopying>

/// Raw image bytes.
@property(nonatomic, readonly) NSData *data;

/// Image width in pixels.
@property(nonatomic, readonly) NSInteger width;

/// Image height in pixels.
@property(nonatomic, readonly) NSInteger height;

/// Number of channels.
@property(nonatomic, readonly) NSInteger channels;

/**
 Designated initializer: builds an image container from a byte buffer and
 its dimensions.

 @param data Raw image bytes.
 @param width Image width in pixels.
 @param height Image height in pixels.
 @param channels Number of channels.
 @return An initialized ExecuTorchLLMImage instance.
 */
- (instancetype)initWithData:(NSData *)data
                       width:(NSInteger)width
                      height:(NSInteger)height
                    channels:(NSInteger)channels NS_DESIGNATED_INITIALIZER;

// Parameterless construction is disallowed; use the designated initializer.
- (instancetype)init NS_UNAVAILABLE;
+ (instancetype)new NS_UNAVAILABLE;

@end
55+
56+
/**
 Value object holding pre-processed audio features for the multimodal
 generation APIs: a feature buffer plus its batch, bin, and frame sizes.

 Imported into Swift as `Audio`. All properties are read-only.
 */
NS_SWIFT_NAME(Audio)
__attribute__((deprecated("This API is experimental.")))
@interface ExecuTorchLLMAudio : NSObject<NSCopying>

/// Feature buffer.
@property(nonatomic, readonly) NSData *data;

/// Batch dimension size.
@property(nonatomic, readonly) NSInteger batchSize;

/// Number of frequency bins.
@property(nonatomic, readonly) NSInteger bins;

/// Number of time frames.
@property(nonatomic, readonly) NSInteger frames;

/**
 Designated initializer: builds an audio-features container from a buffer
 and its shape.

 @param data Feature buffer.
 @param batchSize Batch dimension size.
 @param bins Number of frequency bins.
 @param frames Number of time frames.
 @return An initialized ExecuTorchLLMAudio instance.
 */
- (instancetype)initWithData:(NSData *)data
                   batchSize:(NSInteger)batchSize
                        bins:(NSInteger)bins
                      frames:(NSInteger)frames NS_DESIGNATED_INITIALIZER;

// Parameterless construction is disallowed; use the designated initializer.
- (instancetype)init NS_UNAVAILABLE;
+ (instancetype)new NS_UNAVAILABLE;

@end
87+
88+
/**
 One tagged item in the input sequence given to the multimodal generation
 APIs. Each instance is created through one of the factory methods below and
 reports its kind through `type`.

 Imported into Swift as `MultimodalInput`.
 */
NS_SWIFT_NAME(MultimodalInput)
__attribute__((deprecated("This API is experimental.")))
@interface ExecuTorchLLMMultimodalInput : NSObject<NSCopying>

/// The kind of payload this input carries.
@property(nonatomic, readonly) ExecuTorchLLMMultimodalInputType type;

/// Text payload, or nil. NOTE(review): presumably set only for text inputs — confirm in the implementation.
@property(nonatomic, readonly, nullable) NSString *text;

/// Image payload, or nil. NOTE(review): presumably set only for image inputs — confirm in the implementation.
@property(nonatomic, readonly, nullable) ExecuTorchLLMImage *image;

/// Audio payload, or nil. NOTE(review): presumably set only for audio inputs — confirm in the implementation.
@property(nonatomic, readonly, nullable) ExecuTorchLLMAudio *audio;

/**
 Builds an input wrapping a piece of text.

 @param text The UTF-8 text to provide as input.
 @return A retained ExecuTorchLLMMultimodalInput instance of type Text.
 */
+ (instancetype)inputWithText:(NSString *)text
    NS_RETURNS_RETAINED NS_SWIFT_NAME(init(_:));

/**
 Builds an input wrapping an image.

 @param image The image payload to provide as input.
 @return A retained ExecuTorchLLMMultimodalInput instance of type Image.
 */
+ (instancetype)inputWithImage:(ExecuTorchLLMImage *)image
    NS_RETURNS_RETAINED NS_SWIFT_NAME(init(_:));

/**
 Builds an input wrapping pre-processed audio features.

 @param audio The pre-processed audio features to provide as input.
 @return A retained ExecuTorchLLMMultimodalInput instance of type Audio.
 */
+ (instancetype)inputWithAudio:(ExecuTorchLLMAudio *)audio
    NS_RETURNS_RETAINED NS_SWIFT_NAME(init(audio:));

// Parameterless construction is disallowed; use one of the factory methods.
- (instancetype)init NS_UNAVAILABLE;
+ (instancetype)new NS_UNAVAILABLE;

@end
135+
136+
/**
 A wrapper class for the C++ llm::MultimodalLLMRunner that provides
 Objective-C APIs to load models, manage tokenization, accept mixed
 input modalities, generate text sequences, and stop the runner.

 Imported into Swift as `MultimodalRunner`.
 */
NS_SWIFT_NAME(MultimodalRunner)
__attribute__((deprecated("This API is experimental.")))
@interface ExecuTorchLLMMultimodalRunner : NSObject

/**
 Initializes a multimodal LLM runner with the given model and tokenizer paths.

 @param modelPath File system path to the serialized model.
 @param tokenizerPath File system path to the tokenizer data.
 @return An initialized ExecuTorchLLMMultimodalRunner instance.
 */
- (instancetype)initWithModelPath:(NSString *)modelPath
                    tokenizerPath:(NSString *)tokenizerPath
    NS_DESIGNATED_INITIALIZER;

/**
 Checks whether the underlying model has been successfully loaded.

 @return YES if the model is loaded, NO otherwise.
 */
- (BOOL)isLoaded;

/**
 Loads the model into memory, returning an error if loading fails.

 @param error On failure, populated with an NSError explaining the issue.
 @return YES if loading succeeds, NO if an error occurred.
 */
- (BOOL)loadWithError:(NSError **)error;

/**
 Generates text given a list of multimodal inputs, up to a specified sequence length.
 Invokes the provided callback for each generated token.

 @param inputs An ordered array of multimodal inputs.
 @param sequenceLength The maximum number of tokens to generate.
 @param callback A block called with each generated token as an NSString.
   May be nil.
 @param error On failure, populated with an NSError explaining the issue.
 @return YES if generation completes successfully, NO if an error occurred.
   Callers should test this return value rather than the error pointer.
 */
- (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
       sequenceLength:(NSInteger)sequenceLength
    withTokenCallback:(nullable void (^)(NSString *))callback
                error:(NSError **)error;

/**
 Stops any ongoing generation and cleans up internal resources.
 */
- (void)stop;

// Parameterless construction is disallowed; use the designated initializer.
+ (instancetype)new NS_UNAVAILABLE;
- (instancetype)init NS_UNAVAILABLE;

@end
195+
196+
NS_ASSUME_NONNULL_END

0 commit comments

Comments
 (0)