Skip to content

Commit 06e87ce

Browse files
shoumikhin and facebook-github-bot
authored and committed
Objective-C wrapper for multimodal LLM runner. (pytorch#14075)
Summary: Pull Request resolved: pytorch#14075. Differential Revision: D81936485
1 parent 0eb4361 commit 06e87ce

File tree

12 files changed

+581
-23
lines changed

12 files changed

+581
-23
lines changed

docs/source/llm/run-on-ios.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import ExecuTorchLLM
2424

2525
### TextLLMRunner
2626

27-
The `ExecuTorchTextLLMRunner` class (bridged to Swift as `TextLLMRunner`) provides a simple Objective-C/Swift interface for loading a text-generation model, configuring its tokenizer with custom special tokens, generating token streams, and stopping execution.
27+
The `ExecuTorchLLMTextRunner` class (bridged to Swift as `TextLLMRunner`) provides a simple Objective-C/Swift interface for loading a text-generation model, configuring its tokenizer with custom special tokens, generating token streams, and stopping execution.
2828
This API is experimental and subject to change.
2929

3030
#### Initialization
@@ -38,7 +38,7 @@ NSString *modelPath = [[NSBundle mainBundle] pathForResource:@"llama-3.2-ins
3838
NSString *tokenizerPath = [[NSBundle mainBundle] pathForResource:@"tokenizer" ofType:@"model"];
3939
NSArray<NSString *> *specialTokens = @[ @"<|bos|>", @"<|eos|>" ];
4040

41-
ExecuTorchTextLLMRunner *runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath
41+
ExecuTorchLLMTextRunner *runner = [[ExecuTorchLLMTextRunner alloc] initWithModelPath:modelPath
4242
tokenizerPath:tokenizerPath
4343
specialTokens:specialTokens];
4444
```

examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
#import <ExecuTorch/ExecuTorchLog.h>
1212
#if BUILD_WITH_XCODE
13-
#import "ExecuTorchTextLLMRunner.h"
13+
#import "ExecuTorchLLMTextRunner.h"
1414
#else
1515
#import <ExecuTorchLLM/ExecuTorchLLM.h>
1616
#endif
@@ -20,7 +20,7 @@ @interface LLaMARunner ()<ExecuTorchLogSink>
2020
@end
2121

2222
@implementation LLaMARunner {
23-
ExecuTorchTextLLMRunner *_runner;
23+
ExecuTorchLLMTextRunner *_runner;
2424
}
2525

2626
- (instancetype)initWithModelPath:(NSString *)modelPath
@@ -33,7 +33,7 @@ - (instancetype)initWithModelPath:(NSString *)modelPath
3333
for (const auto &token : *tokens) {
3434
[specialTokens addObject:(NSString *)@(token.c_str())];
3535
}
36-
_runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath
36+
_runner = [[ExecuTorchLLMTextRunner alloc] initWithModelPath:modelPath
3737
tokenizerPath:tokenizerPath
3838
specialTokens:specialTokens];
3939
}

extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ @implementation LLaMATests
8787
for (NSUInteger index = 2; specialTokens.count < 256; ++index) {
8888
[specialTokens addObject:[NSString stringWithFormat:@"<|reserved_special_token_%zu|>", index]];
8989
}
90-
auto __block runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath
90+
auto __block runner = [[ExecuTorchLLMTextRunner alloc] initWithModelPath:modelPath
9191
tokenizerPath:tokenizerPath
9292
specialTokens:specialTokens];
9393
NSError *error;

extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,6 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#import "ExecuTorchTextLLMRunner.h"
9+
#import "ExecuTorchLLMError.h"
10+
#import "ExecuTorchLLMMultimodalRunner.h"
11+
#import "ExecuTorchLLMTextRunner.h"
extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMError.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#import <Foundation/Foundation.h>
10+
11+
NS_ASSUME_NONNULL_BEGIN
12+
13+
FOUNDATION_EXPORT NSErrorDomain const ExecuTorchLLMErrorDomain NS_SWIFT_NAME(ErrorDomain);
14+
15+
NS_ASSUME_NONNULL_END
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#import "ExecuTorchLLMError.h"
10+
11+
NSErrorDomain const ExecuTorchLLMErrorDomain = @"org.pytorch.executorch.llm.error";
extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#import <Foundation/Foundation.h>
10+
11+
NS_ASSUME_NONNULL_BEGIN
12+
13+
/**
14+
Types of multimodal inputs supported by the ExecuTorch LLM APIs.
15+
*/
16+
typedef NS_ENUM(NSInteger, ExecuTorchLLMMultimodalInputType) {
17+
ExecuTorchLLMMultimodalInputTypeText,
18+
ExecuTorchLLMMultimodalInputTypeImage,
19+
ExecuTorchLLMMultimodalInputTypeAudio,
20+
ExecuTorchLLMMultimodalInputTypeRawAudio,
21+
ExecuTorchLLMMultimodalInputTypeUnsupported,
22+
} NS_SWIFT_NAME(MultimodalInputType);
23+
24+
/**
25+
A container for image inputs used with multimodal generation APIs.
26+
*/
27+
NS_SWIFT_NAME(Image)
28+
__attribute__((deprecated("This API is experimental.")))
29+
@interface ExecuTorchLLMImage : NSObject<NSCopying>
30+
31+
/**
32+
Initializes an image container with the provided data and dimensions.
33+
34+
@param data Raw image bytes.
35+
@param width Image width in pixels.
36+
@param height Image height in pixels.
37+
@param channels Number of channels.
38+
@return An initialized ExecuTorchLLMImage instance.
39+
*/
40+
- (instancetype)initWithData:(NSData *)data
41+
width:(NSInteger)width
42+
height:(NSInteger)height
43+
channels:(NSInteger)channels
44+
NS_DESIGNATED_INITIALIZER;
45+
46+
@property(nonatomic, readonly) NSData *data;
47+
@property(nonatomic, readonly) NSInteger width;
48+
@property(nonatomic, readonly) NSInteger height;
49+
@property(nonatomic, readonly) NSInteger channels;
50+
51+
+ (instancetype)new NS_UNAVAILABLE;
52+
- (instancetype)init NS_UNAVAILABLE;
53+
54+
@end
55+
56+
/**
57+
A container for pre-processed audio features.
58+
*/
59+
NS_SWIFT_NAME(Audio)
60+
__attribute__((deprecated("This API is experimental.")))
61+
@interface ExecuTorchLLMAudio : NSObject<NSCopying>
62+
63+
/**
64+
Initializes an audio features container with the provided data and shape.
65+
66+
@param data Feature buffer.
67+
@param batchSize Batch dimension size.
68+
@param bins Number of frequency bins.
69+
@param frames Number of time frames.
70+
@return An initialized ExecuTorchLLMAudio instance.
71+
*/
72+
- (instancetype)initWithData:(NSData *)data
73+
batchSize:(NSInteger)batchSize
74+
bins:(NSInteger)bins
75+
frames:(NSInteger)frames
76+
NS_DESIGNATED_INITIALIZER;
77+
78+
@property(nonatomic, readonly) NSData *data;
79+
@property(nonatomic, readonly) NSInteger batchSize;
80+
@property(nonatomic, readonly) NSInteger bins;
81+
@property(nonatomic, readonly) NSInteger frames;
82+
83+
+ (instancetype)new NS_UNAVAILABLE;
84+
- (instancetype)init NS_UNAVAILABLE;
85+
86+
@end
87+
88+
/**
89+
A container for raw PCM audio.
90+
*/
91+
NS_SWIFT_NAME(RawAudio)
92+
__attribute__((deprecated("This API is experimental.")))
93+
@interface ExecuTorchLLMRawAudio : NSObject<NSCopying>
94+
95+
/**
96+
Initializes a raw audio container with the provided data and shape.
97+
98+
@param data Raw PCM audio bytes.
99+
@param batchSize Batch dimension size.
100+
@param channels Number of audio channels.
101+
@param samples Number of samples per channel.
102+
@return An initialized ExecuTorchLLMRawAudio instance.
103+
*/
104+
- (instancetype)initWithData:(NSData *)data
105+
batchSize:(NSInteger)batchSize
106+
channels:(NSInteger)channels
107+
samples:(NSInteger)samples
108+
NS_DESIGNATED_INITIALIZER;
109+
110+
@property(nonatomic, readonly) NSData *data;
111+
@property(nonatomic, readonly) NSInteger batchSize;
112+
@property(nonatomic, readonly) NSInteger channels;
113+
@property(nonatomic, readonly) NSInteger samples;
114+
115+
+ (instancetype)new NS_UNAVAILABLE;
116+
- (instancetype)init NS_UNAVAILABLE;
117+
118+
@end
119+
120+
/**
121+
A tagged container for a single multimodal input item used by
122+
multimodal generation APIs.
123+
*/
124+
NS_SWIFT_NAME(MultimodalInput)
125+
__attribute__((deprecated("This API is experimental.")))
126+
@interface ExecuTorchLLMMultimodalInput : NSObject<NSCopying>
127+
128+
/**
129+
Creates a text input.
130+
131+
@param text The UTF-8 text to provide as input.
132+
@return A retained ExecuTorchLLMMultimodalInput instance of type Text.
133+
*/
134+
+ (instancetype)inputWithText:(NSString *)text
135+
NS_SWIFT_NAME(init(_:))
136+
NS_RETURNS_RETAINED;
137+
138+
/**
139+
Creates an image input.
140+
141+
@param image The image payload to provide as input.
142+
@return A retained ExecuTorchLLMMultimodalInput instance of type Image.
143+
*/
144+
+ (instancetype)inputWithImage:(ExecuTorchLLMImage *)image
145+
NS_SWIFT_NAME(init(_:))
146+
NS_RETURNS_RETAINED;
147+
148+
/**
149+
Creates an audio-features input.
150+
151+
@param audio The pre-processed audio features to provide as input.
152+
@return A retained ExecuTorchLLMMultimodalInput instance of type Audio.
153+
*/
154+
+ (instancetype)inputWithAudio:(ExecuTorchLLMAudio *)audio
155+
NS_SWIFT_NAME(init(audio:))
156+
NS_RETURNS_RETAINED;
157+
158+
/**
159+
Creates a raw-audio input.
160+
161+
@param rawAudio The raw PCM audio to provide as input.
162+
@return A retained ExecuTorchLLMMultimodalInput instance of type RawAudio.
163+
*/
164+
+ (instancetype)inputWithRawAudio:(ExecuTorchLLMRawAudio *)rawAudio
165+
NS_SWIFT_NAME(init(rawAudio:))
166+
NS_RETURNS_RETAINED;
167+
168+
@property(nonatomic, readonly) ExecuTorchLLMMultimodalInputType type;
169+
@property(nonatomic, readonly, nullable) NSString *text;
170+
@property(nonatomic, readonly, nullable) ExecuTorchLLMImage *image;
171+
@property(nonatomic, readonly, nullable) ExecuTorchLLMAudio *audio;
172+
@property(nonatomic, readonly, nullable) ExecuTorchLLMRawAudio *rawAudio;
173+
174+
+ (instancetype)new NS_UNAVAILABLE;
175+
- (instancetype)init NS_UNAVAILABLE;
176+
177+
@end
178+
179+
/**
180+
A wrapper class for the C++ llm::MultimodalLLMRunner that provides
181+
Objective-C APIs to load models, manage tokenization, accept mixed
182+
input modalities, generate text sequences, and stop the runner.
183+
*/
184+
NS_SWIFT_NAME(MultimodalRunner)
185+
__attribute__((deprecated("This API is experimental.")))
186+
@interface ExecuTorchLLMMultimodalRunner : NSObject
187+
188+
/**
189+
Initializes a multimodal LLM runner with the given model and tokenizer paths.
190+
191+
@param modelPath File system path to the serialized model.
192+
@param tokenizerPath File system path to the tokenizer data.
193+
@return An initialized ExecuTorchLLMMultimodalRunner instance.
194+
*/
195+
- (instancetype)initWithModelPath:(NSString *)modelPath
196+
tokenizerPath:(NSString *)tokenizerPath
197+
NS_DESIGNATED_INITIALIZER;
198+
199+
/**
200+
Checks whether the underlying model has been successfully loaded.
201+
202+
@return YES if the model is loaded, NO otherwise.
203+
*/
204+
- (BOOL)isLoaded;
205+
206+
/**
207+
Loads the model into memory, returning an error if loading fails.
208+
209+
@param error On failure, populated with an NSError explaining the issue.
210+
@return YES if loading succeeds, NO if an error occurred.
211+
*/
212+
- (BOOL)loadWithError:(NSError **)error;
213+
214+
/**
215+
Generates text given a list of multimodal inputs, up to a specified sequence length.
216+
Invokes the provided callback for each generated token.
217+
218+
@param inputs An ordered array of multimodal inputs.
219+
@param seq_len The maximum number of tokens to generate.
220+
@param callback An optional block called with each generated token as an NSString; may be nil.
221+
@param error On failure, populated with an NSError explaining the issue.
222+
@return YES if generation completes successfully, NO if an error occurred.
223+
*/
224+
- (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
225+
sequenceLength:(NSInteger)seq_len
226+
withTokenCallback:(nullable void (^)(NSString *))callback
227+
error:(NSError **)error;
228+
229+
/**
230+
Stops any ongoing generation and cleans up internal resources.
231+
*/
232+
- (void)stop;
233+
234+
+ (instancetype)new NS_UNAVAILABLE;
235+
- (instancetype)init NS_UNAVAILABLE;
236+
237+
@end
238+
239+
NS_ASSUME_NONNULL_END

0 commit comments

Comments
 (0)