Skip to content

Commit 6bfe337

Browse files
authored
Pass a full generation config instead of just sequence length to the runners.
Differential Revision: D83382480 Pull Request resolved: #14635
1 parent 73dc30b commit 6bfe337

9 files changed

+246
-35
lines changed

extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#import "ExecuTorchLLMConfig.h"
910
#import "ExecuTorchLLMError.h"
1011
#import "ExecuTorchLLMMultimodalRunner.h"
1112
#import "ExecuTorchLLMTextRunner.h"
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#import <Foundation/Foundation.h>
10+
11+
NS_ASSUME_NONNULL_BEGIN
12+
13+
/**
 Generation settings that are handed to the LLM runners.

 The values are stored in the underlying C++ GenerationConfig, so default
 values and any future fields stay defined in one place on the C++ side.
 */
NS_SWIFT_NAME(Config)
__attribute__((deprecated("This API is experimental.")))
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchLLMConfig : NSObject <NSCopying>

/** YES if the input prompt should be echoed in the output. */
@property(nonatomic, assign, getter=isEchoEnabled) BOOL echoEnabled;

/** Upper bound on the number of new tokens to generate. */
@property(nonatomic, assign) NSInteger maximumNewTokens;

/** YES if this run is a warmup run. */
@property(nonatomic, assign, getter=isWarming) BOOL warming;

/** Upper bound on the total sequence length. */
@property(nonatomic, assign) NSInteger sequenceLength;

/** Sampling temperature. */
@property(nonatomic, assign) double temperature;

/** How many BOS tokens to add. */
@property(nonatomic, assign) NSInteger bosCount;

/** How many EOS tokens to add. */
@property(nonatomic, assign) NSInteger eosCount;

/**
 Creates a configuration and hands it to the block so the caller can
 adjust its properties in place.

 @param block A block that receives the newly initialized configuration.
 @return An initialized ExecuTorchLLMConfig instance.
 */
- (instancetype)initWithBlock:(NS_NOESCAPE void (^)(ExecuTorchLLMConfig *))block
    NS_SWIFT_NAME(init(_:));

@end
55+
56+
NS_ASSUME_NONNULL_END
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#import "ExecuTorchLLMConfig.h"
10+
11+
#import <executorch/extension/llm/runner/irunner.h>
12+
13+
using namespace executorch::extension;
14+
15+
@interface ExecuTorchLLMConfig ()

/**
 Returns a read-only reference to the wrapped C++ GenerationConfig.

 The reference points at storage owned by this object, so it is only valid
 while the receiver is alive.
 */
- (const llm::GenerationConfig &)nativeConfig;

@end

/**
 Converts an NSInteger to int32_t, saturating at the int32_t bounds.

 The integer fields written below are assigned through an int32_t cast. A
 plain cast silently wraps when NSInteger is 64 bits wide (for example
 (1LL << 32) + 5 would become 5), so out-of-range values are pinned to
 INT32_MIN / INT32_MAX instead.
 */
static inline int32_t ExecuTorchLLMConfigClampToInt32(NSInteger value) {
  // Widen first so the comparisons are well-defined whatever NSInteger's width.
  const int64_t wide = (int64_t)value;
  if (wide > (int64_t)INT32_MAX) {
    return INT32_MAX;
  }
  if (wide < (int64_t)INT32_MIN) {
    return INT32_MIN;
  }
  return (int32_t)wide;
}

/**
 Objective-C wrapper around llm::GenerationConfig.

 Every property is a thin accessor over the corresponding field of the
 wrapped C++ struct; no value is duplicated on the Objective-C side.
 */
@implementation ExecuTorchLLMConfig {
  // Owned C++ configuration; created in -init and never reseated afterwards.
  std::unique_ptr<llm::GenerationConfig> _config;
}

// Accessors are written by hand below and forward to _config, so no ivars
// should be synthesized for these properties.
@dynamic echoEnabled;
@dynamic maximumNewTokens;
@dynamic warming;
@dynamic sequenceLength;
@dynamic temperature;
@dynamic bosCount;
@dynamic eosCount;

#pragma mark - Lifecycle

/** Creates a configuration backed by a default-constructed GenerationConfig. */
- (instancetype)init {
  if (self = [super init]) {
    _config = std::make_unique<llm::GenerationConfig>();
  }
  return self;
}

/**
 Creates a default configuration and then lets the block mutate it.

 @param block Called synchronously with the new instance; skipped when nil.
 @return The initialized configuration.
 */
- (instancetype)initWithBlock:(NS_NOESCAPE void (^)(ExecuTorchLLMConfig *))block {
  if (self = [self init]) {
    if (block) {
      block(self);
    }
  }
  return self;
}

#pragma mark - NSCopying

/** Returns an independent copy whose native config is a value copy of ours. */
- (id)copyWithZone:(NSZone *)zone {
  ExecuTorchLLMConfig *config = [[[self class] allocWithZone:zone] init];
  // Copy the struct itself, not the pointer, so the two objects never share state.
  *config->_config = *_config;
  return config;
}

#pragma mark - Native access

- (const llm::GenerationConfig &)nativeConfig {
  return *_config;
}

#pragma mark - Properties

/** Reads GenerationConfig::echo. */
- (BOOL)echoEnabled {
  return _config->echo;
}

/** Writes GenerationConfig::echo. */
- (void)setEchoEnabled:(BOOL)echoEnabled {
  _config->echo = echoEnabled;
}

/** Reads GenerationConfig::max_new_tokens. */
- (NSInteger)maximumNewTokens {
  return _config->max_new_tokens;
}

/** Writes GenerationConfig::max_new_tokens, saturating to the int32_t range. */
- (void)setMaximumNewTokens:(NSInteger)maximumNewTokens {
  _config->max_new_tokens = ExecuTorchLLMConfigClampToInt32(maximumNewTokens);
}

/** Reads GenerationConfig::warming. */
- (BOOL)warming {
  return _config->warming;
}

/** Writes GenerationConfig::warming. */
- (void)setWarming:(BOOL)warming {
  _config->warming = warming;
}

/** Reads GenerationConfig::seq_len. */
- (NSInteger)sequenceLength {
  return _config->seq_len;
}

/** Writes GenerationConfig::seq_len, saturating to the int32_t range. */
- (void)setSequenceLength:(NSInteger)sequenceLength {
  _config->seq_len = ExecuTorchLLMConfigClampToInt32(sequenceLength);
}

/** Reads GenerationConfig::temperature. */
- (double)temperature {
  return _config->temperature;
}

/** Writes GenerationConfig::temperature; the value is narrowed to float. */
- (void)setTemperature:(double)temperature {
  _config->temperature = (float)temperature;
}

/** Reads GenerationConfig::num_bos. */
- (NSInteger)bosCount {
  return _config->num_bos;
}

/** Writes GenerationConfig::num_bos, saturating to the int32_t range. */
- (void)setBosCount:(NSInteger)bosCount {
  _config->num_bos = ExecuTorchLLMConfigClampToInt32(bosCount);
}

/** Reads GenerationConfig::num_eos. */
- (NSInteger)eosCount {
  return _config->num_eos;
}

/** Writes GenerationConfig::num_eos, saturating to the int32_t range. */
- (void)setEosCount:(NSInteger)eosCount {
  _config->num_eos = ExecuTorchLLMConfigClampToInt32(eosCount);
}

@end

extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#import <Foundation/Foundation.h>
9+
#import "ExecuTorchLLMConfig.h"
1010

1111
NS_ASSUME_NONNULL_BEGIN
1212

@@ -26,6 +26,7 @@ typedef NS_ENUM(NSInteger, ExecuTorchLLMMultimodalInputType) {
2626
*/
2727
NS_SWIFT_NAME(Image)
2828
__attribute__((deprecated("This API is experimental.")))
29+
__attribute__((objc_subclassing_restricted))
2930
@interface ExecuTorchLLMImage : NSObject<NSCopying>
3031

3132
/**
@@ -44,8 +45,11 @@ __attribute__((deprecated("This API is experimental.")))
4445
NS_DESIGNATED_INITIALIZER;
4546

4647
@property(nonatomic, readonly) NSData *data;
48+
4749
@property(nonatomic, readonly) NSInteger width;
50+
4851
@property(nonatomic, readonly) NSInteger height;
52+
4953
@property(nonatomic, readonly) NSInteger channels;
5054

5155
+ (instancetype)new NS_UNAVAILABLE;
@@ -58,6 +62,7 @@ __attribute__((deprecated("This API is experimental.")))
5862
*/
5963
NS_SWIFT_NAME(Audio)
6064
__attribute__((deprecated("This API is experimental.")))
65+
__attribute__((objc_subclassing_restricted))
6166
@interface ExecuTorchLLMAudio : NSObject<NSCopying>
6267

6368
/**
@@ -76,8 +81,11 @@ __attribute__((deprecated("This API is experimental.")))
7681
NS_DESIGNATED_INITIALIZER;
7782

7883
@property(nonatomic, readonly) NSData *data;
84+
7985
@property(nonatomic, readonly) NSInteger batchSize;
86+
8087
@property(nonatomic, readonly) NSInteger bins;
88+
8189
@property(nonatomic, readonly) NSInteger frames;
8290

8391
+ (instancetype)new NS_UNAVAILABLE;
@@ -91,6 +99,7 @@ __attribute__((deprecated("This API is experimental.")))
9199
*/
92100
NS_SWIFT_NAME(MultimodalInput)
93101
__attribute__((deprecated("This API is experimental.")))
102+
__attribute__((objc_subclassing_restricted))
94103
@interface ExecuTorchLLMMultimodalInput : NSObject<NSCopying>
95104

96105
/**
@@ -124,8 +133,11 @@ __attribute__((deprecated("This API is experimental.")))
124133
NS_RETURNS_RETAINED;
125134

126135
@property(nonatomic, readonly) ExecuTorchLLMMultimodalInputType type;
136+
127137
@property(nonatomic, readonly, nullable) NSString *text;
138+
128139
@property(nonatomic, readonly, nullable) ExecuTorchLLMImage *image;
140+
129141
@property(nonatomic, readonly, nullable) ExecuTorchLLMAudio *audio;
130142

131143
+ (instancetype)new NS_UNAVAILABLE;
@@ -134,12 +146,13 @@ __attribute__((deprecated("This API is experimental.")))
134146
@end
135147

136148
/**
137-
A wrapper class for the C++ llm::MultimodalLLMRunner that provides
149+
A wrapper class for the C++ llm::MultimodalRunner that provides
138150
Objective-C APIs to load models, manage tokenization, accept mixed
139151
input modalities, generate text sequences, and stop the runner.
140152
*/
141153
NS_SWIFT_NAME(MultimodalRunner)
142154
__attribute__((deprecated("This API is experimental.")))
155+
__attribute__((objc_subclassing_restricted))
143156
@interface ExecuTorchLLMMultimodalRunner : NSObject
144157

145158
/**
@@ -169,29 +182,32 @@ __attribute__((deprecated("This API is experimental.")))
169182
- (BOOL)loadWithError:(NSError **)error;
170183

171184
/**
172-
Generates text given a list of multimodal inputs, up to a specified sequence length.
173-
Invokes the provided callback for each generated token.
185+
Generates text given a list of multimodal inputs, using the supplied
186+
configuration object to control generation.
174187
175-
@param inputs An ordered array of multimodal inputs.
176-
@param seq_len The maximum number of tokens to generate.
177-
@param callback A block called with each generated token as an NSString.
178-
@param error On failure, populated with an NSError explaining the issue.
188+
The token callback, if provided, is invoked for each generated token.
189+
190+
@param inputs An ordered array of multimodal inputs.
191+
@param config A configuration object.
192+
@param callback A block called with each generated token as an NSString.
193+
@param error On failure, populated with an NSError explaining the issue.
179194
@return YES if generation completes successfully, NO if an error occurred.
180195
*/
181196
- (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
182-
sequenceLength:(NSInteger)seq_len
197+
config:(ExecuTorchLLMConfig *)config
183198
withTokenCallback:(nullable void (^)(NSString *))callback
184-
error:(NSError **)error;
199+
error:(NSError **)error
200+
NS_SWIFT_NAME(generate(_:_:tokenCallback:));
185201

186202
/**
187203
Stop producing new tokens and terminate the current generation process.
188204
*/
189205
- (void)stop;
190206

191207
/**
192-
Remove the prefilled tokens from the KV cache and resets the start position
193-
to 0. It also clears the stats for previous runs.
194-
*/
208+
Remove the prefilled tokens from the KV cache and reset the start position
209+
to 0. It also clears the stats for previous runs.
210+
*/
195211
- (void)reset;
196212

197213
+ (instancetype)new NS_UNAVAILABLE;

extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@
1515
using namespace executorch::extension;
1616
using namespace executorch::runtime;
1717

18+
@interface ExecuTorchLLMConfig ()
19+
// Redeclares the accessor for the wrapped C++ GenerationConfig in this file so
// the runner can pass config.nativeConfig to the native generate call.
20+
- (const llm::GenerationConfig &)nativeConfig;
21+
22+
@end
23+
1824
@implementation ExecuTorchLLMImage
1925

2026
- (instancetype)initWithData:(NSData *)data
@@ -157,7 +163,7 @@ - (BOOL)loadWithError:(NSError**)error {
157163
}
158164

159165
- (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
160-
sequenceLength:(NSInteger)seq_len
166+
config:(ExecuTorchLLMConfig *)config
161167
withTokenCallback:(nullable void (^)(NSString *))callback
162168
error:(NSError **)error {
163169
if (![self loadWithError:error]) {
@@ -192,7 +198,7 @@ - (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
192198
}
193199
auto status = _runner->generate(
194200
std::move(nativeInputs),
195-
llm::GenerationConfig{.seq_len = static_cast<int32_t>(seq_len)},
201+
config.nativeConfig,
196202
[callback](const std::string& token) {
197203
if (callback) {
198204
callback(@(token.c_str()));

extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#import <Foundation/Foundation.h>
9+
#import "ExecuTorchLLMConfig.h"
1010

1111
NS_ASSUME_NONNULL_BEGIN
1212

@@ -49,29 +49,32 @@ __attribute__((deprecated("This API is experimental.")))
4949
- (BOOL)loadWithError:(NSError **)error;
5050

5151
/**
52-
Generates text given an input prompt, up to a specified sequence length.
53-
Invokes the provided callback for each generated token.
52+
Generates text given an input prompt, using the supplied
53+
configuration object to control generation.
5454
55-
@param prompt The initial text prompt to generate from.
56-
@param seq_len The maximum number of tokens to generate.
57-
@param callback A block called with each generated token as an NSString.
58-
@param error On failure, populated with an NSError explaining the issue.
55+
The token callback, if provided, is invoked for each generated token.
56+
57+
@param prompt The initial text prompt to generate from.
58+
@param config A configuration object.
59+
@param callback A block called with each generated token as an NSString.
60+
@param error On failure, populated with an NSError explaining the issue.
5961
@return YES if generation completes successfully, NO if an error occurred.
6062
*/
6163
- (BOOL)generate:(NSString *)prompt
62-
sequenceLength:(NSInteger)seq_len
63-
withTokenCallback:(nullable void (^)(NSString *))callback
64-
error:(NSError **)error;
64+
config:(ExecuTorchLLMConfig *)config
65+
withTokenCallback:(nullable void (^)(NSString *token))callback
66+
error:(NSError **)error
67+
NS_SWIFT_NAME(generate(_:_:tokenCallback:));
6568

6669
/**
6770
Stop producing new tokens and terminate the current generation process.
6871
*/
6972
- (void)stop;
7073

7174
/**
72-
Remove the prefilled tokens from the KV cache and resets the start position
73-
to 0. It also clears the stats for previous runs.
74-
*/
75+
Remove the prefilled tokens from the KV cache and reset the start position
76+
to 0. It also clears the stats for previous runs.
77+
*/
7578
- (void)reset;
7679

7780
+ (instancetype)new NS_UNAVAILABLE;

0 commit comments

Comments
 (0)