77#include " ort_genai.h"
88#include " ort_genai_c.h"
99
10+
// Shared token limit for a single generate call: it is passed to the generator
// as the max_length search option and is also used to pre-reserve capacity in
// the per-token timing vector, so the two stay in sync.
// NOTE(review): max_length presumably bounds the total sequence length
// (prompt + generated tokens) — confirm against the GenAI API.
11+ const size_t kMaxTokens = 200 ;
12+
1013@interface GenAIGenerator () {
1114 std::unique_ptr<OgaModel> model;
1215 std::unique_ptr<OgaTokenizer> tokenizer;
@@ -29,6 +32,8 @@ - (instancetype)init {
2932
3033- (void )generate : (nonnull NSString *)input_user_question {
3134 std::vector<long long > tokenTimes; // per-token generation times
35+ tokenTimes.reserve (kMaxTokens );
36+
3237 TimePoint startTime, firstTokenTime, tokenStartTime;
3338
3439 try {
@@ -60,7 +65,7 @@ - (void)generate:(nonnull NSString*)input_user_question {
6065
6166 NSLog (@" Setting generator parameters..." );
6267 auto params = OgaGeneratorParams::Create (*self->model );
63- params->SetSearchOption (" max_length" , 200 );
68+ params->SetSearchOption (" max_length" , kMaxTokens );
6469 params->SetInputSequences (*sequences);
6570
6671 auto generator = OgaGenerator::Create (*self->model , *params);
@@ -86,7 +91,7 @@ - (void)generate:(nonnull NSString*)input_user_question {
8691 const char * decode_tokens = tokenizer_stream->Decode (seq[seq_len - 1 ]);
8792
8893 if (!decode_tokens) {
89- @ throw [ NSException exceptionWithName: @" TokenDecodeError " reason: @ " Token decoding failed." userInfo: nil ] ;
94+ throw std::runtime_error ( " Token decoding failed." ) ;
9095 }
9196
9297 // Measure token generation time excluding logging
0 commit comments