@@ -1,14 +1,12 @@
-import { ChatMessage } from 'gpt-tokenizer/GptEncoding';
 import { GenkitPluginV2 } from 'genkit/plugin';
 import { openAI } from '@genkit-ai/compat-oai/openai';
 import { RateLimiter } from 'limiter';
 import {
   GenkitModelProvider,
   PromptDataForCounting,
   RateLimitConfig,
 } from '../model-provider.js';
-import o3 from 'gpt-tokenizer/model/o3';
-import o4Mini from 'gpt-tokenizer/model/o4-mini';
+import { encoding_for_model } from 'tiktoken';
 
 export class OpenAiModelProvider extends GenkitModelProvider {
   readonly apiKeyVariableName = 'OPENAI_API_KEY';
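
The commit swaps the pure-JS `gpt-tokenizer` package for the WASM-backed `tiktoken` package. A minimal standalone sketch of the `tiktoken` calls the new code relies on (`encoding_for_model`, `encode`, `free`); the sample text is made up:

```ts
import { encoding_for_model } from 'tiktoken';

// Maps a model name to its BPE encoding (gpt-4o -> o200k_base).
const encoding = encoding_for_model('gpt-4o');
try {
  // encode() returns a Uint32Array of token ids.
  const tokens = encoding.encode('user: hello world');
  console.log(tokens.length);
} finally {
  // tiktoken encodings hold WASM memory and must be freed explicitly.
  encoding.free();
}
```
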
@@ -19,6 +17,21 @@ export class OpenAiModelProvider extends GenkitModelProvider {
     'openai-gpt-5': () => openAI.model('gpt-5'),
   };
 
+  private countTokensForModel(
+    modelName: Parameters<typeof encoding_for_model>[0],
+    prompt: PromptDataForCounting
+  ): number {
+    const encoding = encoding_for_model(modelName);
+    try {
+      const messages = this.genkitPromptToOpenAi(prompt);
+      const text = messages.map((m) => `${m.role}: ${m.content}`).join('\n');
+      const tokens = encoding.encode(text);
+      return tokens.length;
+    } finally {
+      encoding.free();
+    }
+  }
+
   protected rateLimitConfig: Record<string, RateLimitConfig> = {
     // See: https://platform.openai.com/docs/models/o3
     'openai/o3': {
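
The `try`/`finally` around `free()` is load-bearing: `tiktoken` allocates memory inside a WASM module, outside the JS heap, so an exception thrown mid-encode would otherwise leak it. Creating and freeing an encoding per call is simple and leak-proof; if token counting ever became hot, one alternative (not what this commit does, names are hypothetical) would be to cache one encoding per model:

```ts
import { encoding_for_model, Tiktoken } from 'tiktoken';

type ModelName = Parameters<typeof encoding_for_model>[0];

// Hypothetical variation: reuse one encoding per model rather than
// paying encoding_for_model() on every countTokens() call.
const encodingCache = new Map<ModelName, Tiktoken>();

function getCachedEncoding(model: ModelName): Tiktoken {
  let encoding = encodingCache.get(model);
  if (!encoding) {
    encoding = encoding_for_model(model);
    encodingCache.set(model, encoding);
  }
  return encoding; // callers must not free(); free the cache once at shutdown
}
```
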
@@ -30,8 +43,7 @@ export class OpenAiModelProvider extends GenkitModelProvider {
         tokensPerInterval: 30_000 * 0.75, // *0.75 to be more resilient to token count deviations
         interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side.
       }),
-      countTokens: async (prompt) =>
-        o3.countTokens(this.genkitPromptToOpenAi(prompt)),
+      countTokens: async (prompt) => this.countTokensForModel('gpt-4o', prompt),
     },
     // See https://platform.openai.com/docs/models/o4-mini
     'openai/o4-mini': {
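
Note the model-name substitution: `o3` is counted with the `'gpt-4o'` tokenizer (and `o4-mini` with `'gpt-4o-mini'` in the next hunk), presumably because the o-series names are not in `tiktoken`'s model map. That should be harmless, since gpt-4o and the o-series models reportedly share the `o200k_base` encoding. A sketch that pins the encoding directly instead of going through a model name, assuming that shared encoding:

```ts
import { get_encoding } from 'tiktoken';

// gpt-4o and the o-series models share the o200k_base BPE, so counting
// against the encoding directly sidesteps the model-name map entirely.
function countO200kTokens(text: string): number {
  const encoding = get_encoding('o200k_base');
  try {
    return encoding.encode(text).length;
  } finally {
    encoding.free();
  }
}
```
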
@@ -44,7 +56,7 @@ export class OpenAiModelProvider extends GenkitModelProvider {
         interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side.
       }),
       countTokens: async (prompt) =>
-        o4Mini.countTokens(this.genkitPromptToOpenAi(prompt)),
+        this.countTokensForModel('gpt-4o-mini', prompt),
     },
     // See: https://platform.openai.com/docs/models/gpt-5
     'openai/gpt-5': {
@@ -56,10 +68,7 @@ export class OpenAiModelProvider extends GenkitModelProvider {
         tokensPerInterval: 30_000 * 0.75, // *0.75 to be more resilient to token count deviations
         interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side.
       }),
-      // TODO: at the time of writing, the `gpt-tokenizer` doesn't support gpt-5.
-      // See https://github.com/niieani/gpt-tokenizer/issues/73
-      countTokens: async (prompt) =>
-        o3.countTokens(this.genkitPromptToOpenAi(prompt)),
+      countTokens: async (prompt) => this.countTokensForModel('gpt-5', prompt),
     },
   };
 
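
The deleted TODO documented the old workaround: `gpt-tokenizer` had no gpt-5 support (gpt-tokenizer#73), so o3's tokenizer stood in for it. With `tiktoken` the name is passed straight through. Since `encoding_for_model` throws when the installed version does not recognize a name, a defensive fallback could look like this (hypothetical, not part of the commit):

```ts
import { encoding_for_model, get_encoding, Tiktoken } from 'tiktoken';

// Hypothetical guard: fall back to o200k_base when the installed tiktoken
// release predates a model name (encoding_for_model throws on unknown names).
function encodingForModelOrFallback(model: string): Tiktoken {
  try {
    return encoding_for_model(model as Parameters<typeof encoding_for_model>[0]);
  } catch {
    return get_encoding('o200k_base');
  }
}
```
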
@@ -72,8 +81,10 @@ export class OpenAiModelProvider extends GenkitModelProvider {
     return {};
   }
 
-  private genkitPromptToOpenAi(prompt: PromptDataForCounting): ChatMessage[] {
-    const openAiPrompt: string | ChatMessage[] = [];
+  private genkitPromptToOpenAi(
+    prompt: PromptDataForCounting
+  ): Array<{ role: string; content: string }> {
+    const openAiPrompt: Array<{ role: string; content: string }> = [];
     for (const part of prompt.messages) {
       for (const c of part.content) {
         openAiPrompt.push({
@@ -82,6 +93,6 @@ export class OpenAiModelProvider extends GenkitModelProvider {
         });
       }
     }
-    return [...openAiPrompt, { content: prompt.prompt }];
+    return [...openAiPrompt, { role: 'user', content: prompt.prompt }];
   }
 }
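
The last hunk fixes more than the types: the old trailing message carried no `role`, which the looser `ChatMessage` type from `gpt-tokenizer` tolerated, while the new `role: 'user'` keeps every element uniform. Assuming the elided `push` body copies each message's role and text, a prompt would now flatten like this (hypothetical input shape):

```ts
// Hypothetical PromptDataForCounting value:
const prompt = {
  messages: [
    { role: 'system', content: [{ text: 'Be terse.' }] },
    { role: 'user', content: [{ text: 'Hi there' }] },
  ],
  prompt: 'Summarize this diff.',
};

// genkitPromptToOpenAi(prompt) would yield uniform role/content pairs:
// [
//   { role: 'system', content: 'Be terse.' },
//   { role: 'user', content: 'Hi there' },
//   { role: 'user', content: 'Summarize this diff.' }, // role was missing before
// ]
```
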