1- import { ChatMessage } from 'gpt-tokenizer/GptEncoding' ;
21import { GenkitPluginV2 } from 'genkit/plugin' ;
32import { openAI } from '@genkit-ai/compat-oai/openai' ;
43import { RateLimiter } from 'limiter' ;
76 PromptDataForCounting ,
87 RateLimitConfig ,
98} from '../model-provider.js' ;
10- import o3 from 'gpt-tokenizer/model/o3' ;
11- import o4Mini from 'gpt-tokenizer/model/o4-mini' ;
9+ import { encoding_for_model } from 'tiktoken' ;
1210
1311export class OpenAiModelProvider extends GenkitModelProvider {
1412 readonly apiKeyVariableName = 'OPENAI_API_KEY' ;
@@ -19,6 +17,21 @@ export class OpenAiModelProvider extends GenkitModelProvider {
1917 'openai-gpt-5' : ( ) => openAI . model ( 'gpt-5' ) ,
2018 } ;
2119
/**
 * Counts the tokens of a prompt using the tiktoken encoding that
 * `encoding_for_model` selects for `modelName`.
 *
 * The prompt is first converted with `genkitPromptToOpenAi`, then flattened
 * into one string with a `role: content` line per message (joined by `\n`).
 * The returned number is the token count of that flattened text.
 *
 * @param modelName Model identifier accepted by tiktoken's `encoding_for_model`.
 * @param prompt Prompt data whose messages and final prompt text are counted.
 * @returns Number of tokens in the flattened prompt text.
 */
private countTokensForModel(
  modelName: Parameters<typeof encoding_for_model>[0],
  prompt: PromptDataForCounting
): number {
  const encoding = encoding_for_model(modelName);
  try {
    const messages = this.genkitPromptToOpenAi(prompt);
    const text = messages.map((m) => `${m.role}: ${m.content}`).join('\n');
    // Use `encode_ordinary` instead of `encode`: prompt text is arbitrary and
    // may literally contain special-token markers such as `<|endoftext|>`.
    // `encode` rejects those by default, which would make counting throw;
    // `encode_ordinary` tokenizes them as plain text.
    return encoding.encode_ordinary(text).length;
  } finally {
    // Always release the encoding, even when conversion or tokenization throws.
    encoding.free();
  }
}
34+
2235 protected rateLimitConfig : Record < string , RateLimitConfig > = {
2336 // See: https://platform.openai.com/docs/models/o3
2437 'openai/o3' : {
@@ -30,8 +43,7 @@ export class OpenAiModelProvider extends GenkitModelProvider {
3043 tokensPerInterval : 30_000 * 0.75 , // *0.75 to be more resilient to token count deviations
3144 interval : 1000 * 60 * 1.5 , // Refresh tokens after 1.5 minutes to be on the safe side.
3245 } ) ,
33- countTokens : async ( prompt ) =>
34- o3 . countTokens ( this . genkitPromptToOpenAi ( prompt ) ) ,
46+ countTokens : async ( prompt ) => this . countTokensForModel ( 'gpt-4o' , prompt ) ,
3547 } ,
3648 // See https://platform.openai.com/docs/models/o4-mini
3749 'openai/o4-mini' : {
@@ -44,7 +56,7 @@ export class OpenAiModelProvider extends GenkitModelProvider {
4456 interval : 1000 * 60 * 1.5 , // Refresh tokens after 1.5 minutes to be on the safe side.
4557 } ) ,
4658 countTokens : async ( prompt ) =>
47- o4Mini . countTokens ( this . genkitPromptToOpenAi ( prompt ) ) ,
59+ this . countTokensForModel ( 'gpt-4o-mini' , prompt ) ,
4860 } ,
4961 // See: https://platform.openai.com/docs/models/gpt-5
5062 'openai/gpt-5' : {
@@ -56,10 +68,7 @@ export class OpenAiModelProvider extends GenkitModelProvider {
5668 tokensPerInterval : 30_000 * 0.75 , // *0.75 to be more resilient to token count deviations
5769 interval : 1000 * 60 * 1.5 , // Refresh tokens after 1.5 minutes to be on the safe side.
5870 } ) ,
59- // TODO: at the time of writing, the `gpt-tokenizer` doesn't support gpt-5.
60- // See https://github.com/niieani/gpt-tokenizer/issues/73
61- countTokens : async ( prompt ) =>
62- o3 . countTokens ( this . genkitPromptToOpenAi ( prompt ) ) ,
71+ countTokens : async ( prompt ) => this . countTokensForModel ( 'gpt-5' , prompt ) ,
6372 } ,
6473 } ;
6574
@@ -72,8 +81,10 @@ export class OpenAiModelProvider extends GenkitModelProvider {
7281 return { } ;
7382 }
7483
75- private genkitPromptToOpenAi ( prompt : PromptDataForCounting ) : ChatMessage [ ] {
76- const openAiPrompt : string | ChatMessage [ ] = [ ] ;
84+ private genkitPromptToOpenAi (
85+ prompt : PromptDataForCounting
86+ ) : Array < { role : string ; content : string } > {
87+ const openAiPrompt : Array < { role : string ; content : string } > = [ ] ;
7788 for ( const part of prompt . messages ) {
7889 for ( const c of part . content ) {
7990 openAiPrompt . push ( {
@@ -82,6 +93,6 @@ export class OpenAiModelProvider extends GenkitModelProvider {
8293 } ) ;
8394 }
8495 }
85- return [ ...openAiPrompt , { content : prompt . prompt } ] ;
96+ return [ ...openAiPrompt , { role : 'user' , content : prompt . prompt } ] ;
8697 }
8798}
0 commit comments