1+ import assert from "@/utils/assert" ;
12import type { CmuxMessage } from "@/types/message" ;
23import { createCmuxMessage } from "@/types/message" ;
34import type { HistoryService } from "@/services/historyService" ;
45import type { Result } from "@/types/result" ;
56import { Ok , Err } from "@/types/result" ;
67import type { SendMessageError } from "@/types/errors" ;
78import type { AIService } from "@/services/aiService" ;
9+ import { log } from "@/services/log" ;
810import type {
911 MockAssistantEvent ,
1012 MockStreamErrorEvent ,
@@ -17,6 +19,91 @@ import type { ToolCallStartEvent, ToolCallEndEvent } from "@/types/stream";
1719import type { ReasoningDeltaEvent } from "@/types/stream" ;
1820import { getTokenizerForModel } from "@/utils/main/tokenizer" ;
1921
// Model whose tokenizer mock scenarios mirror so token counts match real streams.
const MOCK_TOKENIZER_MODEL = "openai:gpt-5";
// Max time to wait for the real tokenizer before serving an approximate count.
const TOKENIZE_TIMEOUT_MS = 150;
// Ensures the tokenizer-fallback path is logged at most once per process.
let tokenizerFallbackLogged = false;
25+
26+ function approximateTokenCount ( text : string ) : number {
27+ const normalizedLength = text . trim ( ) . length ;
28+ if ( normalizedLength === 0 ) {
29+ return 0 ;
30+ }
31+ return Math . max ( 1 , Math . ceil ( normalizedLength / 4 ) ) ;
32+ }
33+
34+ async function tokenizeWithMockModel ( text : string , context : string ) : Promise < number > {
35+ assert ( typeof text === "string" , `Mock scenario ${ context } expects string input` ) ;
36+ const approximateTokens = approximateTokenCount ( text ) ;
37+ let fallbackUsed = false ;
38+ let timeoutId : NodeJS . Timeout | undefined ;
39+
40+ const fallbackPromise = new Promise < number > ( ( resolve ) => {
41+ timeoutId = setTimeout ( ( ) => {
42+ fallbackUsed = true ;
43+ resolve ( approximateTokens ) ;
44+ } , TOKENIZE_TIMEOUT_MS ) ;
45+ } ) ;
46+
47+ const actualPromise = ( async ( ) => {
48+ const tokenizer = await getTokenizerForModel ( MOCK_TOKENIZER_MODEL ) ;
49+ assert (
50+ typeof tokenizer . encoding === "string" && tokenizer . encoding . length > 0 ,
51+ `Tokenizer for ${ MOCK_TOKENIZER_MODEL } must expose a non-empty encoding`
52+ ) ;
53+ const tokens = await tokenizer . countTokens ( text ) ;
54+ assert (
55+ Number . isFinite ( tokens ) && tokens >= 0 ,
56+ `Tokenizer for ${ MOCK_TOKENIZER_MODEL } returned invalid token count`
57+ ) ;
58+ return tokens ;
59+ } ) ( ) ;
60+
61+ let tokens : number ;
62+ try {
63+ tokens = await Promise . race ( [ actualPromise , fallbackPromise ] ) ;
64+ } catch ( error ) {
65+ if ( timeoutId !== undefined ) {
66+ clearTimeout ( timeoutId ) ;
67+ }
68+ const errorMessage = error instanceof Error ? error . message : String ( error ) ;
69+ throw new Error (
70+ `[MockScenarioPlayer] Failed to tokenize ${ context } with ${ MOCK_TOKENIZER_MODEL } : ${ errorMessage } `
71+ ) ;
72+ }
73+
74+ if ( ! fallbackUsed && timeoutId !== undefined ) {
75+ clearTimeout ( timeoutId ) ;
76+ }
77+
78+ actualPromise
79+ . then ( ( resolvedTokens ) => {
80+ if ( fallbackUsed && ! tokenizerFallbackLogged ) {
81+ tokenizerFallbackLogged = true ;
82+ log . debug (
83+ `[MockScenarioPlayer] Tokenizer fallback used for ${ context } ; emitted ${ approximateTokens } , background tokenizer returned ${ resolvedTokens } `
84+ ) ;
85+ }
86+ } )
87+ . catch ( ( error ) => {
88+ if ( fallbackUsed && ! tokenizerFallbackLogged ) {
89+ tokenizerFallbackLogged = true ;
90+ const errorMessage = error instanceof Error ? error . message : String ( error ) ;
91+ log . debug (
92+ `[MockScenarioPlayer] Tokenizer fallback used for ${ context } ; background error: ${ errorMessage } `
93+ ) ;
94+ }
95+ } ) ;
96+
97+ if ( fallbackUsed ) {
98+ assert (
99+ Number . isFinite ( tokens ) && tokens >= 0 ,
100+ `Token fallback produced invalid count for ${ context } `
101+ ) ;
102+ }
103+
104+ return tokens ;
105+ }
106+
20107interface MockPlayerDeps {
21108 aiService : AIService ;
22109 historyService : HistoryService ;
@@ -159,8 +246,7 @@ export class MockScenarioPlayer {
159246 }
160247 case "reasoning-delta" : {
161248 // Mock scenarios use the same tokenization logic as real streams for consistency
162- const tokenizer = await getTokenizerForModel ( "gpt-4" ) ; // Mock uses GPT-4 tokenizer
163- const tokens = await tokenizer . countTokens ( event . text ) ;
249+ const tokens = await tokenizeWithMockModel ( event . text , "reasoning-delta text" ) ;
164250 const payload : ReasoningDeltaEvent = {
165251 type : "reasoning-delta" ,
166252 workspaceId,
@@ -175,8 +261,7 @@ export class MockScenarioPlayer {
175261 case "tool-start" : {
176262 // Mock scenarios use the same tokenization logic as real streams for consistency
177263 const inputText = JSON . stringify ( event . args ) ;
178- const tokenizer = await getTokenizerForModel ( "gpt-4" ) ; // Mock uses GPT-4 tokenizer
179- const tokens = await tokenizer . countTokens ( inputText ) ;
264+ const tokens = await tokenizeWithMockModel ( inputText , "tool-call args" ) ;
180265 const payload : ToolCallStartEvent = {
181266 type : "tool-call-start" ,
182267 workspaceId,
@@ -204,8 +289,13 @@ export class MockScenarioPlayer {
204289 }
205290 case "stream-delta" : {
206291 // Mock scenarios use the same tokenization logic as real streams for consistency
207- const tokenizer = await getTokenizerForModel ( "gpt-4" ) ; // Mock uses GPT-4 tokenizer
208- const tokens = await tokenizer . countTokens ( event . text ) ;
292+ let tokens : number ;
293+ try {
294+ tokens = await tokenizeWithMockModel ( event . text , "stream-delta text" ) ;
295+ } catch ( error ) {
296+ console . error ( "[MockScenarioPlayer] tokenize failed for stream-delta" , error ) ;
297+ throw error ;
298+ }
209299 const payload : StreamDeltaEvent = {
210300 type : "stream-delta" ,
211301 workspaceId,
0 commit comments