77
88import  jedi 
99import  libcst  as  cst 
10- import  tiktoken 
1110from  jedi .api .classes  import  Name 
1211from  libcst  import  CSTNode 
1312
1413from  codeflash .cli_cmds .console  import  logger 
1514from  codeflash .code_utils .code_extractor  import  add_needed_imports_from_module , find_preexisting_objects 
16- from  codeflash .code_utils .code_utils  import  get_qualified_name , path_belongs_to_site_packages 
15+ from  codeflash .code_utils .code_utils  import  get_qualified_name , path_belongs_to_site_packages ,  encoded_tokens_len 
1716from  codeflash .context .unused_definition_remover  import  remove_unused_definitions_by_function_names 
1817from  codeflash .discovery .functions_to_optimize  import  FunctionToOptimize 
1918from  codeflash .models .models  import  (
@@ -73,8 +72,7 @@ def get_code_optimization_context(
7372    )
7473
7574    # Handle token limits 
76-     tokenizer  =  tiktoken .encoding_for_model ("gpt-4o" )
77-     final_read_writable_tokens  =  len (tokenizer .encode (final_read_writable_code ))
75+     final_read_writable_tokens  =  encoded_tokens_len (final_read_writable_code )
7876    if  final_read_writable_tokens  >  optim_token_limit :
7977        raise  ValueError ("Read-writable code has exceeded token limit, cannot proceed" )
8078
@@ -87,7 +85,7 @@ def get_code_optimization_context(
8785    )
8886    read_only_context_code  =  read_only_code_markdown .markdown 
8987
90-     read_only_code_markdown_tokens  =  len ( tokenizer . encode ( read_only_context_code ) )
88+     read_only_code_markdown_tokens  =  encoded_tokens_len ( read_only_context_code )
9189    total_tokens  =  final_read_writable_tokens  +  read_only_code_markdown_tokens 
9290    if  total_tokens  >  optim_token_limit :
9391        logger .debug ("Code context has exceeded token limit, removing docstrings from read-only code" )
@@ -96,7 +94,7 @@ def get_code_optimization_context(
9694            helpers_of_fto_dict , helpers_of_helpers_dict , project_root_path , remove_docstrings = True 
9795        )
9896        read_only_context_code  =  read_only_code_no_docstring_markdown .markdown 
99-         read_only_code_no_docstring_markdown_tokens  =  len ( tokenizer . encode ( read_only_context_code ) )
97+         read_only_code_no_docstring_markdown_tokens  =  encoded_tokens_len ( read_only_context_code )
10098        total_tokens  =  final_read_writable_tokens  +  read_only_code_no_docstring_markdown_tokens 
10199        if  total_tokens  >  optim_token_limit :
102100            logger .debug ("Code context has exceeded token limit, removing read-only code" )
@@ -111,7 +109,7 @@ def get_code_optimization_context(
111109        code_context_type = CodeContextType .TESTGEN ,
112110    )
113111    testgen_context_code  =  testgen_code_markdown .code 
114-     testgen_context_code_tokens  =  len ( tokenizer . encode ( testgen_context_code ) )
112+     testgen_context_code_tokens  =  encoded_tokens_len ( testgen_context_code )
115113    if  testgen_context_code_tokens  >  testgen_token_limit :
116114        testgen_code_markdown  =  extract_code_string_context_from_files (
117115            helpers_of_fto_dict ,
@@ -121,7 +119,7 @@ def get_code_optimization_context(
121119            code_context_type = CodeContextType .TESTGEN ,
122120        )
123121        testgen_context_code  =  testgen_code_markdown .code 
124-         testgen_context_code_tokens  =  len ( tokenizer . encode ( testgen_context_code ) )
122+         testgen_context_code_tokens  =  encoded_tokens_len ( testgen_context_code )
125123        if  testgen_context_code_tokens  >  testgen_token_limit :
126124            raise  ValueError ("Testgen code context has exceeded token limit, cannot proceed" )
127125
0 commit comments