Skip to content

Commit f5bfcd9

Browse files
committed
cleaning up
1 parent 33c8258 commit f5bfcd9

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

tests/test_code_utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from unittest.mock import MagicMock, patch
66

77
import pytest
8+
import tiktoken
89

910
from codeflash.code_utils.code_utils import (
1011
cleanup_paths,
@@ -22,6 +23,22 @@
2223
from codeflash.code_utils.concolic_utils import clean_concolic_tests
2324
from codeflash.code_utils.coverage_utils import generate_candidates, prepare_coverage_files
2425

26+
def test_encode_str():
    """Print token-per-character statistics for the sample code files.

    Exploratory only: nothing is asserted. Every non-empty ``*.py`` file found
    under ``../code_to_optimize/`` (relative to this test file) whose path does
    not contain ``__init__.py`` is encoded with the tiktoken encoding for
    ``gpt-4o``. The mean, minimum and maximum of ``tokens / characters`` across
    those files are then printed.

    The test is skipped when no usable file is found, because the mean of an
    empty list would otherwise raise ``ZeroDivisionError``.
    """
    code_dir = Path(__file__).parent.resolve() / "../code_to_optimize/"

    sources = []
    for code_fn in code_dir.glob("**/*.py"):
        # Filter on the path first so excluded files are never read.
        if "__init__.py" in str(code_fn):
            continue
        code_str = code_fn.read_text(encoding="utf-8")
        if code_str:  # an empty file would give a division by zero below
            sources.append(code_str)

    if not sources:
        pytest.skip("no non-empty Python files found under code_to_optimize")

    # Build the tokenizer once; it does not depend on the file being encoded.
    tokenizer = tiktoken.encoding_for_model("gpt-4o")
    ratios = [len(tokenizer.encode(code_str)) / len(code_str) for code_str in sources]
    print(sum(ratios) / len(ratios), min(ratios), max(ratios))
2542

2643
@pytest.fixture
2744
def multiple_existing_and_non_existing_files(tmp_path: Path) -> list[Path]:

0 commit comments

Comments
 (0)