File tree (Expand file tree / Collapse file tree): 1 file changed, +0 -12 lines changed
Expand file tree / Collapse file tree: 1 file changed, +0 -12 lines changed
Original file line number | Diff line number | Diff line change
@@ -151,18 +151,9 @@ def load_all_pages(ai_dir: Path) -> List[AiPage]:
151151# ----------------------------
152152
153153def _heuristic_token_count (s : str ) -> int :
154- """
155- Dependency-free token estimate:
156- - counts words and standalone punctuation
157- - decent for prose and code; model-agnostic
158- """
159154 return len (re .findall (r"\w+|[^\s\w]" , s , flags = re .UNICODE ))
160155
161156def _cl100k_token_count (s : str ) -> int :
162- """
163- Optional: if tiktoken is installed and estimator name is 'cl100k',
164- compute tokens via cl100k_base; otherwise fall back to heuristic.
165- """
166157 try :
167158 import tiktoken # type: ignore
168159 enc = tiktoken .get_encoding ("cl100k_base" )
@@ -171,11 +162,8 @@ def _cl100k_token_count(s: str) -> int:
171162 return _heuristic_token_count (s )
172163
173164def estimate_tokens (text : str , estimator : str = "heuristic-v1" ) -> int :
174- if estimator == "heuristic-v1" :
175- return _heuristic_token_count (text )
176165 if estimator == "cl100k" :
177166 return _cl100k_token_count (text )
178- # Unknown/custom estimator name → compute via heuristic but keep the label in outputs.
179167 return _heuristic_token_count (text )
180168
181169
You can’t perform that action at this time.
0 commit comments