Skip to content

Commit 87aab1e

Browse files
committed
在 @src/autocoder/plugins/token_helper_plugin.py 中,count_tokens_in_file 需要支持三种情况, args 如果是@开头的,则要去掉 @, 然后将其作为路径. 之后,要判断是目录还是文件,如果是目录,则要递归统计累计值,如果是文件,则走原来逻辑.修改要漂亮精简.
auto_coder_000000000188_chat_action.yml
1 parent 708c07e commit 87aab1e

File tree

1 file changed

+107
-7
lines changed

1 file changed

+107
-7
lines changed

src/autocoder/plugins/token_helper_plugin.py

Lines changed: 107 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def get_commands(self) -> Dict[str, Tuple[Callable, str]]:
6868
return {
6969
"token/count": (self.count_tokens_in_project, "Count tokens in all project files"),
7070
"token/top": (self.show_top_token_files, "Show top N files by token count"),
71-
"token/file": (self.count_tokens_in_file, "Count tokens in a specific file"),
71+
"token/file": (self.count_tokens_in_file, "Count tokens in a specific file or directory"),
7272
"token/summary": (self.show_token_summary, "Show token count summary for the project"),
7373
}
7474

@@ -274,17 +274,46 @@ def show_top_token_files(self, args: str) -> None:
274274
print(f"{token_count.tokens:<10,} {token_count.file_size:<15,} {relative_path}")
275275

276276
def count_tokens_in_file(self, args: str) -> None:
    """Count tokens in a specific file or directory.

    The argument is stripped of surrounding whitespace and of one leading
    ``@`` before it is used as a filesystem path. A regular file is handed
    to ``_count_tokens_single_file``; a directory is handed to
    ``_count_tokens_directory``. Results and errors are printed; nothing is
    returned.

    Args:
        args: Path to the file or directory, optionally prefixed with ``@``.
    """
    # Normalise before validating, so that blank input or a bare "@" is
    # reported as a missing argument instead of as the non-existent path ''.
    path = (args or "").strip()
    if path.startswith('@'):
        path = path[1:].strip()

    if not path:
        print("Please specify a file or directory path.")
        return

    if not os.path.exists(path):
        print(f"Error: Path '{path}' does not exist.")
        return

    try:
        if os.path.isfile(path):
            self._count_tokens_single_file(path)
        elif os.path.isdir(path):
            self._count_tokens_directory(path)
        else:
            # The path exists but is neither a regular file nor a directory.
            print(f"Error: '{path}' is neither a file nor a directory.")
    except Exception as e:
        # Command boundary: report the failure rather than propagate it.
        print(f"Error counting tokens: {str(e)}")
307+
308+
def _count_tokens_single_file(self, file_path: str) -> int:
309+
"""Count tokens in a single file and display results.
310+
311+
Args:
312+
file_path: Path to the file
313+
314+
Returns:
315+
Number of tokens in the file
316+
"""
288317
try:
289318
with open(file_path, 'r', encoding='utf-8') as f:
290319
content = f.read()
@@ -293,10 +322,81 @@ def count_tokens_in_file(self, args: str) -> None:
293322
print(f"\nFile: {file_path}")
294323
print(f"Tokens: {tokens:,}")
295324
print(f"File size: {len(content):,} bytes")
296-
print(f"Avg bytes per token: {len(content)/tokens:.2f}")
325+
if tokens > 0:
326+
print(f"Avg bytes per token: {len(content)/tokens:.2f}")
327+
328+
return tokens
297329

330+
except UnicodeDecodeError:
331+
print(f"Warning: Skipping binary file '{file_path}'")
332+
return 0
298333
except Exception as e:
299-
print(f"Error counting tokens in file: {str(e)}")
334+
print(f"Error reading file '{file_path}': {str(e)}")
335+
return 0
336+
337+
def _count_tokens_directory(self, dir_path: str) -> None:
    """Count tokens in all files within a directory recursively.

    Walks ``dir_path`` with ``os.walk``, pruning hidden directories and a
    few common dependency/build directories, and skipping hidden files and
    files with known binary extensions. Every remaining file is read as
    UTF-8 text and passed to ``count_tokens``. A summary and the ten files
    with the most tokens are printed; nothing is returned.

    Args:
        dir_path: Path to the directory to scan.
    """
    # Loop-invariant filters, built once instead of once per directory/file.
    ignored_dirs = {'__pycache__', 'node_modules', 'dist', 'build'}
    ignored_suffixes = ('.pyc', '.pyo', '.so', '.dll', '.exe', '.bin')

    total_tokens = 0
    skipped_count = 0
    processed_files = []

    print(f"\nScanning directory: {dir_path}")

    for root, dirs, files in os.walk(dir_path):
        # Slice-assign in place so os.walk does not descend into pruned dirs.
        dirs[:] = [
            d for d in dirs
            if not d.startswith('.') and d not in ignored_dirs
        ]

        for name in files:
            if name.startswith('.') or name.endswith(ignored_suffixes):
                continue

            file_path = os.path.join(root, name)
            relative_path = os.path.relpath(file_path, dir_path)

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                tokens = count_tokens(content)
            except (UnicodeDecodeError, PermissionError):
                # Best effort: non-UTF-8 or unreadable files are not counted,
                # but they are tallied so the summary can mention them.
                skipped_count += 1
                continue
            except Exception as e:
                print(f"Warning: Error processing '{relative_path}': {str(e)}")
                continue

            total_tokens += tokens
            processed_files.append({
                'path': relative_path,
                'tokens': tokens,
                # Length of the decoded text (characters), not on-disk bytes.
                'size': len(content),
            })

    file_count = len(processed_files)

    # Display results
    print("\nDirectory scan complete!")
    print(f"Total files processed: {file_count}")
    print(f"Total tokens: {total_tokens:,}")
    if skipped_count:
        print(f"Files skipped (binary or unreadable): {skipped_count}")

    if file_count > 0:
        avg_tokens = total_tokens / file_count
        print(f"Average tokens per file: {avg_tokens:.2f}")

    # The per-file table is only shown when there is more than one file.
    if file_count > 1:
        print("\nTop files by token count:")
        sorted_files = sorted(processed_files, key=lambda x: x['tokens'], reverse=True)
        print(f"{'Tokens':>8} {'Size':>8} {'File'}")
        print(f"{'-'*8} {'-'*8} {'-'*50}")

        for file_info in sorted_files[:10]:
            print(f"{file_info['tokens']:>8,} {file_info['size']:>8} {file_info['path']}")
300400

301401
def show_token_summary(self, args: str) -> None:
302402
"""Show token count summary by file type.

0 commit comments

Comments
 (0)