Skip to content

Commit 85df3ec

Browse files
committed
2024年12月23日
1 parent 930dd1c commit 85df3ec

File tree

1 file changed

+21
-8
lines changed

1 file changed

+21
-8
lines changed
Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ def __init__(self, path, extract_content=True, max_files=1000, max_depth=5):
4242
'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.mp3', '.mp4',
4343
'.avi', '.mov', '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'
4444
}
45+
# 添加允许处理的扩展名(例如 .js)
46+
self.allowed_extensions = {'.js', '.json'}
4547

4648
def run(self):
4749
try:
@@ -123,7 +125,8 @@ def process_file(self, file_path):
123125
file_name = os.path.basename(file_path)
124126
_, file_extension = os.path.splitext(file_name)
125127

126-
if file_extension.lower() in self.skip_extensions:
128+
# 检查是否跳过该文件类型,除非在允许的扩展名列表中
129+
if file_extension.lower() in self.skip_extensions and file_extension.lower() not in self.allowed_extensions:
127130
self.update_signal.emit(file_name, "跳过该文件类型", "skipped", file_path)
128131
return
129132

@@ -139,10 +142,17 @@ def process_file(self, file_path):
139142
self.update_signal.emit(file_name, "文件过大,已跳过", "skipped", file_path)
140143
return
141144

142-
# 检查是否是文本文件
145+
# 检查是否是文本文件或特定的 MIME 类型
143146
mime_type, _ = mimetypes.guess_type(file_path)
144-
if mime_type and not mime_type.startswith('text'):
145-
self.update_signal.emit(file_name, "非文本文件,已跳过", "skipped", file_path)
147+
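# Also accept JavaScript and JSON files in addition to text/*. The list is checked by exact match, so its 'text' entry should never equal a full "type/subtype" MIME string; text/* is accepted by the startswith('text') check instead.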
148+
allowed_mime_types = ['text', 'application/javascript', 'application/json']
149+
if mime_type:
150+
if not (mime_type.startswith('text') or mime_type in allowed_mime_types):
151+
self.update_signal.emit(file_name, "非文本文件,已跳过", "skipped", file_path)
152+
return
153+
else:
154+
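# The MIME type could not be guessed (guess_type returned None): report the file as skipped and stop. NOTE(review): count_total_files only excludes files whose MIME type is known and disallowed, so it still counts these files — confirm whether the total should match this branch.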
155+
self.update_signal.emit(file_name, "无法确定文件类型,已跳过", "skipped", file_path)
146156
return
147157

148158
content = self.read_file_content(file_path)
@@ -552,6 +562,7 @@ def count_total_files(self, path):
552562
total_files = 0
553563
max_depth = self.process_thread.max_depth
554564
skip_extensions = self.process_thread.skip_extensions
565+
allowed_extensions = self.process_thread.allowed_extensions
555566
extract_content = self.process_thread.extract_content
556567

557568
def count(path, depth=0):
@@ -569,29 +580,31 @@ def count(path, depth=0):
569580
elif entry.is_file():
570581
file_name = entry.name
571582
_, file_extension = os.path.splitext(file_name)
572-
if file_extension.lower() in skip_extensions:
583+
if file_extension.lower() in skip_extensions and file_extension.lower() not in allowed_extensions:
573584
continue
574585
if not extract_content:
575586
total_files += 1
576587
else:
577588
if entry.stat().st_size > 10 * 1024 * 1024:
578589
continue
579590
mime_type, _ = mimetypes.guess_type(entry.path)
580-
if mime_type and not mime_type.startswith('text'):
591+
allowed_mime_types = ['text', 'application/javascript', 'application/json']
592+
if mime_type and not (mime_type.startswith('text') or mime_type in allowed_mime_types):
581593
continue
582594
total_files += 1
583595
elif os.path.isfile(path):
584596
file_name = os.path.basename(path)
585597
_, file_extension = os.path.splitext(file_name)
586-
if file_extension.lower() in skip_extensions:
598+
if file_extension.lower() in skip_extensions and file_extension.lower() not in allowed_extensions:
587599
return
588600
if not extract_content:
589601
total_files += 1
590602
else:
591603
if os.path.getsize(path) > 10 * 1024 * 1024:
592604
return
593605
mime_type, _ = mimetypes.guess_type(path)
594-
if mime_type and not mime_type.startswith('text'):
606+
allowed_mime_types = ['text', 'application/javascript', 'application/json']
607+
if mime_type and not (mime_type.startswith('text') or mime_type in allowed_mime_types):
595608
return
596609
total_files += 1
597610
except PermissionError as e:

0 commit comments

Comments
 (0)