@@ -42,6 +42,8 @@ def __init__(self, path, extract_content=True, max_files=1000, max_depth=5):
4242 '.png' , '.jpg' , '.jpeg' , '.gif' , '.bmp' , '.mp3' , '.mp4' ,
4343 '.avi' , '.mov' , '.pdf' , '.doc' , '.docx' , '.xls' , '.xlsx' , '.ppt' , '.pptx'
4444 }
45+ # 添加允许处理的扩展名(例如 .js)
46+ self .allowed_extensions = {'.js' , '.json' }
4547
4648 def run (self ):
4749 try :
@@ -123,7 +125,8 @@ def process_file(self, file_path):
123125 file_name = os .path .basename (file_path )
124126 _ , file_extension = os .path .splitext (file_name )
125127
126- if file_extension .lower () in self .skip_extensions :
128+ # 检查是否跳过该文件类型,除非在允许的扩展名列表中
129+ if file_extension .lower () in self .skip_extensions and file_extension .lower () not in self .allowed_extensions :
127130 self .update_signal .emit (file_name , "跳过该文件类型" , "skipped" , file_path )
128131 return
129132
@@ -139,10 +142,17 @@ def process_file(self, file_path):
139142 self .update_signal .emit (file_name , "文件过大,已跳过" , "skipped" , file_path )
140143 return
141144
142- # 检查是否是文本文件
145+ # 检查是否是文本文件或特定的 MIME 类型
143146 mime_type , _ = mimetypes .guess_type (file_path )
144- if mime_type and not mime_type .startswith ('text' ):
145- self .update_signal .emit (file_name , "非文本文件,已跳过" , "skipped" , file_path )
147+ # 添加 'application/javascript' 和 'application/json' 到允许的 MIME 类型
148+ allowed_mime_types = ['text' , 'application/javascript' , 'application/json' ]
149+ if mime_type :
150+ if not (mime_type .startswith ('text' ) or mime_type in allowed_mime_types ):
151+ self .update_signal .emit (file_name , "非文本文件,已跳过" , "skipped" , file_path )
152+ return
153+ else :
154+ # 如果无法猜测 MIME 类型,也可以选择跳过或处理
155+ self .update_signal .emit (file_name , "无法确定文件类型,已跳过" , "skipped" , file_path )
146156 return
147157
148158 content = self .read_file_content (file_path )
@@ -552,6 +562,7 @@ def count_total_files(self, path):
552562 total_files = 0
553563 max_depth = self .process_thread .max_depth
554564 skip_extensions = self .process_thread .skip_extensions
565+ allowed_extensions = self .process_thread .allowed_extensions
555566 extract_content = self .process_thread .extract_content
556567
557568 def count (path , depth = 0 ):
@@ -569,29 +580,31 @@ def count(path, depth=0):
569580 elif entry .is_file ():
570581 file_name = entry .name
571582 _ , file_extension = os .path .splitext (file_name )
572- if file_extension .lower () in skip_extensions :
583+ if file_extension .lower () in skip_extensions and file_extension . lower () not in allowed_extensions :
573584 continue
574585 if not extract_content :
575586 total_files += 1
576587 else :
577588 if entry .stat ().st_size > 10 * 1024 * 1024 :
578589 continue
579590 mime_type , _ = mimetypes .guess_type (entry .path )
580- if mime_type and not mime_type .startswith ('text' ):
591+ allowed_mime_types = ['text' , 'application/javascript' , 'application/json' ]
592+ if mime_type and not (mime_type .startswith ('text' ) or mime_type in allowed_mime_types ):
581593 continue
582594 total_files += 1
583595 elif os .path .isfile (path ):
584596 file_name = os .path .basename (path )
585597 _ , file_extension = os .path .splitext (file_name )
586- if file_extension .lower () in skip_extensions :
598+ if file_extension .lower () in skip_extensions and file_extension . lower () not in allowed_extensions :
587599 return
588600 if not extract_content :
589601 total_files += 1
590602 else :
591603 if os .path .getsize (path ) > 10 * 1024 * 1024 :
592604 return
593605 mime_type , _ = mimetypes .guess_type (path )
594- if mime_type and not mime_type .startswith ('text' ):
606+ allowed_mime_types = ['text' , 'application/javascript' , 'application/json' ]
607+ if mime_type and not (mime_type .startswith ('text' ) or mime_type in allowed_mime_types ):
595608 return
596609 total_files += 1
597610 except PermissionError as e :
0 commit comments