File tree Expand file tree Collapse file tree 3 files changed +7
-5
lines changed Expand file tree Collapse file tree 3 files changed +7
-5
lines changed File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change 3939#这里并不是简单的将TAB替换成4个空格
4040#空格个数到底是多少需要计算,因为TAB制表本身有自动对齐的功能
4141def tab2spaces (line ):
42- list_str = list (line ) #字符串变成列表
42+ list_str = list (line ) #字符串打散成列表,方便操作
4343 i = list_str .count ('\t ' )
4444
4545 while i > 0 :
@@ -78,13 +78,15 @@ def format_codes(filename):
7878
7979def get_encode_info (file ):
8080 with open (file , 'rb' ) as f :
81- code = chardet .detect (f .read ())['encoding' ]
82- if code == 'EUC-JP' : #chardet库容易将含着少量中文的英文字符文档识别为日语编码格式
83- code = 'GB2312'
81+ code = chardet .detect (f .read ())['encoding' ]
82+ #chardet库有一定几率对当前文件的编码识别不准确
83+ if code == 'EUC-JP' : #容易将含着少量中文的英文字符文档识别为日语编码格式
84+ code = 'GB2312'
8485 elif code == 'ISO-8859-1' : #部分文件GB2312码会被识别成ISO-8859-1
8586 code = 'GB2312'
8687
87- if not (code == 'ascii' or code == 'utf-8' or code == 'GB2312' or code == 'Windows-1252' ): # Windows-1252 是由于意法半导体是法国企业's的'是法语的'导致的
88+ if not (code == 'ascii' or code == 'utf-8' or code == 'GB2312' #编码识别正确
89+ or code == 'Windows-1252' ): # Windows-1252 是由于意法半导体是法国企业's的'是法语的'导致的
8890 if code != None :
8991 print ('未处理,需人工确认:' + code + ':' + file ) #需要人工确认
9092 code = None
You can’t perform that action at this time.
0 commit comments