|
| 1 | +# |
| 2 | +# File : formatting.py |
| 3 | +# This file is part of RT-Thread RTOS |
| 4 | +# COPYRIGHT (C) 2006 - 2018, RT-Thread Development Team |
| 5 | +# |
| 6 | +# This program is free software; you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation; either version 2 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License along |
| 17 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 18 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| 19 | +# |
| 20 | +# Change Logs: |
| 21 | +# Date Author Notes |
| 22 | +# 2021-03-02 Meco Man The first version |
| 23 | +# 2021-03-04 Meco Man 增加统一转换成UTF-8编码格式功能 |
| 24 | + |
| 25 | + |
| 26 | +#本文件会自动对指定路径下的所有文件包括子文件夹的文件(仅针对.c.h)进行扫描 |
| 27 | +# 1)将源文件编码统一为UTF-8; |
| 28 | +# 2)将TAB键替换为空格; |
| 29 | +# 3)将每行末尾多余的空格删除,并统一换行符为'\n'; |
| 30 | +#使用时只需要双击本文件,输入要扫描的文件夹路径即可 |
| 31 | +#不能保证100%全部成功转换为UTF-8,有一些编码特殊或识别不准确会在终端打印信息,需人工转换 |
| 32 | + |
| 33 | +#欢迎对本文件的功能继续做出补充,欢迎提交PR |
| 34 | + |
| 35 | +import os |
| 36 | +import chardet |
| 37 | + |
| 38 | +#用空格代替TAB键 |
| 39 | +#这里并不是简单的将TAB替换成4个空格 |
| 40 | +#空格个数到底是多少需要计算,因为TAB制表本身有自动对齐的功能 |
def tab2spaces(line, tab_size=4):
    """Expand every TAB in *line* to spaces while keeping tab-stop alignment.

    A TAB is not blindly replaced by ``tab_size`` spaces. It is replaced by
    exactly as many spaces as are needed to reach the next multiple of
    ``tab_size`` columns, so text that was aligned with TABs stays aligned.

    Args:
        line: The text to convert, normally a single source line.
        tab_size: Distance between tab stops. Must be a positive integer.
            Defaults to 4.

    Returns:
        The text with every TAB character replaced by spaces.

    Raises:
        ValueError: If ``tab_size`` is smaller than 1.
    """
    if tab_size < 1:
        raise ValueError('tab_size must be a positive integer')

    pieces = []
    # The column is counted in characters from the start of the string; each
    # character (including an embedded newline) advances it by one.
    column = 0
    for char in line:
        if char == '\t':
            padding = tab_size - (column % tab_size)
            pieces.append(' ' * padding)
            column += padding
        else:
            pieces.append(char)
            column += 1
    return ''.join(pieces)
| 58 | + |
| 59 | +#删除每行末尾多余的空格 统一使用\n作为结尾 |
def formattail(line):
    """Return *line* without trailing whitespace, terminated by a single '\\n'.

    All trailing whitespace is removed, including any existing line ending,
    and exactly one newline character is appended.
    """
    return line.rstrip() + '\n'
| 64 | + |
| 65 | +#对单个文件进行格式整理 |
def format_codes(filename):
    """Normalise the whitespace of one UTF-8 encoded source file in place.

    Every line has its TABs expanded with ``tab2spaces`` and its trailing
    whitespace and line ending normalised with ``formattail``. The result is
    written to a scratch file next to the original, which then replaces the
    original in one step, so a failure part-way through never leaves a
    half-written source file behind.

    Args:
        filename: Path of the file to format. It must already be UTF-8.

    Returns:
        None. If the file cannot be decoded or encoded as UTF-8 a message is
        printed and the file is left untouched.
    """
    # Keep the scratch file beside the target so the final replace never has
    # to cross a filesystem boundary.
    temp_path = filename + '.tmp'
    try:
        # newline='\n' stops Python from translating '\n' to '\r\n' on
        # Windows, so the output really uses LF line endings everywhere.
        with open(filename, 'r', encoding='utf-8') as source, \
                open(temp_path, 'w', encoding='utf-8', newline='\n') as target:
            for line in source:
                target.write(formattail(tab2spaces(line)))
        os.replace(temp_path, filename)
    except UnicodeError as err:
        # Same best-effort policy as the UTF-8 conversion step: report the
        # file for manual handling and carry on with the next one.
        print(type(err).__name__ + "未处理,需人工确认" + filename)
    finally:
        # After a successful replace the scratch file no longer exists; after
        # any failure it is removed here.
        if os.path.exists(temp_path):
            os.remove(temp_path)
| 78 | + |
def get_encode_info(file):
    """Detect the text encoding of *file* and return it if it is supported.

    The whole file is read in binary mode and handed to ``chardet.detect``.
    Two known misdetections are remapped to ``'GB2312'``; after that only
    ``'ascii'``, ``'utf-8'``, ``'GB2312'`` and ``'Windows-1252'`` are accepted.

    Args:
        file: Path of the file to inspect.

    Returns:
        The accepted encoding name, or ``None`` when chardet reports no
        encoding or reports one that is not accepted. In the latter case a
        message asking for manual confirmation is printed.
    """
    with open(file, 'rb') as stream:
        raw = stream.read()
    detected = chardet.detect(raw)['encoding']

    # chardet is not always right for these files:
    #  - mostly-English text with a little Chinese tends to be reported as
    #    the Japanese encoding EUC-JP;
    #  - some GB2312 files are reported as ISO-8859-1.
    if detected in ('EUC-JP', 'ISO-8859-1'):
        detected = 'GB2312'

    # Windows-1252 is accepted as well; per the original author's note it
    # comes from the typographic apostrophe in STMicroelectronics sources.
    if detected in ('ascii', 'utf-8', 'GB2312', 'Windows-1252'):
        return detected

    if detected is not None:
        print('未处理,需人工确认:'+detected+':'+file)  # needs manual review
    return None
| 95 | + |
| 96 | +#将单个文件转为UTF-8编码 |
def conver_to_utf_8(path):
    """Re-encode a single file as UTF-8 in place.

    The source encoding is obtained from ``get_encode_info``. The file
    content is decoded with that encoding and written back as UTF-8. Files
    whose bytes are already identical to their UTF-8 form are not rewritten.

    Args:
        path: Path of the file to convert.

    Returns:
        ``1`` when the file is UTF-8 afterwards, ``0`` when the encoding is
        not supported or the content cannot be decoded or encoded. In the
        failure cases the file is left unmodified.
    """
    try:
        info = get_encode_info(path)
        if info is None:
            return 0  # 0: failure

        # The context manager closes the handle even when decoding raises.
        with open(path, 'rb+') as file:
            data = file.read()
            utf = data.decode(info).encode('utf-8')
            if utf != data:
                file.seek(0)
                file.write(utf)
                # Drop stale bytes in case the new content is shorter.
                file.truncate()
        return 1  # 1: success
    except UnicodeDecodeError:
        print("UnicodeDecodeError未处理,需人工确认"+path)
        return 0
    except UnicodeEncodeError:
        print("UnicodeEncodeError未处理,需人工确认"+path)
        return 0
| 117 | + |
| 118 | +# 递归扫描目录下的所有文件 |
def traversalallfile(path):
    """Recursively process every ``.c`` and ``.h`` file below *path*.

    Each matching file is first converted to UTF-8. Only when that conversion
    reports success (``1``) is the file handed to ``format_codes``.

    Args:
        path: Directory to scan; sub-directories are visited recursively.
    """
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if os.path.isdir(full_path):
            traversalallfile(full_path)  # descend into the sub-directory
            continue
        if not os.path.isfile(full_path):
            continue
        if not full_path.endswith(('.c', '.h')):  # only C sources and headers
            continue
        if conver_to_utf_8(full_path) == 1:  # convert first, 1 means success
            format_codes(full_path)  # then tidy up the whitespace
| 129 | + |
def formatfiles():
    """Ask the user for a directory and format all C files found beneath it."""
    traversalallfile(input('enter work path: '))
| 133 | + |
# Start the interactive formatter only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    formatfiles()
0 commit comments