Skip to content

Commit ecb1798

Browse files
authored
Merge pull request #4416 from mysterywolf/tools
[tools] 增加formatting自动化格式调整脚本(初版)
2 parents ea22af5 + 0dacf70 commit ecb1798

File tree

3 files changed

+135
-0
lines changed

3 files changed

+135
-0
lines changed

tools/as.sh

100755 → 100644
File mode changed.
File renamed without changes.

tools/formatting.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#
2+
# File : formatting.py
3+
# This file is part of RT-Thread RTOS
4+
# COPYRIGHT (C) 2006 - 2018, RT-Thread Development Team
5+
#
6+
# This program is free software; you can redistribute it and/or modify
7+
# it under the terms of the GNU General Public License as published by
8+
# the Free Software Foundation; either version 2 of the License, or
9+
# (at your option) any later version.
10+
#
11+
# This program is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
# GNU General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU General Public License along
17+
# with this program; if not, write to the Free Software Foundation, Inc.,
18+
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19+
#
20+
# Change Logs:
21+
# Date Author Notes
22+
# 2021-03-02 Meco Man The first version
23+
# 2021-03-04 Meco Man 增加统一转换成UTF-8编码格式功能
24+
25+
26+
#本文件会自动对指定路径下的所有文件包括子文件夹的文件(仅针对.c.h)进行扫描
27+
# 1)将源文件编码统一为UTF-8;
28+
# 2)将TAB键替换为空格;
29+
# 3)将每行末尾多余的空格删除,并统一换行符为'\n';
30+
#使用时只需要双击本文件,输入要扫描的文件夹路径即可
31+
#不能保证100%全部成功转换为UTF-8,有一些编码特殊或识别不准确会在终端打印信息,需人工转换
32+
33+
#欢迎对本文件的功能继续做出补充,欢迎提交PR
34+
35+
import os
36+
import chardet
37+
38+
#用空格代替TAB键
39+
#这里并不是简单的将TAB替换成4个空格
40+
#空格个数到底是多少需要计算,因为TAB制表本身有自动对齐的功能
41+
def tab2spaces(line, tab_width=4):
    """Expand every TAB in *line* to spaces, aligned to tab stops.

    A TAB is not blindly replaced by ``tab_width`` spaces: it is padded only
    as far as the next multiple of ``tab_width``, so text that was aligned
    with TABs keeps its alignment.

    Args:
        line: Text to convert, normally a single line of a source file.
        tab_width: Distance between tab stops. Defaults to 4, the width the
            script has always used.

    Returns:
        The text with each TAB replaced by the spaces needed to reach the
        next tab stop. Columns are counted in characters.
    """
    # str.expandtabs does the same column arithmetic in a single pass,
    # replacing the previous quadratic index/del/insert loop over a list.
    return line.expandtabs(tab_width)
58+
59+
#删除每行末尾多余的空格 统一使用\n作为结尾
60+
def formattail(line):
    """Return *line* with trailing whitespace removed and one newline appended.

    Any existing line terminator counts as trailing whitespace, so the
    result always ends in exactly one line-feed character.
    """
    trimmed = line.rstrip()
    return ''.join((trimmed, '\n'))
64+
65+
#对单个文件进行格式整理
66+
def format_codes(filename):
    """Rewrite one UTF-8 source file with TABs expanded and line tails cleaned.

    Every line is passed through tab2spaces() and formattail(). The result is
    written to a scratch file next to *filename*, which then replaces the
    original.

    Args:
        filename: Path of the file to reformat. It is read as UTF-8.

    Returns:
        None. A file that cannot be decoded as UTF-8 is reported on stdout
        and left unmodified.
    """
    # Keep the scratch file beside the target rather than a fixed 'temp' in
    # the current directory, so the final replace stays on one filesystem.
    temp_name = filename + '.tmp'

    with open(filename, 'r', encoding='utf-8') as source:
        try:
            # newline='\n' stops text mode from translating '\n' into the
            # platform line separator, so the output really ends in '\n'.
            with open(temp_name, 'w', encoding='utf-8', newline='\n') as target:
                for line in source:
                    target.write(formattail(tab2spaces(line)))
        except UnicodeDecodeError:
            # Decoding happens lazily while iterating, so a bad byte shows up
            # here; drop the partial output and keep the original file.
            print("UnicodeDecodeError未处理,需人工确认" + filename)
            os.remove(temp_name)
            return

    # Both handles are closed at this point. os.replace swaps the file in one
    # step instead of deleting the original before renaming the scratch file.
    os.replace(temp_name, filename)
78+
79+
def get_encode_info(file):
    """Detect the text encoding of *file* and decide whether it is convertible.

    The whole file is read as bytes and handed to chardet.detect().

    Args:
        file: Path of the file to inspect.

    Returns:
        One of 'ascii', 'utf-8', 'GB2312' or 'Windows-1252' when the
        (possibly remapped) detection result is one of those, otherwise None.
        An unexpected non-None encoding is also reported on stdout so it can
        be handled manually.
    """
    with open(file, 'rb') as stream:
        raw = stream.read()
    detected = chardet.detect(raw)['encoding']

    # Per the original author's notes, chardet sometimes misidentifies files:
    # mostly-English text with a little Chinese comes back as EUC-JP, and
    # some GB2312 files come back as ISO-8859-1. Both are treated as GB2312.
    if detected in ('EUC-JP', 'ISO-8859-1'):
        detected = 'GB2312'

    # Encodings accepted as-is. Windows-1252 is accepted because, per the
    # original author's note, STMicroelectronics sources contain a
    # French-style apostrophe in "'s".
    if detected in ('ascii', 'utf-8', 'GB2312', 'Windows-1252'):
        return detected

    # Anything else is rejected; name the encoding when one was detected.
    if detected is not None:
        print('未处理,需人工确认:' + detected + ':' + file)
    return None
95+
96+
#将单个文件转为UTF-8编码
97+
def conver_to_utf_8(path):
    """Re-encode the file at *path* as UTF-8, in place.

    The source encoding is obtained from get_encode_info(); the file's bytes
    are decoded with it and written back encoded as UTF-8.

    Args:
        path: Path of the file to convert.

    Returns:
        1 when the file was rewritten as UTF-8, 0 when the encoding was not
        accepted by get_encode_info() or a Unicode error occurred.
    """
    try:
        info = get_encode_info(path)
        if info is None:
            return 0  # 0: failed

        # 'with' closes the handle even when decode() raises; previously the
        # file stayed open on that path.
        with open(path, 'rb+') as file:
            data = file.read()
            utf = data.decode(info).encode('utf-8')
            file.seek(0)
            file.write(utf)
            # Drop any leftover bytes should the new content be shorter.
            file.truncate()
        return 1  # 1: success
    except UnicodeDecodeError:
        print("UnicodeDecodeError未处理,需人工确认" + path)
        return 0
    except UnicodeEncodeError:
        print("UnicodeEncodeError未处理,需人工确认" + path)
        return 0
117+
118+
# 递归扫描目录下的所有文件
119+
def traversalallfile(path):
    """Recursively process every .c and .h file below *path*.

    Sub-directories are descended into as they are met. Each matching file is
    first converted with conver_to_utf_8() and, only when that returns 1,
    reformatted with format_codes().

    Args:
        path: Directory to scan.
    """
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)

        if os.path.isdir(full_path):
            traversalallfile(full_path)
            continue
        if not os.path.isfile(full_path):
            continue
        # Only C sources and headers are handled.
        if not full_path.endswith(('.c', '.h')):
            continue

        # Convert to UTF-8 first; format only when the conversion succeeded.
        if conver_to_utf_8(full_path) == 1:
            format_codes(full_path)
129+
130+
def formatfiles():
    """Ask for a directory on stdin and run traversalallfile() on it."""
    traversalallfile(input('enter work path: '))
133+
134+
# Script entry point: prompt and run only when executed directly, not when
# this module is imported.
if __name__ == '__main__':
    formatfiles()

0 commit comments

Comments
 (0)