@@ -17,8 +17,16 @@ def isCreateOrDeleteFolder(path, flag):
17
17
return flagAbsPath
18
18
19
19
20
- def splitTrainVal (root , abs_train_root_path , abs_val_root_path , abs_test_root_path , train_txt , val_txt , test_txt , flag ):
21
-
20
+ def splitTrainVal (
21
+ root ,
22
+ abs_train_root_path ,
23
+ abs_val_root_path ,
24
+ abs_test_root_path ,
25
+ train_txt ,
26
+ val_txt ,
27
+ test_txt ,
28
+ flag ,
29
+ ):
22
30
data_abs_path = os .path .abspath (root )
23
31
label_file_name = args .detLabelFileName if flag == "det" else args .recLabelFileName
24
32
label_file_path = os .path .join (data_abs_path , label_file_name )
@@ -29,13 +37,15 @@ def splitTrainVal(root, abs_train_root_path, abs_val_root_path, abs_test_root_pa
29
37
label_record_len = len (label_file_content )
30
38
31
39
for index , label_record_info in enumerate (label_file_content ):
32
- image_relative_path , image_label = label_record_info .split (' \t ' )
40
+ image_relative_path , image_label = label_record_info .split (" \t " )
33
41
image_name = os .path .basename (image_relative_path )
34
42
35
43
if flag == "det" :
36
44
image_path = os .path .join (data_abs_path , image_name )
37
45
elif flag == "rec" :
38
- image_path = os .path .join (data_abs_path , args .recImageDirName , image_name )
46
+ image_path = os .path .join (
47
+ data_abs_path , args .recImageDirName , image_name
48
+ )
39
49
40
50
train_val_test_ratio = args .trainValTestRatio .split (":" )
41
51
train_ratio = eval (train_val_test_ratio [0 ]) / 10
@@ -77,27 +87,46 @@ def genDetRecTrainVal(args):
77
87
removeFile (os .path .join (args .recRootPath , "val.txt" ))
78
88
removeFile (os .path .join (args .recRootPath , "test.txt" ))
79
89
80
- detTrainTxt = open (os .path .join (args .detRootPath , "train.txt" ), "a" , encoding = "UTF-8" )
90
+ detTrainTxt = open (
91
+ os .path .join (args .detRootPath , "train.txt" ), "a" , encoding = "UTF-8"
92
+ )
81
93
detValTxt = open (os .path .join (args .detRootPath , "val.txt" ), "a" , encoding = "UTF-8" )
82
94
detTestTxt = open (os .path .join (args .detRootPath , "test.txt" ), "a" , encoding = "UTF-8" )
83
- recTrainTxt = open (os .path .join (args .recRootPath , "train.txt" ), "a" , encoding = "UTF-8" )
95
+ recTrainTxt = open (
96
+ os .path .join (args .recRootPath , "train.txt" ), "a" , encoding = "UTF-8"
97
+ )
84
98
recValTxt = open (os .path .join (args .recRootPath , "val.txt" ), "a" , encoding = "UTF-8" )
85
99
recTestTxt = open (os .path .join (args .recRootPath , "test.txt" ), "a" , encoding = "UTF-8" )
86
100
87
- splitTrainVal (args .datasetRootPath , detAbsTrainRootPath , detAbsValRootPath , detAbsTestRootPath , detTrainTxt , detValTxt ,
88
- detTestTxt , "det" )
101
+ splitTrainVal (
102
+ args .datasetRootPath ,
103
+ detAbsTrainRootPath ,
104
+ detAbsValRootPath ,
105
+ detAbsTestRootPath ,
106
+ detTrainTxt ,
107
+ detValTxt ,
108
+ detTestTxt ,
109
+ "det" ,
110
+ )
89
111
90
112
for root , dirs , files in os .walk (args .datasetRootPath ):
91
113
for dir in dirs :
92
- if dir == 'crop_img' :
93
- splitTrainVal (root , recAbsTrainRootPath , recAbsValRootPath , recAbsTestRootPath , recTrainTxt , recValTxt ,
94
- recTestTxt , "rec" )
114
+ if dir == "crop_img" :
115
+ splitTrainVal (
116
+ root ,
117
+ recAbsTrainRootPath ,
118
+ recAbsValRootPath ,
119
+ recAbsTestRootPath ,
120
+ recTrainTxt ,
121
+ recValTxt ,
122
+ recTestTxt ,
123
+ "rec" ,
124
+ )
95
125
else :
96
126
continue
97
127
break
98
128
99
129
100
-
101
130
if __name__ == "__main__" :
102
131
# 功能描述:分别划分检测和识别的训练集、验证集、测试集
103
132
# 说明:可以根据自己的路径和需求调整参数,图像数据往往多人合作分批标注,每一批图像数据放在一个文件夹内用PPOCRLabel进行标注,
@@ -107,40 +136,43 @@ def genDetRecTrainVal(args):
107
136
"--trainValTestRatio" ,
108
137
type = str ,
109
138
default = "6:2:2" ,
110
- help = "ratio of trainset:valset:testset" )
139
+ help = "ratio of trainset:valset:testset" ,
140
+ )
111
141
parser .add_argument (
112
142
"--datasetRootPath" ,
113
143
type = str ,
114
144
default = "../train_data/" ,
115
- help = "path to the dataset marked by ppocrlabel, E.g, dataset folder named 1,2,3..."
145
+ help = "path to the dataset marked by ppocrlabel, E.g, dataset folder named 1,2,3..." ,
116
146
)
117
147
parser .add_argument (
118
148
"--detRootPath" ,
119
149
type = str ,
120
150
default = "../train_data/det" ,
121
- help = "the path where the divided detection dataset is placed" )
151
+ help = "the path where the divided detection dataset is placed" ,
152
+ )
122
153
parser .add_argument (
123
154
"--recRootPath" ,
124
155
type = str ,
125
156
default = "../train_data/rec" ,
126
- help = "the path where the divided recognition dataset is placed"
157
+ help = "the path where the divided recognition dataset is placed" ,
127
158
)
128
159
parser .add_argument (
129
160
"--detLabelFileName" ,
130
161
type = str ,
131
162
default = "Label.txt" ,
132
- help = "the name of the detection annotation file" )
163
+ help = "the name of the detection annotation file" ,
164
+ )
133
165
parser .add_argument (
134
166
"--recLabelFileName" ,
135
167
type = str ,
136
168
default = "rec_gt.txt" ,
137
- help = "the name of the recognition annotation file"
169
+ help = "the name of the recognition annotation file" ,
138
170
)
139
171
parser .add_argument (
140
172
"--recImageDirName" ,
141
173
type = str ,
142
174
default = "crop_img" ,
143
- help = "the name of the folder where the cropped recognition dataset is located"
175
+ help = "the name of the folder where the cropped recognition dataset is located" ,
144
176
)
145
177
args = parser .parse_args ()
146
- genDetRecTrainVal (args )
178
+ genDetRecTrainVal (args )
0 commit comments