-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathpreproc_db.py
More file actions
177 lines (147 loc) · 7.97 KB
/
preproc_db.py
File metadata and controls
177 lines (147 loc) · 7.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
'''
===============================================================================
Tool to preprocess images before training/using them with a CNN for re-identification.
This tool allows a user to process one image or images in a directory (no nested directories).
USAGE:
with required parameters only (optional are defaults):
python preproc_db.py -i <image_path/image_dir>
with all optional parameters:
python preproc_db.py -i <image_path/image_dir> -d <yes/no> -c <path_config>
-o <path_output_folder> -l <path_to_csv>
FUNCTIONALITY:
1) Interactive drawing tool to highlight the region of interest in the image.
Use option -d no to disable.
2) After drawing the script crops each image by the highlighted region and resizes it to
the size specified in config file.
3) If csv file is specified then files and corresponding labels are read from
the file. Otherwise, it is assumed that the name of the folder is the label.
4) If files are supplied in one folder with labels in csv then
the script rearranges the files in folders based on the label.
PARAMETERS:
-i path to image or a folder with images to process (no nested). Required argument.
-d yes/no, activate or not mask drawing tool. Default: yes.
Use it to localize a pattern of interest.
-c path to configuration file with default parameters. Default: configs/manta.json
Preconfigured files: configs/manta.json for manta rays, configs/whale.json for whales.
-o path to save processed files. Default is in config.prod.output
-l path to a csv file with a list of files and corresponding labels. Default is in config.prod.lfile.
-x zero-based index of the file to resume processing from. Useful for large folders. Default: 0.
README:
If drawing tool is activated, draw a line around a pattern of interest.
To change a mask simply start drawing a new line.
Key 's' - to save the drawing
Key 'q' - to process next image without saving current one
Key esc - to exit the drawing tool
Each image is cropped by mask, resized to the size specified in config file and saved to
an output directory.
===============================================================================
'''
import numpy as np
import os, argparse, json
from utils.preprocessing import crop_im_by_mask, resize_imgs, convert_to_fmt
from utils.drawer import MaskDrawer
from utils.utils import str2bool
# Command-line interface of the preprocessing tool. Only -i is mandatory;
# -o and -l fall back to values read from the configuration file when omitted.
argparser = argparse.ArgumentParser(
    description='Prepare database from the data',
)
argparser.add_argument(
    '-i', '--impath',
    help='Path to image/folder with images to process (no nested)',
    required=True,
)
argparser.add_argument(
    '-d', '--draw',
    help='Activate mask drawing tool for each image: True/False. Default: True',
    default=True,
    type=str2bool,
)
argparser.add_argument(
    '-c', '--conf',
    help='Path to configuration file with default parameters',
    default='configs/manta.json',
)
argparser.add_argument(
    '-o', '--output',
    help='Path to output files. Default is in config.prod.output',
)
argparser.add_argument(
    '-l', '--lfile',
    help='List of files to process in csv file (w/header:file,label). Filenames only, not the full path. Default is in config.prod.lfile.',
)
argparser.add_argument(
    '-x', '--idx',
    help='Index (number) of file to resume the process. Zero based. Useful for large folders',
    default=0,
    type=int,
)
def strip_trailing_sep(path):
    """Return *path* without trailing path separators.

    Only the end of the string is trimmed: stripping both ends would also
    remove a leading separator and silently turn an absolute path into a
    relative one.
    """
    trimmed = path.rstrip(os.sep)
    # A path made only of separators is the filesystem root; keep it usable.
    if path and not trimmed:
        return os.sep
    return trimmed


def read_labels(lfile, impath):
    """Read the csv file with a header and (file, label) rows.

    Parameters:
        lfile: path to the csv file; the first line is skipped as a header.
        impath: folder that is prepended to every filename from the csv.

    Returns:
        (files, file2lab): list of image paths, and a dictionary mapping a
        filename without extension to its label.

    Raises:
        ValueError: if the rows have fewer than two columns.
    """
    table = np.genfromtxt(lfile, dtype=str, skip_header=1, delimiter=',')
    if table.size == 0:
        return [], {}
    # genfromtxt collapses a single data row to 1-D; restore (rows, columns)
    table = np.atleast_2d(table)
    if table.shape[1] < 2:
        raise ValueError('Label file {} must have two columns: file,label'.format(lfile))
    files = [os.path.join(impath, str(row[0])) for row in table]
    # Keys have no extension because all files are converted to .png in later steps
    file2lab = {os.path.splitext(str(row[0]))[0]: str(row[1]) for row in table}
    return files, file2lab


def rearrange_by_label(output_dir, file2lab):
    """Move every labelled file in *output_dir* into a sub-folder named by its label.

    Files whose name (without extension) is not in *file2lab* are reported and
    left in place instead of aborting the whole rearrangement.

    Returns:
        Number of files moved.
    """
    moved = 0
    for filename in os.listdir(output_dir):
        source = os.path.join(output_dir, filename)
        if not os.path.isfile(source):
            continue
        filename_noext = os.path.splitext(filename)[0]
        if filename_noext not in file2lab:
            print('No label found for file {}. File is left in place'.format(source))
            continue
        folder = os.path.join(output_dir, file2lab[filename_noext])
        if not os.path.exists(folder):
            os.makedirs(folder)
        # Move image to its class folder
        os.rename(source, os.path.join(folder, filename))
        moved += 1
    return moved


def main():
    """Preprocess the image(s) selected on the command line.

    Optionally runs the mask drawing tool on each image, crops by the mask,
    resizes to the size from the config file, converts to png and, when the
    input is a folder with a csv of labels, sorts the results into one folder
    per label.

    Raises:
        ValueError: if the input image/folder does not exist.
        Exception: if the configuration file does not exist.
    """
    print(__doc__)

    # Get arguments
    args = argparser.parse_args()
    impath = strip_trailing_sep(args.impath)
    if not os.path.exists(impath):
        raise ValueError('Image file/folder does not exist. Check input.')
    config_path = args.conf
    if not os.path.exists(config_path):
        raise Exception('Config file does not exist. Check input.')
    # Flag to activate/deactivate drawing tool
    draw = args.draw

    # Read config file
    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    # Command-line values take precedence over the config file
    output_dir = strip_trailing_sep(
        config['prod']['output'] if args.output is None else args.output)
    lfile = config['prod']['lfile'] if args.lfile is None else args.lfile
    # Guard against a null/empty entry, which os.path.exists cannot handle
    has_labels = bool(lfile) and os.path.exists(lfile)

    # Reading list of image filenames. file2lab stays None unless labels are
    # read from csv, so the rearranging step below runs only in that case.
    file2lab = None
    if os.path.isdir(impath):
        if has_labels:
            print('Reading only files specified in a file {}'.format(lfile))
            files, file2lab = read_labels(lfile, impath)
        else:
            files = [os.path.join(impath, f) for f in os.listdir(impath)
                     if os.path.isfile(os.path.join(impath, f))]
            print('Found {} files in source {}. Subdirectories are ignored'.format(len(files), impath))
    else:
        print('Reading image {}'.format(impath))
        files = [impath]

    # Add subdirectory to the output folder: the last component of impath
    # (for a single image this is the file name itself)
    output_dir = os.path.join(output_dir, os.path.basename(impath))
    print('Output files will be stored in {}'.format(output_dir))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    size = (config['model']['input_width'], config['model']['input_height'])
    # Crop to a square only when the target size is square
    square = (size[0] == size[1])

    # Count processed images
    proc_count = 0
    # Sort order of the files so that -x/--idx refers to a stable position
    files.sort()

    # Draw mask on each file and save
    for i in range(args.idx, len(files)):
        file = files[i]
        print('Processing file {} {}'.format(i, file))
        # Without a finished mask the image is used uncropped
        croppedpath = file
        if draw:
            # Mask is stored in the output folder under the image name
            maskpath = os.path.join(output_dir, os.path.basename(file))
            md = MaskDrawer(file, maskpath)
            respond = md.run()
            if respond == 'exit':
                print('Exiting the programm')
                # quit() is an interactive helper from the site module
                raise SystemExit
            if respond == 'next':
                print('Skipped file {} {}'.format(i, file))
                # do not include this image in the dataset
                continue
            if respond != 'save':
                # Never reuse the path of the previous image for an unknown response
                print('Unexpected response {!r} from drawing tool. Skipped file {} {}'.format(respond, i, file))
                continue
            if md.done:
                croppedpath = crop_im_by_mask(file, maskpath, output_dir, padding = 0, square=square)
        proc_count += 1

        # Resize to the size and convert to png format
        resizedpath = resize_imgs(croppedpath, output_dir, size)
        convert_to_fmt(resizedpath, imformat = 'png')
        print('Processed {} images'.format(proc_count))

    print('Total processed {} images'.format(proc_count))

    # If files are supplied in one folder with labels in csv,
    # rearrange processed files to one folder per class
    if file2lab is not None:
        print('Rearranging files in directories as per labels...')
        rearrange_by_label(output_dir, file2lab)


if __name__ == "__main__":
    main()