Skip to content

Commit 57f51e5

Browse files
preprocess with PIL the full val dataset and save binary
test=develop
1 parent 69cb979 commit 57f51e5

File tree

1 file changed

+109
-0
lines changed

1 file changed

+109
-0
lines changed
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the license.
5+
# you may obtain a copy of the license at
6+
#
7+
#     http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# unless required by applicable law or agreed to in writing, software
10+
# distributed under the license is distributed on an "as is" basis,
11+
# without warranties or conditions of any kind, either express or implied.
12+
# see the license for the specific language governing permissions and
13+
# limitations under the license.
14+
import unittest
15+
import os
16+
import numpy as np
17+
import time
18+
import sys
19+
import random
20+
import functools
21+
import contextlib
22+
from PIL import Image, ImageEnhance
23+
import math
24+
25+
# Seed both random number generators so that any random crop taken through
# np.random is reproducible from one run to the next.
random.seed(0)
np.random.seed(0)

# Side length, in pixels, of the square crop stored for every image.
DATA_DIM = 224

# Element widths, in bytes, used when computing offsets in the binary file.
SIZE_FLOAT32 = 4
SIZE_INT64 = 8

# Directory that holds val_list.txt and the images, and receives data.bin.
DATA_DIR = '/data/ILSVRC2012'

# Per-channel normalization statistics, shaped (3, 1, 1) so that they
# broadcast across a channel-first (3, H, W) image array.
img_mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
img_std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
37+
38+
39+
def resize_short(img, target_size):
    """Scale ``img`` so that its shorter side becomes ``target_size``.

    The aspect ratio is preserved. Both output dimensions are rounded to
    the nearest integer and the Lanczos filter is used for resampling.

    Args:
        img: image exposing ``size`` as ``(width, height)`` and a
            ``resize`` method (a PIL image in this script).
        target_size: desired length of the shorter side, in pixels.

    Returns:
        The resized image returned by ``img.resize``.
    """
    width, height = img.size
    scale = float(target_size) / min(width, height)
    new_size = (int(round(width * scale)), int(round(height * scale)))
    return img.resize(new_size, Image.LANCZOS)
45+
46+
47+
def crop_image(img, target_size, center):
    """Crop a ``target_size`` x ``target_size`` square out of ``img``.

    Args:
        img: image exposing ``size`` as ``(width, height)`` and a
            ``crop(box)`` method (a PIL image in this script).
        target_size: side length of the square crop, in pixels.
        center: if truthy, take the centered crop; otherwise choose the
            top-left corner uniformly at random using ``np.random``.

    Returns:
        Whatever ``img.crop`` returns for the box
        ``(left, upper, left + target_size, upper + target_size)``.
    """
    width, height = img.size
    if center:
        # Floor division keeps the box coordinates integral. True division
        # would pass half-pixel float offsets to ``crop`` whenever the
        # margin (width - target_size or height - target_size) is odd.
        w_start = (width - target_size) // 2
        h_start = (height - target_size) // 2
    else:
        # The upper bound of randint is exclusive, hence the "+ 1".
        w_start = np.random.randint(0, width - target_size + 1)
        h_start = np.random.randint(0, height - target_size + 1)
    box = (w_start, h_start, w_start + target_size, h_start + target_size)
    return img.crop(box)
60+
61+
62+
def process_image(img_path, mode, color_jitter, rotate):
    """Load one image and convert it to a normalized float32 array.

    Pipeline: resize so the shorter side is 256, take the centered
    ``DATA_DIM`` x ``DATA_DIM`` crop, convert to RGB if needed, scale pixel
    values by 1/255, reorder to channel-first, then subtract ``img_mean``
    and divide by ``img_std``.

    Args:
        img_path: path of the image file to load.
        mode: not used by this function; kept for interface compatibility.
        color_jitter: not used by this function; kept for interface
            compatibility.
        rotate: not used by this function; kept for interface compatibility.

    Returns:
        A float32 numpy array of shape ``(3, DATA_DIM, DATA_DIM)``.
    """
    # Image.open holds the file handle until the image is closed. resize
    # returns a new, fully loaded image, so the source can be released
    # as soon as the resize is done.
    with Image.open(img_path) as src:
        img = resize_short(src, target_size=256)
    img = crop_image(img, target_size=DATA_DIM, center=True)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    # HWC uint8 -> CHW float32 in the range [0, 1].
    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
    # In-place ops keep the float32 dtype although the stats are float64.
    img -= img_mean
    img /= img_std
    return img
72+
73+
74+
def reader(data_dir=None):
    """Preprocess the listed images and pack them into ``data.bin``.

    Reads ``val_list.txt`` from the data directory. Every non-blank line
    must hold an image path (relative to the data directory) and an integer
    label separated by whitespace. Each image that exists on disk is run
    through ``process_image`` and the results are written to ``data.bin``
    in the same directory.

    File layout (native byte order):
        1 x int64                           number of images N
        N x (3 * DATA_DIM * DATA_DIM) float32   image arrays, in list order
        N x int64                           labels, in list order

    Images that are missing on disk are reported on stderr and left out;
    N counts only the images actually written, so the file never contains
    unwritten gaps.

    Args:
        data_dir: directory containing ``val_list.txt`` and the images.
            Defaults to the module-level ``DATA_DIR``.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields,
            a label is not an integer, or a processed image does not have
            shape ``(3, DATA_DIM, DATA_DIM)``.
    """
    if data_dir is None:
        data_dir = DATA_DIR
    file_list = os.path.join(data_dir, 'val_list.txt')
    bin_file = os.path.join(data_dir, 'data.bin')

    # First pass: decide which samples will be written, so that the count
    # in the header matches the payload exactly.
    samples = []
    with open(file_list) as flist:
        for line in flist:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. a trailing newline at EOF.
                continue
            rel_path, label = line.split()
            img_path = os.path.join(data_dir, rel_path)
            if not os.path.exists(img_path):
                sys.stderr.write('skipping missing image: %s\n' % img_path)
                continue
            samples.append((img_path, int(label)))

    expected_shape = (3, DATA_DIM, DATA_DIM)
    with open(bin_file, 'wb') as of:
        # Header: number of images as int64.
        of.write(np.array(len(samples), dtype='int64').tobytes())

        # Image section: fixed-size float32 records written back to back.
        for img_path, _ in samples:
            img = process_image(
                img_path, 'val', color_jitter=False, rotate=False)
            np_img = np.asarray(img, dtype='float32')
            if np_img.shape != expected_shape:
                # A record of the wrong size would shift every later one.
                raise ValueError('unexpected image shape %s for %s' %
                                 (np_img.shape, img_path))
            of.write(np_img.tobytes())

        # Label section: one int64 per image, directly after the images.
        labels = np.array([label for _, label in samples], dtype='int64')
        of.write(labels.tobytes())
106+
107+
108+
# Run the full preprocessing pass when this file is executed as a script.
if __name__ == '__main__':
    reader()

0 commit comments

Comments
 (0)