Skip to content

Commit b46e467

Browse files
add wget and unzip part and change data_dir
test=develop
1 parent 894aa9b commit b46e467

File tree

1 file changed

+49
-5
lines changed

1 file changed

+49
-5
lines changed

paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py

Lines changed: 49 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
import contextlib
2222
from PIL import Image, ImageEnhance
2323
import math
24+
from paddle.dataset.common import download
2425

2526
random.seed(0)
2627
np.random.seed(0)
@@ -30,8 +31,6 @@
3031
SIZE_FLOAT32 = 4
3132
SIZE_INT64 = 8
3233

33-
DATA_DIR = './data/ILSVRC2012/data.bin'
34-
3534
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
3635
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
3736

@@ -71,15 +70,60 @@ def process_image(img_path, mode, color_jitter, rotate):
7170
return img
7271

7372

73+
def download_unzip():
    """Download the split ILSVRC2012 validation archive and unpack it.

    The archive is published as two parts.  Each part is fetched through
    ``download`` (which is given the part's expected md5), the parts are
    concatenated in order into ``full_imagenet_val.tar.gz`` inside the cache
    folder, and that archive is extracted into a ``data`` sub-folder of the
    cache folder.  Concatenation and extraction are each skipped when their
    output already exists.

    Returns:
        str: path of the folder the archive was extracted into.

    Raises:
        IOError/OSError: if a part cannot be read or the archive cannot be
            written.
        tarfile.TarError: if the concatenated archive cannot be read.
    """
    # Function-scope imports: nothing else in the visible file needs them.
    import shutil
    import tarfile

    tmp_folder = 'int8/download'
    cache_folder = os.path.expanduser('~/.cache/' + tmp_folder)
    zip_path = os.path.join(cache_folder, 'full_imagenet_val.tar.gz')
    # Joined with a separator; plain string concatenation would yield
    # '.../downloaddata' instead of '.../download/data'.
    data_dir = os.path.join(cache_folder, 'data')

    # (url, md5) of every archive part, in concatenation order.
    data_parts = [
        ('https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partaa',
         '60f6525b0e1d127f345641d75d41f0a8'),
        ('https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab',
         '1e9f15f64e015e58d6f9ec3210ed18b5'),
    ]

    # NOTE(review): relies on download() returning the local path of the
    # fetched file, so the parts are found wherever download() stores them
    # rather than being assumed to sit in cache_folder -- confirm against
    # paddle.dataset.common.
    part_paths = [download(url, tmp_folder, md5) for url, md5 in data_parts]

    # The archive is written here, so the folder has to exist first.
    if not os.path.isdir(cache_folder):
        os.makedirs(cache_folder)

    if not os.path.exists(zip_path):
        # Pure-Python equivalent of `cat part... > zip_path`; unlike
        # os.system, an I/O failure raises instead of being ignored.
        with open(zip_path, 'wb') as merged:
            for part_path in part_paths:
                with open(part_path, 'rb') as part:
                    shutil.copyfileobj(part, merged)

    # Guard on the extraction target, not on cache_folder: cache_folder
    # always exists by now, so testing it would never trigger extraction.
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
        with tarfile.open(zip_path) as archive:
            archive.extractall(data_dir)

    # NOTE: the previous `rm -rf ... && ln -s ...` shell command is not
    # recreated. Its format string referenced a non-existent argument ({3})
    # and raised IndexError, and the returned path never depended on the link.
    return data_dir
115+
116+
74117
def reader():
75-
data_dir = DATA_DIR
118+
data_dir = download_unzip()
76119
file_list = os.path.join(data_dir, 'val_list.txt')
77-
bin_file = os.path.join(data_dir, 'data.bin')
120+
output_file = os.path.join(data_dir, 'int8_full_val.bin')
78121
with open(file_list) as flist:
79122
lines = [line.strip() for line in flist]
80123
num_images = len(lines)
81124

82-
with open(bin_file, "w+b") as of:
125+
with open(output_file, "w+b") as of:
126+
#save num_images(int64_t) to file
83127
of.seek(0)
84128
num = np.array(int(num_images)).astype('int64')
85129
of.write(num.tobytes())

0 commit comments

Comments
 (0)