|
21 | 21 | import contextlib
|
22 | 22 | from PIL import Image, ImageEnhance
|
23 | 23 | import math
|
| 24 | +from paddle.dataset.common import download |
24 | 25 |
|
25 | 26 | random.seed(0)
|
26 | 27 | np.random.seed(0)
|
|
30 | 31 | SIZE_FLOAT32 = 4
|
31 | 32 | SIZE_INT64 = 8
|
32 | 33 |
|
33 |
| -DATA_DIR = './data/ILSVRC2012/data.bin' |
34 |
| - |
35 | 34 | img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
|
36 | 35 | img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
|
37 | 36 |
|
@@ -71,15 +70,60 @@ def process_image(img_path, mode, color_jitter, rotate):
|
71 | 70 | return img
|
72 | 71 |
|
73 | 72 |
|
def download_unzip():
    """Download the split ImageNet validation archive, join and extract it.

    The archive is published as two parts. Each part is fetched with
    ``download`` (which is given the expected md5 of the part), the parts
    are concatenated in order into ``full_imagenet_val.tar.gz`` inside the
    cache folder, and the joined archive is extracted there. A ``data``
    symlink pointing at the cache folder is also created in the current
    working directory.

    Returns:
        str: path of the ``data`` directory inside the cache folder.

    Raises:
        IOError / OSError: if a part cannot be read or the joined archive
            cannot be written.
        tarfile.TarError: if the joined archive cannot be extracted.
    """
    # Local imports: only this function needs them, and the top-of-file
    # import block may not list them.
    import shutil
    import tarfile

    tmp_folder = 'int8/download'
    cache_folder = os.path.expanduser('~/.cache/' + tmp_folder)

    # (url, md5) of every archive part, in concatenation order.
    data_parts = [
        ('https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partaa',
         '60f6525b0e1d127f345641d75d41f0a8'),
        ('https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab',
         '1e9f15f64e015e58d6f9ec3210ed18b5'),
    ]

    part_paths = []
    for url, md5sum in data_parts:
        # NOTE(review): download() is expected to return the path it saved
        # the file to -- confirm against the installed paddle version. When
        # it returns nothing, fall back to the location originally assumed.
        saved_path = download(url, tmp_folder, md5sum)
        if not saved_path:
            saved_path = os.path.join(cache_folder, url.split('/')[-1])
        part_paths.append(saved_path)

    # download() may store the parts elsewhere, so make sure the folder the
    # joined archive is written to actually exists.
    if not os.path.isdir(cache_folder):
        os.makedirs(cache_folder)

    zip_path = os.path.join(cache_folder, 'full_imagenet_val.tar.gz')
    if not os.path.exists(zip_path):
        # Join into a temporary name and rename at the end, so that an
        # interrupted concatenation is never mistaken for a complete archive
        # by the existence check above on the next run.
        partial_path = zip_path + '.partial'
        with open(partial_path, 'wb') as joined:
            for part_path in part_paths:
                with open(part_path, 'rb') as part:
                    shutil.copyfileobj(part, joined)
        os.rename(partial_path, zip_path)

    # NOTE(review): assumes the archive unpacks into a top-level ``data``
    # directory -- verify against the actual archive layout.
    data_dir = os.path.join(cache_folder, 'data')
    if not os.path.exists(data_dir):
        with tarfile.open(zip_path) as archive:
            archive.extractall(cache_folder)

    # Expose the cache folder as ./data. Only a stale symlink is replaced;
    # a real file or directory named "data" is never deleted.
    link_name = 'data'
    if os.path.islink(link_name):
        os.remove(link_name)
    if not os.path.exists(link_name):
        os.symlink(cache_folder, link_name)

    return data_dir
| 115 | + |
| 116 | + |
74 | 117 | def reader():
|
75 |
| - data_dir = DATA_DIR |
| 118 | + data_dir = download_unzip() |
76 | 119 | file_list = os.path.join(data_dir, 'val_list.txt')
|
77 |
| - bin_file = os.path.join(data_dir, 'data.bin') |
| 120 | + output_file = os.path.join(data_dir, 'int8_full_val.bin') |
78 | 121 | with open(file_list) as flist:
|
79 | 122 | lines = [line.strip() for line in flist]
|
80 | 123 | num_images = len(lines)
|
81 | 124 |
|
82 |
| - with open(bin_file, "w+b") as of: |
| 125 | + with open(output_file, "w+b") as of: |
| 126 | + #save num_images(int64_t) to file |
83 | 127 | of.seek(0)
|
84 | 128 | num = np.array(int(num_images)).astype('int64')
|
85 | 129 | of.write(num.tobytes())
|
|
0 commit comments