Skip to content

Commit a71fe61

Browse files
authored
Merge pull request #426 from MaximKuklin/master
Efficient image loading
2 parents e97e082 + 3498065 commit a71fe61

21 files changed

+505
-0
lines changed

Efficient-image-loading/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
This directory contains the code for **Efficient image loading**. For more information, visit [**Efficient image loading**](https://www.learnopencv.com/efficient-image-loading/).
2+
3+
4+
# AI Courses by OpenCV
5+
6+
Want to become an expert in AI? [AI Courses by OpenCV](https://opencv.org/courses/) is a great place to start.
7+
8+
<a href="https://opencv.org/courses/">
9+
<p align="center">
10+
<img src="https://www.learnopencv.com/wp-content/uploads/2020/04/AI-Courses-By-OpenCV-Github.png">
11+
</p>
12+
</a>

Efficient-image-loading/__init__.py

Whitespace-only changes.
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
from argparse import ArgumentParser
2+
3+
import numpy as np
4+
from prettytable import PrettyTable
5+
6+
from create_lmdb import store_many_lmdb
7+
from create_tfrecords import store_many_tfrecords
8+
from loader import (
9+
CV2Loader,
10+
LmdbLoader,
11+
PILLoader,
12+
TFRecordsLoader,
13+
TurboJpegLoader,
14+
methods,
15+
)
16+
from tools import get_images_paths
17+
18+
19+
def count_time(loader, iters):
    """Collect per-image loading times over repeated passes of a loader.

    Args:
        loader: Sized iterable whose iterator yields ``(image, time)``
            pairs; only the time component is kept.
        iters: Number of full passes to make over ``loader``.

    Returns:
        A NumPy array holding one time measurement per loaded image,
        i.e. ``iters * len(loader)`` entries.
    """
    # Capture the name before iterating so the report always names the
    # loader itself rather than whatever object ``iter()`` hands back.
    loader_name = type(loader).__name__
    num_images = len(loader)

    time_list = []
    for _ in range(iters):
        # Ask the original loader for an iterator on every pass. Rebinding
        # ``loader`` to its iterator would make later passes call ``iter()``
        # on the previous iterator instead of on the loader.
        iterator = iter(loader)
        for _ in range(num_images):
            _image, elapsed = next(iterator)
            time_list.append(elapsed)

    time_array = np.asarray(time_list)
    print_stats(time_array, loader_name)
    return time_array
30+
31+
32+
def print_stats(time, name):
    """Print mean, median, std, min and max of ``time`` labelled with ``name``.

    Args:
        time: NumPy array of time measurements.
        name: Label printed in front of every statistic.
    """
    print("Time measures for {}:".format(name))

    # Each row pairs an output template with the callable producing its
    # value; values are computed lazily, one row at a time.
    rows = (
        ("{} mean time - {:.8f} seconds", time.mean),
        ("{} median time - {:.8f} seconds", lambda: np.median(time)),
        ("{} std time - {:.8f} seconds", time.std),
        ("{} min time - {:.8f} seconds", time.min),
        ("{} max time - {:.8f} seconds", time.max),
    )
    for template, compute in rows:
        print(template.format(name, compute()))

    print("\n")
40+
41+
42+
def benchmark(method, path, iters=100, **kwargs):
    """Instantiate the loader registered under ``method`` and time it.

    Args:
        method: Key into the ``methods`` registry selecting the loader.
        path: Passed to the loader as its first positional argument.
        iters: Number of passes forwarded to ``count_time``.
        **kwargs: Extra keyword arguments forwarded to the loader.

    Returns:
        Whatever ``count_time`` returns for the constructed loader.
    """
    loader_factory = methods[method]
    return count_time(loader_factory(path, **kwargs), iters)
48+
49+
50+
if __name__ == "__main__":
    # Command-line entry point: benchmark every requested loading method on
    # the images found under --path and print a summary table.
    parser = ArgumentParser()

    parser.add_argument(
        "--path",
        "-p",
        type=str,
        # The path is always handed to get_images_paths below, so a missing
        # value would otherwise reach it as None.
        required=True,
        help="path to image folder",
    )
    parser.add_argument(
        "--method",
        nargs="+",
        required=True,
        choices=["cv2", "pil", "turbojpeg", "lmdb", "tfrecords"],
        help="Image loading methods to use in benchmark",
    )
    parser.add_argument(
        "--mode",
        "-m",
        type=str,
        required=True,
        choices=["BGR", "RGB"],
        help="Image color mode",
    )
    parser.add_argument(
        "--iters",
        type=int,
        # Same default as benchmark(); without it an omitted flag yields
        # None, which is passed on explicitly and breaks range(iters).
        default=100,
        help="Number of iterations to average the results",
    )
    args = parser.parse_args()

    image_paths = get_images_paths(args.path)

    results = {}
    for method in args.method:
        # Database-backed loaders need their storage built from the image
        # files first; the other loaders read the image folder directly.
        if method == "lmdb":
            path = "./lmdb/images"
            store_many_lmdb(image_paths, path)
        elif method == "tfrecords":
            path = "./tfrecords/images.tfrecords"
            store_many_tfrecords(image_paths, path)
        else:
            path = args.path

        results[method] = benchmark(
            method, path, mode=args.mode, iters=args.iters,
        )

    table = PrettyTable(["Loader", "Mean time", "Median time"])

    print(
        f"Benchmark on {len(image_paths)} {args.mode} images with {args.iters} averaging iteration results:\n",
    )

    for method, timings in results.items():
        table.add_row([method, timings.mean(), np.median(timings)])
    print(table)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import os
2+
from argparse import ArgumentParser
3+
4+
import cv2
5+
import lmdb
6+
import numpy as np
7+
8+
from tools import get_images_paths
9+
10+
11+
def store_many_lmdb(images_list, save_path):
    """Store the raw bytes of every image file in an LMDB environment.

    Files are written exactly as read from disk. Each one is stored under a
    zero-padded, 8-digit ASCII key equal to its index in ``images_list``.

    Args:
        images_list: Sequence of image file paths.
        save_path: Directory of the LMDB environment; created if missing.

    Raises:
        ValueError: If ``images_list`` is empty, or if the largest file
            cannot be decoded by OpenCV to estimate the database size.
    """
    num_images = len(images_list)  # number of images in our folder
    if not num_images:
        raise ValueError("images_list is empty, there is nothing to store")

    file_sizes = [os.path.getsize(item) for item in images_list]  # all file sizes
    largest_path = images_list[int(np.argmax(file_sizes))]

    # cv2.imread signals failure by returning None instead of raising.
    largest_image = cv2.imread(largest_path)
    if largest_image is None:
        raise ValueError(
            f"cannot decode {largest_path!r} to estimate the database size",
        )

    # Maximum database size in bytes: decoded size of the largest file,
    # times the number of images, with a 10x safety margin.
    map_size = num_images * largest_image.nbytes * 10

    # Make sure the environment directory (and its parents) exists, as the
    # script entry point of this module already does before calling us.
    os.makedirs(save_path, exist_ok=True)

    env = lmdb.open(save_path, map_size=map_size)  # create lmdb environment
    try:
        with env.begin(write=True) as txn:  # start writing to environment
            for i, image in enumerate(images_list):
                with open(image, "rb") as file:
                    data = file.read()  # read image as bytes
                key = f"{i:08}"  # get image key
                txn.put(key.encode("ascii"), data)  # put the key-value into database
    finally:
        env.close()  # close the environment even if writing failed
31+
32+
33+
if __name__ == "__main__":
    # Command-line entry point: pack every image found under --path into an
    # LMDB environment located at --output.
    parser = ArgumentParser()
    parser.add_argument(
        "--path",
        "-p",
        type=str,
        required=True,
        help="path to the images folder to collect",
    )
    parser.add_argument(
        "--output",
        "-o",
        type=str,
        required=True,
        help='path to the output environment directory file i.e. "path/to/folder/env/"',
    )

    args = parser.parse_args()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(args.output, exist_ok=True)

    images = get_images_paths(args.path)
    store_many_lmdb(images, args.output)
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import os
2+
from argparse import ArgumentParser
3+
4+
import tensorflow as tf
5+
6+
from tools import get_images_paths
7+
8+
9+
def _byte_feature(value):
    """Wrap a string / byte value in a bytes_list ``tf.train.Feature``."""
    eager_tensor_type = type(tf.constant(0))
    # BytesList can't unpack a string from an EagerTensor, so convert first.
    payload = value.numpy() if isinstance(value, eager_tensor_type) else value
    bytes_list = tf.train.BytesList(value=[payload])
    return tf.train.Feature(bytes_list=bytes_list)
14+
15+
16+
def _int64_feature(value):
    """Wrap a bool / enum / int / uint value in an int64_list ``tf.train.Feature``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
19+
20+
21+
def image_example(image_string, label):
    """Build a ``tf.train.Example`` holding an image's bytes and its label.

    Args:
        image_string: Image content, stored under the "image_raw" key.
        label: Integer label, stored under the "label" key.

    Returns:
        A ``tf.train.Example`` with the two features above.
    """
    features = tf.train.Features(
        feature={
            "label": _int64_feature(label),
            "image_raw": _byte_feature(image_string),
        },
    )
    return tf.train.Example(features=features)
27+
28+
29+
def store_many_tfrecords(images_list, save_file):
    """Write every image file as a ``tf.train.Example`` into one TFRecords file.

    Each image is stored as its raw file bytes together with a label equal
    to its index in ``images_list``.

    Args:
        images_list: Iterable of image file paths.
        save_file: Output file path; must end with ".tfrecords". Missing
            parent directories are created.

    Raises:
        AssertionError: If ``save_file`` does not end with ".tfrecords".
    """
    assert save_file.endswith(
        ".tfrecords",
    ), 'File path is wrong, it should contain "*myname*.tfrecords"'

    # dirname is "" for a bare file name, and os.makedirs("") raises, so only
    # create a directory when the path actually contains one.
    directory = os.path.dirname(save_file)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with tf.io.TFRecordWriter(save_file) as writer:  # start writer
        for label, filename in enumerate(images_list):  # cycle by each image path
            # Read inside a context manager so the file handle is closed
            # right away instead of lingering until garbage collection.
            with open(filename, "rb") as image_file:
                image_string = image_file.read()  # read the image as bytes string
            tf_example = image_example(
                image_string, label,
            )  # save the data as tf.Example object
            writer.write(tf_example.SerializeToString())  # and write it into database
46+
47+
48+
if __name__ == "__main__":
    # Command-line entry point: pack every image found under --path into
    # the TFRecords file given by --output.
    cli = ArgumentParser()
    cli.add_argument(
        "--path", "-p", type=str, required=True,
        help="path to the images folder to collect",
    )
    cli.add_argument(
        "--output", "-o", type=str, required=True,
        help='path to the output tfrecords file i.e. "path/to/folder/myname.tfrecords"',
    )
    options = cli.parse_args()

    store_many_tfrecords(get_images_paths(options.path), options.output)
47 KB
Loading
24.2 KB
Loading
12.5 KB
Loading
70.9 KB
Loading
9.42 KB
Loading

0 commit comments

Comments
 (0)