spmallick
diff --git a/‎Efficient-image-loading/README.md‎
Lines changed: 12 additions & 0 deletions b/‎Efficient-image-loading/README.md‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎Efficient-image-loading/__init__.py‎ b/‎Efficient-image-loading/__init__.py‎
diff --git a/‎Efficient-image-loading/benchmark.py‎
Lines changed: 101 additions & 0 deletions b/‎Efficient-image-loading/benchmark.py‎
Lines changed: 101 additions & 0 deletions
diff --git a/‎Efficient-image-loading/create_lmdb.py‎
Lines changed: 55 additions & 0 deletions b/‎Efficient-image-loading/create_lmdb.py‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎Efficient-image-loading/create_tfrecords.py‎
Lines changed: 67 additions & 0 deletions b/‎Efficient-image-loading/create_tfrecords.py‎
Lines changed: 67 additions & 0 deletions
diff --git a/‎Efficient-image-loading/images/cat.jpg‎
47 KB b/‎Efficient-image-loading/images/cat.jpg‎
47 KB
diff --git a/‎Efficient-image-loading/images/pexels/beach.jpg‎
24.2 KB b/‎Efficient-image-loading/images/pexels/beach.jpg‎
24.2 KB
diff --git a/‎Efficient-image-loading/images/pexels/black-rock.jpg‎
12.5 KB b/‎Efficient-image-loading/images/pexels/black-rock.jpg‎
12.5 KB
diff --git a/‎Efficient-image-loading/images/pexels/blue-flowers.jpg‎
70.9 KB b/‎Efficient-image-loading/images/pexels/blue-flowers.jpg‎
70.9 KB
diff --git a/‎Efficient-image-loading/images/pexels/green-hill.jpg‎
9.42 KB b/‎Efficient-image-loading/images/pexels/green-hill.jpg‎
9.42 KB
@@ -0,0 +1,12 @@
+This directory contains the code for **Efficient image loading**. For more information, visit [**Efficient image loading**](https://www.learnopencv.com/efficient-image-loading/).
+
+
+# AI Courses by OpenCV
+
+Want to become an expert in AI? [AI Courses by OpenCV](https://opencv.org/courses/) is a great place to start. 
+
+<a href="https://opencv.org/courses/">
+<p align="center"> 
+<img src="https://www.learnopencv.com/wp-content/uploads/2020/04/AI-Courses-By-OpenCV-Github.png">
+</p>
+</a>
@@ -0,0 +1,101 @@
+from argparse import ArgumentParser
+
+import numpy as np
+from prettytable import PrettyTable
+
+from create_lmdb import store_many_lmdb
+from create_tfrecords import store_many_tfrecords
+from loader import (
+    CV2Loader,
+    LmdbLoader,
+    PILLoader,
+    TFRecordsLoader,
+    TurboJpegLoader,
+    methods,
+)
+from tools import get_images_paths
+
+
+def count_time(loader, iters):
+    """Collect the per-image loading times reported by a loader.
+
+    Args:
+        loader: object supporting ``len()`` and ``iter()``; every ``next()``
+            on its iterator must yield an ``(image, time)`` pair.
+        iters: number of full passes to make over the loader.
+
+    Returns:
+        A NumPy array with every reported time, one entry per loaded image
+        (``iters * len(loader)`` entries in total).
+    """
+    # Remember the loader's own class name; the iterator it hands out is not
+    # guaranteed to be the same object.
+    loader_name = type(loader).__name__
+    num_images = len(loader)
+
+    time_list = []
+    for _ in range(iters):
+        # Ask the loader itself for an iterator on every pass instead of
+        # re-wrapping the iterator left over from the previous pass.
+        iterator = iter(loader)
+        for _ in range(num_images):
+            _, elapsed = next(iterator)  # the image itself is not needed here
+            time_list.append(elapsed)
+
+    time_list = np.asarray(time_list)
+    print_stats(time_list, loader_name)
+    return time_list
+
+
+def print_stats(time, name):
+    """Print mean, median, std, min and max of the measured times.
+
+    Args:
+        time: NumPy array of time measurements, in seconds.
+        name: label printed in front of every statistic.
+    """
+    # Each statistic is computed right before its line is printed, in the
+    # order listed here.
+    report = (
+        ("{} mean time - {:.8f} seconds", time.mean),
+        ("{} median time - {:.8f} seconds", lambda: np.median(time)),
+        ("{} std time - {:.8f} seconds", time.std),
+        ("{} min time - {:.8f} seconds", time.min),
+        ("{} max time - {:.8f} seconds", time.max),
+    )
+
+    print("Time measures for {}:".format(name))
+    for template, compute in report:
+        print(template.format(name, compute()))
+    print("\n")
+
+
+def benchmark(method, path, iters=100, **kwargs):
+    """Build the loader registered under ``method`` and time it.
+
+    Args:
+        method: key into the ``methods`` mapping imported from ``loader``.
+        path: first positional argument handed to the loader constructor.
+        iters: number of passes made by ``count_time``.
+        **kwargs: forwarded unchanged to the loader constructor.
+
+    Returns:
+        Whatever ``count_time`` returns for the constructed loader.
+    """
+    loader_factory = methods[method]
+    return count_time(loader_factory(path, **kwargs), iters)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: run the selected loaders over one image
+    # folder and print a summary table of mean / median loading times.
+    parser = ArgumentParser()
+
+    parser.add_argument(
+        "--path", "-p", type=str, help="path to image folder",
+    )
+    parser.add_argument(
+        "--method",
+        nargs="+",
+        required=True,
+        choices=["cv2", "pil", "turbojpeg", "lmdb", "tfrecords"],
+        help="Image loading methods to use in benchmark",
+    )
+    parser.add_argument(
+        "--mode",
+        "-m",
+        type=str,
+        required=True,
+        choices=["BGR", "RGB"],
+        help="Image color mode",
+    )
+    parser.add_argument(
+        "--iters",
+        type=int,
+        # Without a default the value is None, which is passed explicitly to
+        # benchmark() and ends up in range(None). 100 mirrors the default of
+        # benchmark().
+        default=100,
+        help="Number of iterations to average the results",
+    )
+    args = parser.parse_args()
+
+    benchmark_methods = args.method
+    image_paths = get_images_paths(args.path)
+
+    results = {}
+    for method in benchmark_methods:
+        # Database-backed loaders need their store built from the images
+        # first; the other loaders read the image folder directly.
+        if method == "lmdb":
+            path = "./lmdb/images"
+            store_many_lmdb(image_paths, path)
+        elif method == "tfrecords":
+            path = "./tfrecords/images.tfrecords"
+            store_many_tfrecords(image_paths, path)
+        else:
+            path = args.path
+
+        results[method] = benchmark(
+            method, path, mode=args.mode, iters=args.iters,
+        )
+
+    table = PrettyTable(["Loader", "Mean time", "Median time"])
+
+    print(
+        f"Benchmark on {len(image_paths)} {args.mode} images with {args.iters} averaging iteration results:\n",
+    )
+
+    for method, time in results.items():
+        table.add_row([method, time.mean(), np.median(time)])
+    print(table)
@@ -0,0 +1,55 @@
+import os
+from argparse import ArgumentParser
+
+import cv2
+import lmdb
+import numpy as np
+
+from tools import get_images_paths
+
+
+def store_many_lmdb(images_list, save_path):
+    """Store the raw bytes of every image file in an LMDB environment.
+
+    Keys are the zero-padded, 8-digit position of the image in
+    ``images_list`` (ASCII-encoded); values are the unmodified file contents.
+
+    Args:
+        images_list: sequence of image file paths.
+        save_path: directory of the LMDB environment; created if missing.
+
+    Raises:
+        ValueError: if ``images_list`` is empty.
+    """
+    num_images = len(images_list)  # number of images in our folder
+    if num_images == 0:
+        raise ValueError("images_list is empty, there is nothing to store")
+
+    # The largest file on disk (first one on ties) is used to size the map.
+    largest_file = max(images_list, key=os.path.getsize)
+    largest_image = cv2.imread(largest_file)
+    if largest_image is None:
+        # cv2.imread returns None instead of raising when it cannot decode a
+        # file; fall back to the on-disk size so sizing still works.
+        largest_nbytes = os.path.getsize(largest_file)
+    else:
+        largest_nbytes = largest_image.nbytes
+
+    # maximum database size in bytes, with a 10x safety margin
+    map_size = num_images * largest_nbytes * 10
+
+    # Create the target directory including missing parents, the same way
+    # store_many_tfrecords prepares its output location.
+    # NOTE(review): lmdb.open appears to create only the last path component.
+    os.makedirs(save_path, exist_ok=True)
+
+    env = lmdb.open(save_path, map_size=map_size)  # create lmdb environment
+    try:
+        with env.begin(write=True) as txn:  # start writing to environment
+            for i, image in enumerate(images_list):
+                with open(image, "rb") as file:
+                    data = file.read()  # read image as bytes
+                key = f"{i:08}"  # get image key
+                txn.put(key.encode("ascii"), data)  # put the key-value pair
+    finally:
+        env.close()  # close the environment even if a write failed
+
+
+if __name__ == "__main__":
+    # Command-line entry point: pack every image of a folder into an LMDB
+    # environment directory.
+    cli = ArgumentParser()
+    cli.add_argument(
+        "--path",
+        "-p",
+        required=True,
+        type=str,
+        help="path to the images folder to collect",
+    )
+    cli.add_argument(
+        "--output",
+        "-o",
+        required=True,
+        type=str,
+        help='path to the output environment directory file i.e. "path/to/folder/env/"',
+    )
+    options = cli.parse_args()
+
+    # Make sure the environment directory exists before anything is written.
+    output_dir = options.output
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    store_many_lmdb(get_images_paths(options.path), output_dir)
@@ -0,0 +1,67 @@
+import os
+from argparse import ArgumentParser
+
+import tensorflow as tf
+
+from tools import get_images_paths
+
+
+def _byte_feature(value):
+    """Wrap a single string / bytes value in a ``bytes_list`` feature."""
+    eager_tensor_type = type(tf.constant(0))
+    # BytesList can't unpack a string from an EagerTensor, so take the
+    # underlying value out of the tensor first.
+    raw = value.numpy() if isinstance(value, eager_tensor_type) else value
+    packed = tf.train.BytesList(value=[raw])
+    return tf.train.Feature(bytes_list=packed)
+
+
+def _int64_feature(value):
+    """Wrap a single bool / enum / int / uint value in an ``int64_list`` feature."""
+    packed = tf.train.Int64List(value=[value])
+    return tf.train.Feature(int64_list=packed)
+
+
+def image_example(image_string, label):
+    """Build a ``tf.train.Example`` holding one image and its label.
+
+    The example has two features: ``"label"`` (int64) and ``"image_raw"``
+    (bytes).
+    """
+    label_feature = _int64_feature(label)
+    image_feature = _byte_feature(image_string)
+    features = tf.train.Features(
+        feature={"label": label_feature, "image_raw": image_feature},
+    )
+    return tf.train.Example(features=features)
+
+
+def store_many_tfrecords(images_list, save_file):
+    """Serialize every image file into a single TFRecords file.
+
+    Each record is a ``tf.train.Example`` built by ``image_example`` from the
+    raw file bytes and the position of the file in ``images_list`` as label.
+
+    Args:
+        images_list: sequence of image file paths.
+        save_file: output path; must end with ``.tfrecords``. Missing parent
+            directories are created.
+
+    Raises:
+        AssertionError: if ``save_file`` does not end with ``.tfrecords``.
+    """
+    assert save_file.endswith(
+        ".tfrecords",
+    ), 'File path is wrong, it should contain "*myname*.tfrecords"'
+
+    directory = os.path.dirname(save_file)
+    # A bare file name has an empty dirname and os.makedirs("") raises, so
+    # only create a directory when the path actually names one.
+    if directory:
+        os.makedirs(directory, exist_ok=True)
+
+    with tf.io.TFRecordWriter(save_file) as writer:  # start writer
+        for label, filename in enumerate(images_list):  # cycle by each image path
+            # read the image as bytes string, closing the file right away
+            with open(filename, "rb") as image_file:
+                image_string = image_file.read()
+            # save the data as tf.Example object
+            tf_example = image_example(image_string, label)
+            writer.write(tf_example.SerializeToString())  # and write it into database
+
+
+if __name__ == "__main__":
+    # Command-line entry point: pack every image of a folder into one
+    # TFRecords file.
+    cli = ArgumentParser()
+    cli.add_argument(
+        "--path",
+        "-p",
+        required=True,
+        type=str,
+        help="path to the images folder to collect",
+    )
+    cli.add_argument(
+        "--output",
+        "-o",
+        required=True,
+        type=str,
+        help='path to the output tfrecords file i.e. "path/to/folder/myname.tfrecords"',
+    )
+    options = cli.parse_args()
+
+    store_many_tfrecords(get_images_paths(options.path), options.output)