update TFRecord, preprocess data with Queue and Thread/ add cifar-10 example using TFRecord

zsdonghao · zsdonghao · commit 7594891e759a · 2016-09-28T16:54:32.000+01:00
diff --git a/docs/user/example.rst b/docs/user/example.rst
@@ -16,7 +16,7 @@ Computer Vision
  - Denoising Autoencoder (MNIST). A multi-layer perceptron implementation for MNIST classification task, see ``tutorial_mnist.py`` on `GitHub`_.
  - Stacked Denoising Autoencoder and Fine-Tuning (MNIST). A multi-layer perceptron implementation for MNIST classification task, see ``tutorial_mnist.py`` on `GitHub`_.
  - Convolutional Network (MNIST). A Convolutional neural network implementation for classifying MNIST dataset, see ``tutorial_mnist.py`` on `GitHub`_.
- - Convolutional Network (CIFAR-10). A Convolutional neural network implementation for classifying CIFAR-10 dataset, see ``tutorial_cifar10.py`` on `GitHub`_.
+ - Convolutional Network (CIFAR-10). A Convolutional neural network implementation for classifying CIFAR-10 dataset, see ``tutorial_cifar10.py`` and ``tutorial_cifar10_tfrecord.py``on `GitHub`_.
  - VGG 16 (ImageNet). A Convolutional neural network implementation for classifying ImageNet dataset, see ``tutorial_vgg16.py`` on `GitHub`_.
  - VGG 19 (ImageNet). A Convolutional neural network implementation for classifying ImageNet dataset, see ``tutorial_vgg19.py`` on `GitHub`_.
 
@@ -35,6 +35,7 @@ Reinforcement Learning
 
  - Deep Reinforcement Learning - Pong Game. Teach a machine to play Pong games, see ``tutorial_atari_pong.py`` on `GitHub`_.
 
+
 ..
   Applications
   =============
diff --git a/tensorlayer/ops.py b/tensorlayer/ops.py
@@ -118,7 +118,7 @@ def disable_print():
     sys.stderr = os.devnull
 
 def enable_print():
-    """Enable console output. 
+    """Enable console output.
 
     Example
     --------
@@ -152,7 +152,13 @@ def __exit__(self, type, value, traceback):
 
 
 
-
+def get_site_packages_directory():
+    """Print and return the site-packages directory?
+    """
+    import site
+    loc = site.getsitepackages()
+    print(loc)
+    return loc
 
 
 
diff --git a/tutorial_cifar10.py b/tutorial_cifar10.py
@@ -8,8 +8,12 @@
 import numpy as np
 import time
 
-"""Example of CNN
+"""Example of CNN, CIFAR-10
 
+main_test_cnn_naive       : No distorted image / Low accuracy
+main_test_cnn_advanced    : Uses distorted image / High accurcy but Slow
+tutorial_cifar10_tfrecord : Preparing distorted image with Queue and Thread
+                            / High accurcy and Fast
 """
 
 
@@ -187,8 +191,9 @@ def main_test_cnn_advanced():
     .. Randomly distort the image brightness.
     .. Randomly distort the image contrast.
 
-    To do
-    ------
+    Speed Up
+    ---------
+    see `tutorial_cifar10_tfrecord.py`
     Reading images from disk and distorting them can use a non-trivial amount
     of processing time. To prevent these operations from slowing down training,
     we run them inside 16 separate threads which continuously fill a TensorFlow queue.
diff --git a/tutorial_cifar10_tfrecord.py b/tutorial_cifar10_tfrecord.py
@@ -0,0 +1,308 @@
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+import tensorflow as tf
+import tensorlayer as tl
+from tensorlayer.layers import set_keep
+import numpy as np
+import time
+from PIL import Image
+import os
+import io
+
+"""Reimplementation of the TensorFlow official CIFAR-10 CNN tutorials:
+
+This model has 1,068,298 paramters, after few hours of training with GPU,
+accurcy of 86% was found.
+
+Hint : Set different num_threads according to your machine.
+
+Links
+-------
+.. https://www.tensorflow.org/versions/r0.9/tutorials/deep_cnn/index.html
+.. https://github.com/tensorflow/tensorflow/tree/r0.9/tensorflow/models/image/cifar10
+
+Note
+------
+The optimizers between official code and this code are different.
+
+Description
+-----------
+The images are processed as follows:
+.. They are cropped to 24 x 24 pixels, centrally for evaluation or randomly for training.
+.. They are approximately whitened to make the model insensitive to dynamic range.
+
+For training, we additionally apply a series of random distortions to
+artificially increase the data set size:
+.. Randomly flip the image from left to right.
+.. Randomly distort the image brightness.
+.. Randomly distort the image contrast.
+
+Speed Up
+--------
+Reading images from disk and distorting them can use a non-trivial amount
+of processing time. To prevent these operations from slowing down training,
+we run them inside 16 separate threads which continuously fill a TensorFlow queue.
+"""
+model_file_name = "model_cifar10_tfrecord.ckpt"
+resume = False # load model, resume from previous checkpoint?
+
+## Download data, and convert to TFRecord format, see ```tutorial_tfrecord.py```
+X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(
+                                    shape=(-1, 32, 32, 3), plotable=False)
+
+X_train = np.asarray(X_train, dtype=np.float32)
+y_train = np.asarray(y_train, dtype=np.int64)
+X_test = np.asarray(X_test, dtype=np.float32)
+y_test = np.asarray(y_test, dtype=np.int64)
+
+print('X_train.shape', X_train.shape)   # (50000, 32, 32, 3)
+print('y_train.shape', y_train.shape)   # (50000,)
+print('X_test.shape', X_test.shape)     # (10000, 32, 32, 3)
+print('y_test.shape', y_test.shape)     # (10000,)
+print('X %s   y %s' % (X_test.dtype, y_test.dtype))
+
+def data_to_tfrecord(images, labels, filename):
+    """ Save data into TFRecord """
+    print("Converting data into %s ..." % filename)
+    cwd = os.getcwd()
+    writer = tf.python_io.TFRecordWriter(filename)
+    for index, img in enumerate(images):
+        img_raw = img.tobytes()
+        ## Visualize a image
+        # tl.visualize.frame(np.asarray(img, dtype=np.uint8), second=1, saveable=False, name='frame', fig_idx=1236)
+        label = int(labels[index])
+        # print(label)
+        ## Convert the bytes back to image as follow:
+            # image = Image.frombytes('RGB', (32, 32), img_raw)
+        # image = np.fromstring(img_raw, np.float32)
+        # image = image.reshape([32, 32, 3])
+        # tl.visualize.frame(np.asarray(image, dtype=np.uint8), second=1, saveable=False, name='frame', fig_idx=1236)
+        example = tf.train.Example(features=tf.train.Features(feature={
+            "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
+            'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
+        }))
+        writer.write(example.SerializeToString())  # Serialize To String
+    writer.close()
+
+def read_and_decode(filename, is_train=None):
+    """ Return tensor to read from TFRecord """
+    filename_queue = tf.train.string_input_producer([filename])
+    reader = tf.TFRecordReader()
+    _, serialized_example = reader.read(filename_queue)
+    features = tf.parse_single_example(serialized_example,
+                                       features={
+                                           'label': tf.FixedLenFeature([], tf.int64),
+                                           'img_raw' : tf.FixedLenFeature([], tf.string),
+                                       })
+    # You can do more image distortion here for training data
+    img = tf.decode_raw(features['img_raw'], tf.float32)
+    img = tf.reshape(img, [32, 32, 3])
+    # img = tf.cast(img, tf.float32) #* (1. / 255) - 0.5
+    if is_train == True:
+        # 1. Randomly crop a [height, width] section of the image.
+        img = tf.random_crop(img, [24, 24, 3])
+        # 2. Randomly flip the image horizontally.
+        img = tf.image.random_flip_left_right(img)
+        # 3. Randomly change brightness.
+        img = tf.image.random_brightness(img, max_delta=63)
+        # 4. Randomly change contrast.
+        img = tf.image.random_contrast(img, lower=0.2, upper=1.8)
+        # 5. Subtract off the mean and divide by the variance of the pixels.
+        img = tf.image.per_image_whitening(img)
+    elif is_train == False:
+        # 1. Crop the central [height, width] of the image.
+        img = tf.image.resize_image_with_crop_or_pad(img, 24, 24)
+        # 2. Subtract off the mean and divide by the variance of the pixels.
+        img = tf.image.per_image_whitening(img)
+    elif is_train == None:
+        img = img
+
+    label = tf.cast(features['label'], tf.int32)
+    return img, label
+
+data_to_tfrecord(images=X_train, labels=y_train, filename="train.cifar10")
+data_to_tfrecord(images=X_test, labels=y_test, filename="test.cifar10")
+
+## Example to visualize data
+# img, label = read_and_decode("train.cifar10", None)
+# img_batch, label_batch = tf.train.shuffle_batch([img, label],
+#                                                 batch_size=4,
+#                                                 capacity=50000,
+#                                                 min_after_dequeue=10000,
+#                                                 num_threads=1)
+# print("img_batch   : %s" % img_batch._shape)
+# print("label_batch : %s" % label_batch._shape)
+#
+# init = tf.initialize_all_variables()
+# with tf.Session() as sess:
+#     sess.run(init)
+#     coord = tf.train.Coordinator()
+#     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
+#
+#     for i in range(3):  # number of mini-batch (step)
+#         print("Step %d" % i)
+#         val, l = sess.run([img_batch, label_batch])
+#         # exit()
+#         print(val.shape, l)
+#         tl.visualize.images2d(val, second=1, saveable=False, name='batch'+str(i), dtype=np.uint8, fig_idx=2020121)
+#
+#     coord.request_stop()
+#     coord.join(threads)
+#     sess.close()
+
+# with tf.device('/gpu:1'):
+
+sess = tf.InteractiveSession()
+
+batch_size = 128
+model_file_name = "model_cifar10_advanced.ckpt"
+resume = False # load model, resume from previous checkpoint?
+
+x_train_, y_train_ = read_and_decode("train.cifar10", True)
+x_test_, y_test_   = read_and_decode("test.cifar10", False)
+
+x_train_batch, y_train_batch = tf.train.shuffle_batch([x_train_, y_train_],
+                                                batch_size=batch_size,
+                                                capacity=2000,
+                                                min_after_dequeue=1000,
+                                                num_threads=32) # set the number of threads here
+# for testing, uses batch instead of shuffle_batch
+x_test_batch, y_test_batch = tf.train.batch([x_test_, y_test_],
+                                                batch_size=batch_size,
+                                                capacity=50000,
+                                                num_threads=32)
+
+def inference(x_crop, y_, reuse):
+    with tf.variable_scope("model", reuse=reuse):
+        tl.layers.set_name_reuse(reuse)
+        network = tl.layers.InputLayer(x_crop, name='input_layer')
+        network = tl.layers.Conv2dLayer(network,
+                            act = tf.nn.relu,
+                            shape = [5, 5, 3, 64],  # 64 features for each 5x5x3 patch
+                            strides=[1, 1, 1, 1],
+                            padding='SAME',
+                            W_init=tf.truncated_normal_initializer(stddev=5e-2),
+                            b_init=tf.constant_initializer(value=0.0),
+                            name ='cnn_layer1')     # output: (batch_size, 24, 24, 64)
+        network = tl.layers.PoolLayer(network,
+                            ksize=[1, 3, 3, 1],
+                            strides=[1, 2, 2, 1],
+                            padding='SAME',
+                            pool = tf.nn.max_pool,
+                            name ='pool_layer1',)   # output: (batch_size, 12, 12, 64)
+        network.outputs = tf.nn.lrn(network.outputs, 4, bias=1.0, alpha=0.001 / 9.0,
+                                                        beta=0.75, name='norm1')
+        network = tl.layers.Conv2dLayer(network,
+                            act = tf.nn.relu,
+                            shape = [5, 5, 64, 64], # 64 features for each 5x5 patch
+                            strides=[1, 1, 1, 1],
+                            padding='SAME',
+                            W_init=tf.truncated_normal_initializer(stddev=5e-2),
+                            b_init=tf.constant_initializer(value=0.1),
+                            name ='cnn_layer2')     # output: (batch_size, 12, 12, 64)
+        network.outputs = tf.nn.lrn(network.outputs, 4, bias=1.0, alpha=0.001 / 9.0,
+                                                        beta=0.75, name='norm2')
+        network = tl.layers.PoolLayer(network,
+                            ksize=[1, 3, 3, 1],
+                            strides=[1, 2, 2, 1],
+                            padding='SAME',
+                            pool = tf.nn.max_pool,
+                            name ='pool_layer2')   # output: (batch_size, 6, 6, 64)
+        network = tl.layers.FlattenLayer(network, name='flatten_layer')                        # output: (batch_size, 2304)
+        network = tl.layers.DenseLayer(network, n_units=384, act = tf.nn.relu,
+                            W_init=tf.truncated_normal_initializer(stddev=0.04),
+                            b_init=tf.constant_initializer(value=0.1),
+                            name='relu1')       # output: (batch_size, 384)
+        network = tl.layers.DenseLayer(network, n_units=192, act = tf.nn.relu,
+                            W_init=tf.truncated_normal_initializer(stddev=0.04),
+                            b_init=tf.constant_initializer(value=0.1),
+                            name='relu2')       # output: (batch_size, 192)
+        network = tl.layers.DenseLayer(network, n_units=10, act = tf.identity,
+                            W_init=tf.truncated_normal_initializer(stddev=1/192.0),
+                            b_init = tf.constant_initializer(value=0.0),
+                            name='output_layer')    # output: (batch_size, 10)
+        y = network.outputs
+
+        ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y, y_))
+        # L2 for the MLP, without this, the accuracy will be reduced by 15%.
+        L2 = tf.contrib.layers.l2_regularizer(0.004)(network.all_params[4]) + \
+                tf.contrib.layers.l2_regularizer(0.004)(network.all_params[6])
+        cost = ce + L2
+
+        # correct_prediction = tf.equal(tf.argmax(tf.nn.softmax(y), 1), y_)
+        correct_prediction = tf.equal(tf.cast(tf.argmax(y, 1), tf.int32), y_)
+        acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+
+        return cost, acc, network
+
+## You can also use placeholder to feed_dict in data after using
+## val, l = sess.run([x_train_batch, y_train_batch]) to get the data
+# x_crop = tf.placeholder(tf.float32, shape=[batch_size, 24, 24, 3])
+# y_ = tf.placeholder(tf.int32, shape=[batch_size,])
+# cost, acc, network = inference(x_crop, y_, None)
+
+cost, acc, network = inference(x_train_batch, y_train_batch, None)
+cost_test, acc_test, _ = inference(x_test_batch, y_test_batch, True)
+
+## train
+n_epoch = 50000
+learning_rate = 0.0001
+print_freq = 5
+n_step_epoch = int(len(y_train)/batch_size)
+n_step = n_epoch * n_step_epoch
+
+train_params = network.all_params
+train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999,
+    epsilon=1e-08, use_locking=False).minimize(cost)#, var_list=train_params)
+
+sess.run(tf.initialize_all_variables())
+if resume:
+    print("Load existing model " + "!"*10)
+    saver = tf.train.Saver()
+    saver.restore(sess, model_file_name)
+
+network.print_params()
+network.print_layers()
+
+print('   learning_rate: %f' % learning_rate)
+print('   batch_size: %d' % batch_size)
+print('   n_epoch: %d, step in an epoch: %d, total n_step: %d' % (n_epoch, n_step_epoch, n_step))
+
+coord = tf.train.Coordinator()
+threads = tf.train.start_queue_runners(sess=sess, coord=coord)
+# for step in range(n_step):
+step = 1
+for epoch in range(n_epoch):
+    start_time = time.time()
+    train_loss, train_acc, n_batch = 0, 0, 0
+    for s in range(n_step_epoch):
+        ## You can also use placeholder to feed_dict in data after using
+        # val, l = sess.run([x_train_batch, y_train_batch])
+        # tl.visualize.images2d(val, second=3, saveable=False, name='batch', dtype=np.uint8, fig_idx=2020121)
+        # err, ac, _ = sess.run([cost, acc, train_op], feed_dict={x_crop: val, y_: l})
+        err, ac, _ = sess.run([cost, acc, train_op])
+        step += 1
+        train_loss += err; train_acc += ac; n_batch += 1
+
+    if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
+        print("Epoch %d : Step %d-%d of %d took %fs" % (epoch, step, step + n_step_epoch, n_step, time.time() - start_time))
+        print("   train loss: %f" % (train_loss/ n_batch))
+        print("   train acc: %f" % (train_acc/ n_batch))
+
+        test_loss, test_acc, n_batch = 0, 0, 0
+        for _ in range(int(len(y_test)/batch_size)):
+            err, ac = sess.run([cost_test, acc_test])
+            test_loss += err; test_acc += ac; n_batch += 1
+        print("   test loss: %f" % (test_loss/ n_batch))
+        print("   test acc: %f" % (test_acc/ n_batch))
+
+    if (epoch + 1) % (print_freq * 10) == 0:
+        print("Save model " + "!"*10)
+        saver = tf.train.Saver()
+        save_path = saver.save(sess, model_file_name)
+
+coord.request_stop()
+coord.join(threads)
+sess.close()
diff --git a/tutorial_tfrecord.py b/tutorial_tfrecord.py
diff --git a/tutorial_tfrecord2.py b/tutorial_tfrecord2.py