diff --git a/.gitignore b/.gitignore
index 1ebd42e..effc86d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,6 @@ data/sequences/*
 data/train/*
 data/test/*
 data/c3d/*
+
+
+.DS_Store
\ No newline at end of file
diff --git a/data.py b/data.py
index 56ecf95..ba5b990 100644
--- a/data.py
+++ b/data.py
@@ -115,7 +115,7 @@ def split_train_test(self):
                 test.append(item)
         return train, test
 
-    def get_all_sequences_in_memory(self, train_test, data_type):
+    def get_all_sequences_in_memory(self, train_test, data_type,cnn_model_type):
         """
         This is a mirror of our generator, but attempts to load everything into
         memory so we can train way faster.
@@ -137,11 +137,11 @@ def get_all_sequences_in_memory(self, train_test, data_type):
                 sequence = self.build_image_sequence(frames)
 
             else:
-                sequence = self.get_extracted_sequence(data_type, row)
+                sequence = self.get_extracted_sequence(data_type, row,cnn_model_type=cnn_model_type)
 
                 if sequence is None:
                     print("Can't find sequence. Did you generate them?")
-                    raise
+                    raise(IOError)
 
             X.append(sequence)
             y.append(self.get_class_one_hot(row[1]))
@@ -149,7 +149,7 @@ def get_all_sequences_in_memory(self, train_test, data_type):
         return np.array(X), np.array(y)
 
     @threadsafe_generator
-    def frame_generator(self, batch_size, train_test, data_type):
+    def frame_generator(self, batch_size, train_test, data_type,cnn_model_type):
         """Return a generator that we can use to train on. There are
         a couple different things we can return:
 
@@ -182,7 +182,7 @@ def frame_generator(self, batch_size, train_test, data_type):
                     sequence = self.build_image_sequence(frames)
                 else:
                     # Get the sequence from disk.
-                    sequence = self.get_extracted_sequence(data_type, sample)
+                    sequence = self.get_extracted_sequence(data_type, sample,cnn_model_type=cnn_model_type)
 
                     if sequence is None:
                         raise ValueError("Can't find sequence. Did you generate them?")
@@ -196,17 +196,17 @@ def build_image_sequence(self, frames):
         """Given a set of frames (filenames), build our sequence."""
         return [process_image(x, self.image_shape) for x in frames]
 
-    def get_extracted_sequence(self, data_type, sample):
+    def get_extracted_sequence(self, data_type, sample,cnn_model_type):
         """Get the saved extracted features."""
         filename = sample[2]
-        path = os.path.join(self.sequence_path, filename + '-' + str(self.seq_length) + \
+        path = os.path.join(self.sequence_path, filename + '-' + str(self.seq_length) +  '-' + cnn_model_type + \
             '-' + data_type + '.npy')
         if os.path.isfile(path):
             return np.load(path)
         else:
             return None
 
-    def get_frames_by_filename(self, filename, data_type):
+    def get_frames_by_filename(self, filename, data_type,cnn_model_type):
         """Given a filename for one of our samples, return the data
         the model needs to make predictions."""
         # First, find the sample row.
@@ -226,7 +226,7 @@ def get_frames_by_filename(self, filename, data_type):
             sequence = self.build_image_sequence(frames)
         else:
             # Get the sequence from disk.
-            sequence = self.get_extracted_sequence(data_type, sample)
+            sequence = self.get_extracted_sequence(data_type, sample,cnn_model_type=cnn_model_type)
 
             if sequence is None:
                 raise ValueError("Can't find sequence. Did you generate them?")
diff --git a/demo.py b/demo.py
index 7c1c5b2..6b2226b 100644
--- a/demo.py
+++ b/demo.py
@@ -12,7 +12,7 @@
 from data import DataSet
 import numpy as np
 
-def predict(data_type, seq_length, saved_model, image_shape, video_name, class_limit):
+def predict(data_type, seq_length, saved_model, image_shape, video_name, class_limit,cnn_model_type):
     model = load_model(saved_model)
 
     # Get the data and process it.
@@ -23,7 +23,7 @@ def predict(data_type, seq_length, saved_model, image_shape, video_name, class_l
             class_limit=class_limit)
     
     # Extract the sample from the data.
-    sample = data.get_frames_by_filename(video_name, data_type)
+    sample = data.get_frames_by_filename(video_name, data_type,cnn_model_type=cnn_model_type)
 
     # Predict!
     prediction = model.predict(np.expand_dims(sample, axis=0))
@@ -48,7 +48,7 @@ def main():
     # an actual video file, extract frames, generate sequences, etc.
     #video_name = 'v_Archery_g04_c02'
     video_name = 'v_ApplyLipstick_g01_c01'
-
+    cnn_model_type = 'InceptionV3'
     # Chose images or features and image shape based on network.
     if model in ['conv_3d', 'c3d', 'lrcn']:
         data_type = 'images'
@@ -59,7 +59,7 @@ def main():
     else:
         raise ValueError("Invalid model. See train.py for options.")
 
-    predict(data_type, seq_length, saved_model, image_shape, video_name, class_limit)
+    predict(data_type, seq_length, saved_model, image_shape, video_name, class_limit,cnn_model_type)
 
 if __name__ == '__main__':
     main()
diff --git a/extract_features.py b/extract_features.py
index dbc9ee0..e6623aa 100644
--- a/extract_features.py
+++ b/extract_features.py
@@ -20,19 +20,22 @@
 # Set defaults.
 seq_length = 40
 class_limit = None  # Number of classes to extract. Can be 1-101 or None for all.
+# cnn_model_type='InceptionV3'
+cnn_model_type='nasnet'
 
+n_gpu=8
 # Get the dataset.
 data = DataSet(seq_length=seq_length, class_limit=class_limit)
 
 # get the model.
-model = Extractor()
+model = Extractor(cnn_model_type=cnn_model_type, n_gpu=n_gpu)
 
 # Loop through data.
 pbar = tqdm(total=len(data.data))
 for video in data.data:
 
     # Get the path to the sequence for this video.
-    path = os.path.join('data', 'sequences', video[2] + '-' + str(seq_length) + \
+    path = os.path.join('data', 'sequences', video[2] + '-' + str(seq_length) + '-' + cnn_model_type + \
         '-features')  # numpy will auto-append .npy
 
     # Check if we already have it.
@@ -46,11 +49,13 @@
     # Now downsample to just the ones we need.
     frames = data.rescale_list(frames, seq_length)
 
+    # Extract the whole sequence in one batch; batching is more efficient on the GPU.
+    sequence= list(model.extract_batch(frames,cnn_model_type=cnn_model_type))
     # Now loop through and extract features to build the sequence.
-    sequence = []
-    for image in frames:
-        features = model.extract(image)
-        sequence.append(features)
+    # sequence2 = []
+    # for image in frames:
+    #     features = model.extract(image,cnn_model_type=cnn_model_type)
+    #     sequence2.append(features)
 
     # Save the sequence.
     np.save(path, sequence)
diff --git a/extractor.py b/extractor.py
index a3d8cd8..7933d8a 100644
--- a/extractor.py
+++ b/extractor.py
@@ -1,11 +1,12 @@
 from keras.preprocessing import image
-from keras.applications.inception_v3 import InceptionV3, preprocess_input
+from keras.applications import inception_v3, nasnet
 from keras.models import Model, load_model
-from keras.layers import Input
+from keras.utils import multi_gpu_model
+
 import numpy as np
 
 class Extractor():
-    def __init__(self, weights=None):
+    def __init__(self, weights=None, cnn_model_type='nasnet', n_gpu=1):
         """Either load pretrained from imagenet, or load our saved
         weights from our own training."""
 
@@ -13,21 +14,25 @@ def __init__(self, weights=None):
 
         if weights is None:
             # Get model with pretrained weights.
-            base_model = InceptionV3(
-                weights='imagenet',
-                include_top=True
-            )
-
-            # We'll extract features at the final pool layer.
-            self.model = Model(
-                inputs=base_model.input,
-                outputs=base_model.get_layer('avg_pool').output
-            )
+            if cnn_model_type == 'InceptionV3':
+                self.model = inception_v3.InceptionV3(
+                    weights='imagenet',pooling='avg',
+                    include_top=False
+                )
+            elif cnn_model_type == 'nasnet':
+                base_model = nasnet.NASNetLarge(
+                    weights='imagenet',
+                    include_top=True
+                )
+                # issue https://github.com/keras-team/keras/issues/10109
+                self.model = Model(
+                    inputs=base_model.input,
+                    outputs=base_model.get_layer('global_average_pooling2d_1').output
+                )
 
         else:
             # Load the model first.
             self.model = load_model(weights)
-
             # Then remove the top so we get features not predictions.
             # From: https://github.com/fchollet/keras/issues/2371
             self.model.layers.pop()
@@ -36,20 +41,57 @@ def __init__(self, weights=None):
             self.model.output_layers = [self.model.layers[-1]]
             self.model.layers[-1].outbound_nodes = []
 
-    def extract(self, image_path):
-        img = image.load_img(image_path, target_size=(299, 299))
+        if n_gpu>1:
+            self.model = multi_gpu_model(self.model,n_gpu)
+
+    def extract(self, image_path, cnn_model_type='nasnet'):
+        """Return the feature vector `self.model` predicts for one image file."""
+        if cnn_model_type == 'InceptionV3':
+            target_size, preprocess = (299, 299), inception_v3.preprocess_input
+        elif cnn_model_type == 'nasnet':
+            target_size, preprocess = (331, 331), nasnet.preprocess_input
+        else:
+            # Fail fast: without this branch target_size would be unbound below.
+            raise ValueError('invalid cnn_model_type {}'.format(cnn_model_type))
+
+        img = image.load_img(image_path, target_size=target_size)
         x = image.img_to_array(img)
         x = np.expand_dims(x, axis=0)
-        x = preprocess_input(x)
+        x = preprocess(x)
 
         # Get the prediction.
         features = self.model.predict(x)
 
-        if self.weights is None:
-            # For imagenet/default network:
-            features = features[0]
-        else:
-            # For loaded network:
-            features = features[0]
+        features = features[0]
 
         return features
+
+
+    def extract_batch(self, image_path_list, cnn_model_type='InceptionV3'):
+        """Extract CNN features for a list of image paths in one predict call.
+
+        Returns whatever `self.model.predict` yields for the stacked batch.
+        Raises ValueError if `cnn_model_type` is not a supported backbone.
+        """
+        if cnn_model_type == 'InceptionV3':
+            target_size, preprocess = (299, 299, 3), inception_v3.preprocess_input
+        elif cnn_model_type == 'nasnet':
+            target_size, preprocess = (331, 331, 3), nasnet.preprocess_input
+        else:
+            # Fail fast: without this branch target_size would be unbound and
+            # the np.zeros call below would raise an UnboundLocalError.
+            raise ValueError('invalid cnn_model_type {}'.format(cnn_model_type))
+
+        # Pre-allocate one slot per image, each shaped like target_size.
+        X = np.zeros((len(image_path_list),) + target_size)
+        for img_idx, image_path in enumerate(image_path_list):
+            img = image.load_img(image_path, target_size=target_size[:2])
+            X[img_idx] = image.img_to_array(img)
+
+        # Apply the preprocessing that matches the chosen backbone.
+        X = preprocess(X)
+
+        # Get the prediction.
+        features_batch = self.model.predict(X)
+
+        return features_batch
diff --git a/models.py b/models.py
index 9bfbe1f..3b0e117 100644
--- a/models.py
+++ b/models.py
@@ -8,12 +8,34 @@
 from keras.layers.wrappers import TimeDistributed
 from keras.layers.convolutional import (Conv2D, MaxPooling3D, Conv3D,
     MaxPooling2D)
+from keras.utils import multi_gpu_model
+from keras import Model
+
+import tensorflow as tf
+
 from collections import deque
 import sys
 
+# Wraps multi_gpu_model(ser_model); load*/save* attribute lookups go to ser_model.
+class ModelMGPU(Model):
+    def __init__(self, ser_model, gpus):
+        pmodel = multi_gpu_model(ser_model, gpus)
+        self.__dict__.update(pmodel.__dict__)
+        self._smodel = ser_model
+
+    def __getattribute__(self, attrname):
+        '''Override load and save methods to be used from the serial-model. The
+        serial-model holds references to the weights in the multi-gpu model.
+        '''
+        # Matching is by substring: any name containing 'load' or 'save' is redirected.
+        if 'load' in attrname or 'save' in attrname:
+            return getattr(self._smodel, attrname)
+
+        return super(ModelMGPU, self).__getattribute__(attrname)
+
 class ResearchModels():
-    def __init__(self, nb_classes, model, seq_length,
-                 saved_model=None, features_length=2048):
+    def __init__(self, nb_classes, model_type, seq_length,
+                 saved_model=None, cnn_feature_size=4032,n_gpus = 8):
         """
         `model` = one of:
             lstm
@@ -40,45 +62,52 @@ def __init__(self, nb_classes, model, seq_length,
 
         # Get the appropriate model.
         if self.saved_model is not None:
-            print("Loading model %s" % self.saved_model)
-            self.model = load_model(self.saved_model)
-        elif model == 'lstm':
+            with tf.device('/cpu:0'):
+                print("Loading model %s" % self.saved_model)
+                serial_model = load_model(self.saved_model)
+        elif model_type == 'lstm':
             print("Loading LSTM model.")
-            self.input_shape = (seq_length, features_length)
-            self.model = self.lstm()
-        elif model == 'lrcn':
+            self.input_shape = (seq_length, cnn_feature_size)
+            serial_model = self.lstm(cnn_feature_size=cnn_feature_size)
+        elif model_type == 'lrcn':
             print("Loading CNN-LSTM model.")
             self.input_shape = (seq_length, 80, 80, 3)
-            self.model = self.lrcn()
-        elif model == 'mlp':
+            serial_model = self.lrcn()
+        elif model_type == 'mlp':
             print("Loading simple MLP.")
-            self.input_shape = (seq_length, features_length)
-            self.model = self.mlp()
-        elif model == 'conv_3d':
+            self.input_shape = (seq_length, cnn_feature_size)
+            serial_model= self.mlp()
+        elif model_type == 'conv_3d':
             print("Loading Conv3D")
             self.input_shape = (seq_length, 80, 80, 3)
-            self.model = self.conv_3d()
-        elif model == 'c3d':
+            serial_model = self.conv_3d()
+        elif model_type == 'c3d':
             print("Loading C3D")
             self.input_shape = (seq_length, 80, 80, 3)
-            self.model = self.c3d()
+            serial_model = self.c3d()
         else:
             print("Unknown network.")
             sys.exit()
 
+        if n_gpus==1:
+            self.model = serial_model
+        else:
+            self.model = ModelMGPU(ser_model=serial_model,gpus=n_gpus)
         # Now compile the network.
-        optimizer = Adam(lr=1e-5, decay=1e-6)
+        optimizer = Adam(lr=1e-5*n_gpus, decay=1e-6)
         self.model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                            metrics=metrics)
 
-        print(self.model.summary())
+        print(serial_model.summary())
 
-    def lstm(self):
+    def lstm(self,cnn_feature_size=4032):
         """Build a simple LSTM network. We pass the extracted features from
         our CNN to this model predomenently."""
         # Model.
         model = Sequential()
-        model.add(LSTM(2048, return_sequences=False,
+
+
+        model.add(LSTM(cnn_feature_size, return_sequences=False,
                        input_shape=self.input_shape,
                        dropout=0.5))
         model.add(Dense(512, activation='relu'))
diff --git a/train.py b/train.py
old mode 100644
new mode 100755
index bceb2ed..1345b4e
--- a/train.py
+++ b/train.py
@@ -9,8 +9,15 @@
 
 def train(data_type, seq_length, model, saved_model=None,
           class_limit=None, image_shape=None,
-          load_to_memory=False, batch_size=32, nb_epoch=100):
+          load_to_memory=False, batch_size=32, nb_epoch=100, n_gpus=8, cnn_model_type='nasnet'):
     # Helper: Save the model.
+    if cnn_model_type == 'InceptionV3':
+        cnn_feature_size = 2048
+    elif cnn_model_type == 'nasnet':
+        cnn_feature_size = 4032
+    else:
+        raise(IOError('invalid cnn_model_type {}'.format(cnn_model_type)))
+
     checkpointer = ModelCheckpoint(
         filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + \
             '.{epoch:03d}-{val_loss:.3f}.hdf5'),
@@ -18,10 +25,10 @@ def train(data_type, seq_length, model, saved_model=None,
         save_best_only=True)
 
     # Helper: TensorBoard
-    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))
+    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model + time.strftime("%Y%m%d_%H%M%S")))
 
     # Helper: Stop when we stop learning.
-    early_stopper = EarlyStopping(patience=5)
+    early_stopper = EarlyStopping(patience=20)
 
     # Helper: Save results.
     timestamp = time.time()
@@ -47,15 +54,16 @@ def train(data_type, seq_length, model, saved_model=None,
 
     if load_to_memory:
         # Get data.
-        X, y = data.get_all_sequences_in_memory('train', data_type)
-        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
+        X, y = data.get_all_sequences_in_memory('train', data_type,cnn_model_type=cnn_model_type)
+        X_test, y_test = data.get_all_sequences_in_memory('test', data_type,cnn_model_type=cnn_model_type)
     else:
         # Get generators.
-        generator = data.frame_generator(batch_size, 'train', data_type)
-        val_generator = data.frame_generator(batch_size, 'test', data_type)
+        generator     = data.frame_generator(batch_size, 'train', data_type,cnn_model_type=cnn_model_type)
+        val_generator = data.frame_generator(batch_size, 'test', data_type,cnn_model_type=cnn_model_type)
 
     # Get the model.
-    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)
+    rm = ResearchModels(len(data.classes), model_type=model, seq_length = seq_length,
+                        saved_model = saved_model, n_gpus=n_gpus, cnn_feature_size= cnn_feature_size)
 
     # Fit!
     if load_to_memory:
@@ -87,10 +95,15 @@ def main():
     model = 'lstm'
     saved_model = None  # None or weights file
     class_limit = None  # int, can be 1-101 or None
+
     seq_length = 40
     load_to_memory = False  # pre-load the sequences into memory
-    batch_size = 32
+    n_gpus = 8
+    batch_size = 32* n_gpus
     nb_epoch = 1000
+    cnn_model_type = 'nasnet'
+
+
 
     # Chose images or features and image shape based on network.
     if model in ['conv_3d', 'c3d', 'lrcn']:
@@ -104,7 +117,7 @@ def main():
 
     train(data_type, seq_length, model, saved_model=saved_model,
           class_limit=class_limit, image_shape=image_shape,
-          load_to_memory=load_to_memory, batch_size=batch_size, nb_epoch=nb_epoch)
+          load_to_memory=load_to_memory, batch_size=batch_size, nb_epoch=nb_epoch,n_gpus=n_gpus,cnn_model_type=cnn_model_type)
 
 if __name__ == '__main__':
     main()
diff --git a/train_cnn.py b/train_cnn.py
old mode 100644
new mode 100755
index dbd188a..d46e7f5
--- a/train_cnn.py
+++ b/train_cnn.py
@@ -17,10 +17,10 @@
 import os.path
 
 data = DataSet()
-
+data_fodler = './data'
 # Helper: Save the model.
 checkpointer = ModelCheckpoint(
-    filepath=os.path.join('data', 'checkpoints', 'inception.{epoch:03d}-{val_loss:.2f}.hdf5'),
+    filepath=os.path.join(data_fodler, 'checkpoints', 'inception.{epoch:03d}-{val_loss:.2f}.hdf5'),
     verbose=1,
     save_best_only=True)
 
@@ -28,7 +28,7 @@
 early_stopper = EarlyStopping(patience=10)
 
 # Helper: TensorBoard
-tensorboard = TensorBoard(log_dir=os.path.join('data', 'logs'))
+tensorboard = TensorBoard(log_dir=os.path.join(data_fodler, 'logs'))
 
 def get_generators():
     train_datagen = ImageDataGenerator(
@@ -42,14 +42,14 @@ def get_generators():
     test_datagen = ImageDataGenerator(rescale=1./255)
 
     train_generator = train_datagen.flow_from_directory(
-        os.path.join('data', 'train'),
+        os.path.join(data_fodler, 'train'),
         target_size=(299, 299),
         batch_size=32,
         classes=data.classes,
         class_mode='categorical')
 
     validation_generator = test_datagen.flow_from_directory(
-        os.path.join('data', 'test'),
+        os.path.join(data_fodler, 'test'),
         target_size=(299, 299),
         batch_size=32,
         classes=data.classes,
diff --git a/validate_rnn.py b/validate_rnn.py
old mode 100644
new mode 100755
index 8368893..71b3181
--- a/validate_rnn.py
+++ b/validate_rnn.py
@@ -7,7 +7,7 @@
 from data import DataSet
 
 def validate(data_type, model, seq_length=40, saved_model=None,
-             class_limit=None, image_shape=None):
+             class_limit=None, image_shape=None,cnn_model_type='InceptionV3'):
     batch_size = 32
 
     # Get the data and process it.
@@ -23,7 +23,7 @@ def validate(data_type, model, seq_length=40, saved_model=None,
             image_shape=image_shape
         )
 
-    val_generator = data.frame_generator(batch_size, 'test', data_type)
+    val_generator = data.frame_generator(batch_size, 'test', data_type,cnn_model_type=cnn_model_type)
 
     # Get the model.
     rm = ResearchModels(len(data.classes), model, seq_length, saved_model)
@@ -39,6 +39,7 @@ def validate(data_type, model, seq_length=40, saved_model=None,
 def main():
     model = 'lstm'
     saved_model = 'data/checkpoints/lstm-features.026-0.239.hdf5'
+    cnn_model_type = 'InceptionV3'
 
     if model == 'conv_3d' or model == 'lrcn':
         data_type = 'images'
@@ -48,7 +49,7 @@ def main():
         image_shape = None
 
     validate(data_type, model, saved_model=saved_model,
-             image_shape=image_shape, class_limit=4)
+             image_shape=image_shape, class_limit=4,cnn_model_type=cnn_model_type)
 
 if __name__ == '__main__':
     main()