diff --git a/.gitignore b/.gitignore index 1ebd42e..effc86d 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ data/sequences/* data/train/* data/test/* data/c3d/* + + +.DS_Store \ No newline at end of file diff --git a/data.py b/data.py index 56ecf95..ba5b990 100644 --- a/data.py +++ b/data.py @@ -115,7 +115,7 @@ def split_train_test(self): test.append(item) return train, test - def get_all_sequences_in_memory(self, train_test, data_type): + def get_all_sequences_in_memory(self, train_test, data_type,cnn_model_type): """ This is a mirror of our generator, but attempts to load everything into memory so we can train way faster. @@ -137,11 +137,11 @@ def get_all_sequences_in_memory(self, train_test, data_type): sequence = self.build_image_sequence(frames) else: - sequence = self.get_extracted_sequence(data_type, row) + sequence = self.get_extracted_sequence(data_type, row,cnn_model_type=cnn_model_type) if sequence is None: print("Can't find sequence. Did you generate them?") - raise + raise(IOError) X.append(sequence) y.append(self.get_class_one_hot(row[1])) @@ -149,7 +149,7 @@ def get_all_sequences_in_memory(self, train_test, data_type): return np.array(X), np.array(y) @threadsafe_generator - def frame_generator(self, batch_size, train_test, data_type): + def frame_generator(self, batch_size, train_test, data_type,cnn_model_type): """Return a generator that we can use to train on. There are a couple different things we can return: @@ -182,7 +182,7 @@ def frame_generator(self, batch_size, train_test, data_type): sequence = self.build_image_sequence(frames) else: # Get the sequence from disk. - sequence = self.get_extracted_sequence(data_type, sample) + sequence = self.get_extracted_sequence(data_type, sample,cnn_model_type=cnn_model_type) if sequence is None: raise ValueError("Can't find sequence. Did you generate them?") @@ -196,17 +196,17 @@ def build_image_sequence(self, frames): """Given a set of frames (filenames), build our sequence.""" return [process_image(x, self.image_shape) for x in frames] - def get_extracted_sequence(self, data_type, sample): + def get_extracted_sequence(self, data_type, sample,cnn_model_type): """Get the saved extracted features.""" filename = sample[2] - path = os.path.join(self.sequence_path, filename + '-' + str(self.seq_length) + \ + path = os.path.join(self.sequence_path, filename + '-' + str(self.seq_length) + '-' + cnn_model_type + \ '-' + data_type + '.npy') if os.path.isfile(path): return np.load(path) else: return None - def get_frames_by_filename(self, filename, data_type): + def get_frames_by_filename(self, filename, data_type,cnn_model_type): """Given a filename for one of our samples, return the data the model needs to make predictions.""" # First, find the sample row. @@ -226,7 +226,7 @@ def get_frames_by_filename(self, filename, data_type): sequence = self.build_image_sequence(frames) else: # Get the sequence from disk. - sequence = self.get_extracted_sequence(data_type, sample) + sequence = self.get_extracted_sequence(data_type, sample,cnn_model_type=cnn_model_type) if sequence is None: raise ValueError("Can't find sequence. Did you generate them?") diff --git a/demo.py b/demo.py index 7c1c5b2..6b2226b 100644 --- a/demo.py +++ b/demo.py @@ -12,7 +12,7 @@ from data import DataSet import numpy as np -def predict(data_type, seq_length, saved_model, image_shape, video_name, class_limit): +def predict(data_type, seq_length, saved_model, image_shape, video_name, class_limit,cnn_model_type): model = load_model(saved_model) # Get the data and process it. @@ -23,7 +23,7 @@ def predict(data_type, seq_length, saved_model, image_shape, video_name, class_l class_limit=class_limit) # Extract the sample from the data. - sample = data.get_frames_by_filename(video_name, data_type) + sample = data.get_frames_by_filename(video_name, data_type,cnn_model_type=cnn_model_type) # Predict! prediction = model.predict(np.expand_dims(sample, axis=0)) @@ -48,7 +48,7 @@ def main(): # an actual video file, extract frames, generate sequences, etc. #video_name = 'v_Archery_g04_c02' video_name = 'v_ApplyLipstick_g01_c01' - + cnn_model_type = 'InceptionV3' # Chose images or features and image shape based on network. if model in ['conv_3d', 'c3d', 'lrcn']: data_type = 'images' @@ -59,7 +59,7 @@ def main(): else: raise ValueError("Invalid model. See train.py for options.") - predict(data_type, seq_length, saved_model, image_shape, video_name, class_limit) + predict(data_type, seq_length, saved_model, image_shape, video_name, class_limit,cnn_model_type) if __name__ == '__main__': main() diff --git a/extract_features.py b/extract_features.py index dbc9ee0..e6623aa 100644 --- a/extract_features.py +++ b/extract_features.py @@ -20,19 +20,22 @@ # Set defaults. seq_length = 40 class_limit = None # Number of classes to extract. Can be 1-101 or None for all. +# cnn_model_type='InceptionV3' +cnn_model_type='nasnet' +n_gpu=8 # Get the dataset. data = DataSet(seq_length=seq_length, class_limit=class_limit) # get the model. -model = Extractor() +model = Extractor(cnn_model_type=cnn_model_type, n_gpu=n_gpu) # Loop through data. pbar = tqdm(total=len(data.data)) for video in data.data: # Get the path to the sequence for this video. - path = os.path.join('data', 'sequences', video[2] + '-' + str(seq_length) + \ + path = os.path.join('data', 'sequences', video[2] + '-' + str(seq_length) + '-' + cnn_model_type + \ '-features') # numpy will auto-append .npy # Check if we already have it. @@ -46,11 +49,13 @@ # Now downsample to just the ones we need. frames = data.rescale_list(frames, seq_length) + #Batch Processing is more efficient on GPU + sequence= list(model.extract_batch(frames,cnn_model_type=cnn_model_type)) # Now loop through and extract features to build the sequence. - sequence = [] - for image in frames: - features = model.extract(image) - sequence.append(features) + # sequence2 = [] + # for image in frames: + # features = model.extract(image,cnn_model_type=cnn_model_type) + # sequence2.append(features) # Save the sequence. np.save(path, sequence) diff --git a/extractor.py b/extractor.py index a3d8cd8..7933d8a 100644 --- a/extractor.py +++ b/extractor.py @@ -1,11 +1,12 @@ from keras.preprocessing import image -from keras.applications.inception_v3 import InceptionV3, preprocess_input +from keras.applications import inception_v3, nasnet from keras.models import Model, load_model -from keras.layers import Input +from keras.utils import multi_gpu_model + import numpy as np class Extractor(): - def __init__(self, weights=None): + def __init__(self, weights=None, cnn_model_type='nasnet', n_gpu=1): """Either load pretrained from imagenet, or load our saved weights from our own training.""" @@ -13,21 +14,25 @@ def __init__(self, weights=None): if weights is None: # Get model with pretrained weights. - base_model = InceptionV3( - weights='imagenet', - include_top=True - ) - - # We'll extract features at the final pool layer. - self.model = Model( - inputs=base_model.input, - outputs=base_model.get_layer('avg_pool').output - ) + if cnn_model_type == 'InceptionV3': + self.model = inception_v3.InceptionV3( + weights='imagenet',pooling='avg', + include_top=False + ) + elif cnn_model_type == 'nasnet': + base_model = nasnet.NASNetLarge( + weights='imagenet', + include_top=True + ) + # issue https://github.com/keras-team/keras/issues/10109 + self.model = Model( + inputs=base_model.input, + outputs=base_model.get_layer('global_average_pooling2d_1').output + ) else: # Load the model first. self.model = load_model(weights) - # Then remove the top so we get features not predictions. # From: https://github.com/fchollet/keras/issues/2371 self.model.layers.pop() @@ -36,20 +41,57 @@ def __init__(self, weights=None): self.model.output_layers = [self.model.layers[-1]] self.model.layers[-1].outbound_nodes = [] - def extract(self, image_path): - img = image.load_img(image_path, target_size=(299, 299)) + if n_gpu>1: + self.model = multi_gpu_model(self.model,n_gpu) + + def extract(self, image_path, cnn_model_type='nasnet'): + if cnn_model_type== 'InceptionV3': + target_size = (299, 299) + elif cnn_model_type== 'nasnet': + target_size = (331, 331) + + img = image.load_img(image_path, target_size=target_size) x = image.img_to_array(img) x = np.expand_dims(x, axis=0) - x = preprocess_input(x) + + if cnn_model_type == 'InceptionV3': + x = inception_v3.preprocess_input(x) + elif cnn_model_type == 'nasnet': + x = nasnet.preprocess_input(x) # Get the prediction. features = self.model.predict(x) - if self.weights is None: - # For imagenet/default network: - features = features[0] - else: - # For loaded network: - features = features[0] + features = features[0] return features + + + def extract_batch(self, image_path_list, cnn_model_type='InceptionV3'): + if cnn_model_type== 'InceptionV3': + target_size = (299, 299,3) + # feature_size = 2048 + + elif cnn_model_type== 'nasnet': + target_size = (331, 331,3) + # feature_size = 4032 + + batch_size = len(image_path_list) + + X = np.zeros((batch_size,) + target_size ) + + for img_idx, image_path in enumerate(image_path_list): + img = image.load_img(image_path, target_size=target_size[0:2]) + array = image.img_to_array(img) + X[img_idx] = array + # x = np.expand_dims(x, axis=0) + + if cnn_model_type == 'InceptionV3': + X = inception_v3.preprocess_input(X) + elif cnn_model_type == 'nasnet': + X = nasnet.preprocess_input(X) + + # Get the prediction. + features_batch = self.model.predict(X) + + return features_batch diff --git a/models.py b/models.py index 9bfbe1f..3b0e117 100644 --- a/models.py +++ b/models.py @@ -8,12 +8,34 @@ from keras.layers.wrappers import TimeDistributed from keras.layers.convolutional import (Conv2D, MaxPooling3D, Conv3D, MaxPooling2D) +from keras.utils import multi_gpu_model +from keras import Model + +import tensorflow as tf + from collections import deque import sys +# A wrapper class for multi gpu saving and loading +class ModelMGPU(Model): + def __init__(self, ser_model, gpus): + pmodel = multi_gpu_model(ser_model, gpus) + self.__dict__.update(pmodel.__dict__) + self._smodel = ser_model + + def __getattribute__(self, attrname): + '''Override load and save methods to be used from the serial-model. The + serial-model holds references to the weights in the multi-gpu model. + ''' + # return Model.__getattribute__(self, attrname) + if 'load' in attrname or 'save' in attrname: + return getattr(self._smodel, attrname) + + return super(ModelMGPU, self).__getattribute__(attrname) + class ResearchModels(): - def __init__(self, nb_classes, model, seq_length, - saved_model=None, features_length=2048): + def __init__(self, nb_classes, model_type, seq_length, + saved_model=None, cnn_feature_size=4032,n_gpus = 8): """ `model` = one of: lstm @@ -40,45 +62,52 @@ def __init__(self, nb_classes, model, seq_length, # Get the appropriate model. if self.saved_model is not None: - print("Loading model %s" % self.saved_model) - self.model = load_model(self.saved_model) - elif model == 'lstm': + with tf.device('/cpu:0'): + print("Loading model %s" % self.saved_model) + serial_model = load_model(self.saved_model) + elif model_type == 'lstm': print("Loading LSTM model.") - self.input_shape = (seq_length, features_length) - self.model = self.lstm() - elif model == 'lrcn': + self.input_shape = (seq_length, cnn_feature_size) + serial_model = self.lstm(cnn_feature_size=cnn_feature_size) + elif model_type == 'lrcn': print("Loading CNN-LSTM model.") self.input_shape = (seq_length, 80, 80, 3) - self.model = self.lrcn() - elif model == 'mlp': + serial_model = self.lrcn() + elif model_type == 'mlp': print("Loading simple MLP.") - self.input_shape = (seq_length, features_length) - self.model = self.mlp() - elif model == 'conv_3d': + self.input_shape = (seq_length, cnn_feature_size) + serial_model= self.mlp() + elif model_type == 'conv_3d': print("Loading Conv3D") self.input_shape = (seq_length, 80, 80, 3) - self.model = self.conv_3d() - elif model == 'c3d': + serial_model = self.conv_3d() + elif model_type == 'c3d': print("Loading C3D") self.input_shape = (seq_length, 80, 80, 3) - self.model = self.c3d() + serial_model = self.c3d() else: print("Unknown network.") sys.exit() + if n_gpus==1: + self.model = serial_model + else: + self.model = ModelMGPU(ser_model=serial_model,gpus=n_gpus) # Now compile the network. - optimizer = Adam(lr=1e-5, decay=1e-6) + optimizer = Adam(lr=1e-5*n_gpus, decay=1e-6) self.model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=metrics) - print(self.model.summary()) + print(serial_model.summary()) - def lstm(self): + def lstm(self,cnn_feature_size=4032): """Build a simple LSTM network. We pass the extracted features from our CNN to this model predomenently.""" # Model. model = Sequential() - model.add(LSTM(2048, return_sequences=False, + + + model.add(LSTM(cnn_feature_size, return_sequences=False, input_shape=self.input_shape, dropout=0.5)) model.add(Dense(512, activation='relu')) diff --git a/train.py b/train.py old mode 100644 new mode 100755 index bceb2ed..1345b4e --- a/train.py +++ b/train.py @@ -9,8 +9,15 @@ def train(data_type, seq_length, model, saved_model=None, class_limit=None, image_shape=None, - load_to_memory=False, batch_size=32, nb_epoch=100): + load_to_memory=False, batch_size=32, nb_epoch=100, n_gpus=8, cnn_model_type='nasnet'): # Helper: Save the model. + if cnn_model_type == 'InceptionV3': + cnn_feature_size = 2048 + elif cnn_model_type == 'nasnet': + cnn_feature_size = 4032 + else: + raise(IOError('invalid cnn_model_type {}'.format(cnn_model_type))) + checkpointer = ModelCheckpoint( filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + \ '.{epoch:03d}-{val_loss:.3f}.hdf5'), @@ -18,10 +25,10 @@ def train(data_type, seq_length, model, saved_model=None, save_best_only=True) # Helper: TensorBoard - tb = TensorBoard(log_dir=os.path.join('data', 'logs', model)) + tb = TensorBoard(log_dir=os.path.join('data', 'logs', model + time.strftime("%Y%m%d_%H%M%S"))) # Helper: Stop when we stop learning. - early_stopper = EarlyStopping(patience=5) + early_stopper = EarlyStopping(patience=20) # Helper: Save results. timestamp = time.time() @@ -47,15 +54,16 @@ def train(data_type, seq_length, model, saved_model=None, if load_to_memory: # Get data. - X, y = data.get_all_sequences_in_memory('train', data_type) - X_test, y_test = data.get_all_sequences_in_memory('test', data_type) + X, y = data.get_all_sequences_in_memory('train', data_type,cnn_model_type=cnn_model_type) + X_test, y_test = data.get_all_sequences_in_memory('test', data_type,cnn_model_type=cnn_model_type) else: # Get generators. - generator = data.frame_generator(batch_size, 'train', data_type) - val_generator = data.frame_generator(batch_size, 'test', data_type) + generator = data.frame_generator(batch_size, 'train', data_type,cnn_model_type=cnn_model_type) + val_generator = data.frame_generator(batch_size, 'test', data_type,cnn_model_type=cnn_model_type) # Get the model. - rm = ResearchModels(len(data.classes), model, seq_length, saved_model) + rm = ResearchModels(len(data.classes), model_type=model, seq_length = seq_length, + saved_model = saved_model, n_gpus=n_gpus, cnn_feature_size= cnn_feature_size) # Fit! if load_to_memory: @@ -87,10 +95,15 @@ def main(): model = 'lstm' saved_model = None # None or weights file class_limit = None # int, can be 1-101 or None + seq_length = 40 load_to_memory = False # pre-load the sequences into memory - batch_size = 32 + n_gpus = 8 + batch_size = 32* n_gpus nb_epoch = 1000 + cnn_model_type = 'nasnet' + + # Chose images or features and image shape based on network. if model in ['conv_3d', 'c3d', 'lrcn']: @@ -104,7 +117,7 @@ def main(): train(data_type, seq_length, model, saved_model=saved_model, class_limit=class_limit, image_shape=image_shape, - load_to_memory=load_to_memory, batch_size=batch_size, nb_epoch=nb_epoch) + load_to_memory=load_to_memory, batch_size=batch_size, nb_epoch=nb_epoch,n_gpus=n_gpus,cnn_model_type=cnn_model_type) if __name__ == '__main__': main() diff --git a/train_cnn.py b/train_cnn.py old mode 100644 new mode 100755 index dbd188a..d46e7f5 --- a/train_cnn.py +++ b/train_cnn.py @@ -17,10 +17,10 @@ import os.path data = DataSet() - +data_fodler = './data' # Helper: Save the model. checkpointer = ModelCheckpoint( - filepath=os.path.join('data', 'checkpoints', 'inception.{epoch:03d}-{val_loss:.2f}.hdf5'), + filepath=os.path.join(data_fodler, 'checkpoints', 'inception.{epoch:03d}-{val_loss:.2f}.hdf5'), verbose=1, save_best_only=True) @@ -28,7 +28,7 @@ early_stopper = EarlyStopping(patience=10) # Helper: TensorBoard -tensorboard = TensorBoard(log_dir=os.path.join('data', 'logs')) +tensorboard = TensorBoard(log_dir=os.path.join(data_fodler, 'logs')) def get_generators(): train_datagen = ImageDataGenerator( @@ -42,14 +42,14 @@ def get_generators(): test_datagen = ImageDataGenerator(rescale=1./255) train_generator = train_datagen.flow_from_directory( - os.path.join('data', 'train'), + os.path.join(data_fodler, 'train'), target_size=(299, 299), batch_size=32, classes=data.classes, class_mode='categorical') validation_generator = test_datagen.flow_from_directory( - os.path.join('data', 'test'), + os.path.join(data_fodler, 'test'), target_size=(299, 299), batch_size=32, classes=data.classes, diff --git a/validate_rnn.py b/validate_rnn.py old mode 100644 new mode 100755 index 8368893..71b3181 --- a/validate_rnn.py +++ b/validate_rnn.py @@ -7,7 +7,7 @@ from data import DataSet def validate(data_type, model, seq_length=40, saved_model=None, - class_limit=None, image_shape=None): + class_limit=None, image_shape=None,cnn_model_type='InceptionV3'): batch_size = 32 # Get the data and process it. @@ -23,7 +23,7 @@ def validate(data_type, model, seq_length=40, saved_model=None, image_shape=image_shape ) - val_generator = data.frame_generator(batch_size, 'test', data_type) + val_generator = data.frame_generator(batch_size, 'test', data_type,cnn_model_type=cnn_model_type) # Get the model. rm = ResearchModels(len(data.classes), model, seq_length, saved_model) @@ -39,6 +39,7 @@ def validate(data_type, model, seq_length=40, saved_model=None, def main(): model = 'lstm' saved_model = 'data/checkpoints/lstm-features.026-0.239.hdf5' + cnn_model_type = 'InceptionV3' if model == 'conv_3d' or model == 'lrcn': data_type = 'images' @@ -48,7 +49,7 @@ def main(): image_shape = None validate(data_type, model, saved_model=saved_model, - image_shape=image_shape, class_limit=4) + image_shape=image_shape, class_limit=4,cnn_model_type=cnn_model_type) if __name__ == '__main__': main()