diff --git a/cnn.py b/cnn.py index 2c90619..3967f68 100644 --- a/cnn.py +++ b/cnn.py @@ -6,123 +6,122 @@ #!/usr/bin/python +from util import batch_norm_conv, conv2d, max_pool, weight_variable +from config import cfg +import math import tensorflow.compat.v1 as tf + tf.compat.v1.disable_eager_execution() -import math - -from config import cfg -from util import batch_norm_conv -from util import weight_variable -from util import conv2d -from util import max_pool #################################################################### -#CNN-specific architecture configuration +# CNN-specific architecture configuration #################################################################### -WND_HEIGHT = 64 #Extraction window height -WND_WIDTH = 64 #Extraction window width -WND_SHIFT = WND_WIDTH - 2 #Window shift +WND_HEIGHT = 64 # Extraction window height +WND_WIDTH = 64 # Extraction window width +WND_SHIFT = WND_WIDTH - 2 # Window shift -MPoolLayers_ALL = 5 #Nbr of all maxpool layers -MPoolLayers_H = 2 #Nbr of maxpool in horizontal dimension -LastFilters = 512 #Nbr of feature maps at the last conv layer +MPoolLayers_ALL = 5 # Nbr of all maxpool layers +MPoolLayers_H = 2 # Nbr of maxpool in horizontal dimension +LastFilters = 512 # Nbr of feature maps at the last conv layer #################################################################### FV = int(WND_HEIGHT / math.pow(2, MPoolLayers_ALL)) NFeatures = FV * LastFilters + def CNNLight(X, Training, Scope): - with tf.variable_scope(Scope): + with tf.variable_scope(Scope): - ConvLayer1 = ConvLayer(X, 1, 64, Training, 'ConvLayer1') + ConvLayer1 = ConvLayer(X, 1, 64, Training, 'ConvLayer1') - MPool1 = max_pool(ConvLayer1, ksize=(2, 2), stride=(2, 2)) + MPool1 = max_pool(ConvLayer1, ksize=(2, 2), stride=(2, 2)) - ConvLayer2 = ConvLayer(MPool1, 64, 128, Training, 'ConvLayer2') + ConvLayer2 = ConvLayer(MPool1, 64, 128, Training, 'ConvLayer2') - MPool2 = max_pool(ConvLayer2, ksize=(2, 2), stride=(2, 2)) + MPool2 = max_pool(ConvLayer2, ksize=(2, 2), stride=(2, 2)) - ConvLayer3 = ConvLayer(MPool2, 128, 256, Training, 'ConvLayer3') + ConvLayer3 = ConvLayer(MPool2, 128, 256, Training, 'ConvLayer3') - ConvLayer4 = ConvLayer(ConvLayer3, 256, 256, Training, 'ConvLayer4') + ConvLayer4 = ConvLayer(ConvLayer3, 256, 256, Training, 'ConvLayer4') - MPool4 = max_pool(ConvLayer4, ksize=(2, 1), stride=(2, 1)) + MPool4 = max_pool(ConvLayer4, ksize=(2, 1), stride=(2, 1)) - ConvLayer5 = ConvLayer(MPool4, 256, 512, Training, 'ConvLayer5') + ConvLayer5 = ConvLayer(MPool4, 256, 512, Training, 'ConvLayer5') - ConvLayer6 = ConvLayer(ConvLayer5, 512, 512, Training, 'ConvLayer6') + ConvLayer6 = ConvLayer(ConvLayer5, 512, 512, Training, 'ConvLayer6') - MPool6 = max_pool(ConvLayer6, ksize=(2, 1), stride=(2, 1)) + MPool6 = max_pool(ConvLayer6, ksize=(2, 1), stride=(2, 1)) - ConvLayer7 = ConvLayer(MPool6, 512, 512, Training, 'ConvLayer7') + ConvLayer7 = ConvLayer(MPool6, 512, 512, Training, 'ConvLayer7') - MPool7 = max_pool(ConvLayer7, ksize=(2, 1), stride=(2, 1)) + MPool7 = max_pool(ConvLayer7, ksize=(2, 1), stride=(2, 1)) - MPool7_T = tf.transpose(MPool7, perm=[0,2,1,3]) + MPool7_T = tf.transpose(MPool7, perm=[0, 2, 1, 3]) - MPool7_T_RSH = tf.reshape(MPool7_T, [-1, FV, LastFilters]) + MPool7_T_RSH = tf.reshape(MPool7_T, [-1, FV, LastFilters]) - return tf.reshape(MPool7_T_RSH, [-1, NFeatures]) + return tf.reshape(MPool7_T_RSH, [-1, NFeatures]) def CNN(X, Training, Scope): - with tf.variable_scope(Scope): + with tf.variable_scope(Scope): - ConvLayer1 = ConvLayer(X, 1, 64, Training, 'ConvLayer1') + ConvLayer1 = ConvLayer(X, 1, 64, Training, 'ConvLayer1') - ConvLayer2 = ConvLayer(ConvLayer1, 64, 64, Training, 'ConvLayer2') + ConvLayer2 = ConvLayer(ConvLayer1, 64, 64, Training, 'ConvLayer2') - MPool2 = max_pool(ConvLayer2, ksize=(2, 2), stride=(2, 2)) + MPool2 = max_pool(ConvLayer2, ksize=(2, 2), stride=(2, 2)) - ConvLayer3 = ConvLayer(MPool2, 64, 128, Training, 'ConvLayer3') + ConvLayer3 = ConvLayer(MPool2, 64, 128, Training, 'ConvLayer3') - ConvLayer4 = ConvLayer(ConvLayer3, 128, 128, Training, 'ConvLayer4') + ConvLayer4 = ConvLayer(ConvLayer3, 128, 128, Training, 'ConvLayer4') - MPool4 = max_pool(ConvLayer4, ksize=(2, 2), stride=(2, 2)) + MPool4 = max_pool(ConvLayer4, ksize=(2, 2), stride=(2, 2)) - ConvLayer5 = ConvLayer(MPool4, 128, 256, Training, 'ConvLayer5') + ConvLayer5 = ConvLayer(MPool4, 128, 256, Training, 'ConvLayer5') - ConvLayer6 = ConvLayer(ConvLayer5, 256, 256, Training, 'ConvLayer6') + ConvLayer6 = ConvLayer(ConvLayer5, 256, 256, Training, 'ConvLayer6') - ConvLayer7 = ConvLayer(ConvLayer6, 256, 256, Training, 'ConvLayer7') + ConvLayer7 = ConvLayer(ConvLayer6, 256, 256, Training, 'ConvLayer7') - MPool7 = max_pool(ConvLayer7, ksize=(2, 1), stride=(2, 1)) + MPool7 = max_pool(ConvLayer7, ksize=(2, 1), stride=(2, 1)) - ConvLayer8 = ConvLayer(MPool7, 256, 512, Training, 'ConvLayer8') + ConvLayer8 = ConvLayer(MPool7, 256, 512, Training, 'ConvLayer8') - ConvLayer9 = ConvLayer(ConvLayer8, 512, 512, Training, 'ConvLayer9') + ConvLayer9 = ConvLayer(ConvLayer8, 512, 512, Training, 'ConvLayer9') - ConvLayer10 = ConvLayer(ConvLayer9, 512, 512, Training, 'ConvLayer10') + ConvLayer10 = ConvLayer(ConvLayer9, 512, 512, Training, 'ConvLayer10') - MPool10 = max_pool(ConvLayer10, ksize=(2, 1), stride=(2, 1)) + MPool10 = max_pool(ConvLayer10, ksize=(2, 1), stride=(2, 1)) - ConvLayer11 = ConvLayer(MPool10, 512, 512, Training, 'ConvLayer11') + ConvLayer11 = ConvLayer(MPool10, 512, 512, Training, 'ConvLayer11') - ConvLayer12 = ConvLayer(ConvLayer11, 512, 512, Training, 'ConvLayer12') + ConvLayer12 = ConvLayer(ConvLayer11, 512, 512, Training, 'ConvLayer12') - ConvLayer13 = ConvLayer(ConvLayer12, 512, LastFilters, Training, 'ConvLayer13') + ConvLayer13 = ConvLayer( + ConvLayer12, 512, LastFilters, Training, 'ConvLayer13') - MPool13 = max_pool(ConvLayer13, ksize=(2, 1), stride=(2, 1)) + MPool13 = max_pool(ConvLayer13, ksize=(2, 1), stride=(2, 1)) - MPool13_T = tf.transpose(MPool13, perm=[0,2,1,3]) + MPool13_T = tf.transpose(MPool13, perm=[0, 2, 1, 3]) - MPool13_T_RSH = tf.reshape(MPool13_T, [-1, FV, LastFilters]) + MPool13_T_RSH = tf.reshape(MPool13_T, [-1, FV, LastFilters]) - return tf.reshape(MPool13_T_RSH, [-1, NFeatures]) + return tf.reshape(MPool13_T_RSH, [-1, NFeatures]) -def ConvLayer(Input, FilterIn, FilterOut, Training, Scope): - with tf.variable_scope(Scope): +def ConvLayer(Input, FilterIn, FilterOut, Training, Scope): - Weight = weight_variable([3, 3, FilterIn, FilterOut]) + with tf.variable_scope(Scope): - if cfg.LeakyReLU == True: + Weight = weight_variable([3, 3, FilterIn, FilterOut]) - return tf.nn.leaky_relu(batch_norm_conv(conv2d(Input, Weight), FilterOut, Training)) - else: - return tf.nn.relu(batch_norm_conv(conv2d(Input, Weight), FilterOut, Training)) + if cfg.LeakyReLU == True: + return tf.nn.leaky_relu(batch_norm_conv(conv2d(Input, Weight), FilterOut, Training)) + else: + return tf.nn.relu(batch_norm_conv(conv2d(Input, Weight), FilterOut, Training)) diff --git a/compute_probs.py b/compute_probs.py index 94ce6cb..eff76bd 100644 --- a/compute_probs.py +++ b/compute_probs.py @@ -6,28 +6,23 @@ #!/usr/bin/python -import tensorflow.compat.v1 as tf -tf.compat.v1.disable_eager_execution() - +from util import LoadClasses, LoadList, LoadModel, ReadData +from rnn import RNN +from config import cfg +from cnn import CNN, WND_HEIGHT, WND_WIDTH, MPoolLayers_H +import numpy as np +import cv2 import sys import os -import cv2 -import numpy as np -import codecs import math +import codecs +import tensorflow.compat.v1 as tf + +tf.compat.v1.disable_eager_execution() -from config import cfg -from util import LoadClasses -from util import LoadModel -from util import ReadData -from util import LoadList -from cnn import CNN -from cnn import WND_HEIGHT -from cnn import WND_WIDTH -from cnn import MPoolLayers_H -from rnn import RNN -if (os.path.exists(cfg.Probs) == False): os.makedirs(cfg.Probs) +if (os.path.exists(cfg.Probs) == False): + os.makedirs(cfg.Probs) Classes = LoadClasses(cfg.CHAR_LIST) @@ -53,8 +48,9 @@ logits = tf.nn.softmax(logits, dim=-1, name=None) -#Reading test data... -InputListTest, SeqLensTest, _ = ReadData(cfg.TEST_LOCATION, cfg.TEST_LIST, cfg.TEST_NB, WND_HEIGHT, WND_WIDTH, WND_SHIFT, VEC_PER_WND, '') +# Reading test data... +InputListTest, SeqLensTest, _ = ReadData( + cfg.TEST_LOCATION, cfg.TEST_LIST, cfg.TEST_NB, WND_HEIGHT, WND_WIDTH, WND_SHIFT, VEC_PER_WND, '') print('Initializing...') @@ -65,60 +61,59 @@ LoadModel(session, cfg.SaveDir+'/') try: - session.run(tf.assign(phase_train, False)) + session.run(tf.assign(phase_train, False)) - randIxs = range(0, len(InputListTest)) + randIxs = range(0, len(InputListTest)) - start, end = (0, cfg.BatchSize) + start, end = (0, cfg.BatchSize) - batch = 0 - while end <= len(InputListTest): - batchInputs = [] - batchSeqLengths = [] - for batchI, origI in enumerate(randIxs[start:end]): - batchInputs.extend(InputListTest[origI]) - batchSeqLengths.append(SeqLensTest[origI]) + batch = 0 + while end <= len(InputListTest): + batchInputs = [] + batchSeqLengths = [] + for batchI, origI in enumerate(randIxs[start:end]): + batchInputs.extend(InputListTest[origI]) + batchSeqLengths.append(SeqLensTest[origI]) - feed = {x: batchInputs, SeqLens: batchSeqLengths} - del batchInputs, batchSeqLengths + feed = {x: batchInputs, SeqLens: batchSeqLengths} + del batchInputs, batchSeqLengths - Logits = session.run([logits], feed_dict=feed) - del feed + Logits = session.run([logits], feed_dict=feed) + del feed - _,sLen,_,_ = np.shape(Logits) + _, sLen, _, _ = np.shape(Logits) - for i in range(0, cfg.BatchSize): + for i in range(0, cfg.BatchSize): - fileIndex = cfg.BatchSize * batch + i - filename = "./"+cfg.Probs+"/" + os.path.basename(FilesList[fileIndex].strip()) + ".txt" + fileIndex = cfg.BatchSize * batch + i + filename = "./"+cfg.Probs+"/" + \ + os.path.basename(FilesList[fileIndex].strip()) + ".txt" - file = codecs.open(filename, "a", "utf-8") + file = codecs.open(filename, "a", "utf-8") - for seqn in range(0, sLen): + for seqn in range(0, sLen): - seq = Logits[0][seqn][i] + seq = Logits[0][seqn][i] - file.write(str(seq[NClasses-1])) - file.write(" ") + file.write(str(seq[NClasses-1])) + file.write(" ") - for c in range(0, NClasses-1): - val = seq[c] - file.write(str(val)) - file.write(" ") + for c in range(0, NClasses-1): + val = seq[c] + file.write(str(val)) + file.write(" ") - file.write("\n") + file.write("\n") - file.close + file.close - start += cfg.BatchSize - end += cfg.BatchSize - batch += 1 + start += cfg.BatchSize + end += cfg.BatchSize + batch += 1 except (KeyboardInterrupt, SystemExit, Exception) as e: - print("[Error/Interruption] %s" % str(e)) - print("Clossing TF Session...") - session.close() - print("Terminating Program...") - sys.exit(0) - - + print(f"[Error/Interruption] {str(e)}") + print("Clossing TF Session...") + session.close() + print("Terminating Program...") + sys.exit(0) diff --git a/config.py b/config.py index 1247e62..21dc649 100644 --- a/config.py +++ b/config.py @@ -6,73 +6,95 @@ #!/usr/bin/python +import os import tensorflow.compat.v1 as tf + tf.compat.v1.disable_eager_execution() -import os flags = tf.app.flags ###################################### -#Images and labels file type +# Images and labels file type flags.DEFINE_string('ImageFileType', '.png', 'The file type of images') -flags.DEFINE_string('LabelFileType', '.tru', 'The extension of the file holding the ground-truth labels') +flags.DEFINE_string('LabelFileType', '.tru', + 'The extension of the file holding the ground-truth labels') ###################################### -#Training data configuration +# Training data configuration flags.DEFINE_integer('TRAIN_NB', 20, 'Number of training images to process') -flags.DEFINE_string('TRAIN_LIST', './samples/list', 'List of training data without file extension.') -flags.DEFINE_string('TRAIN_LOCATION', './samples/Images/', 'Location of training data. Could be included in the data list.') -flags.DEFINE_string('TRAIN_TRANS', './samples/Labels/', 'Location of training data transcriptions') +flags.DEFINE_string('TRAIN_LIST', './samples/list', + 'List of training data without file extension.') +flags.DEFINE_string('TRAIN_LOCATION', './samples/Images/', + 'Location of training data. Could be included in the data list.') +flags.DEFINE_string('TRAIN_TRANS', './samples/Labels/', + 'Location of training data transcriptions') ###################################### -#Validation data configuration +# Validation data configuration flags.DEFINE_integer('VAL_NB', 20, 'Number of validation images to process') -flags.DEFINE_string('VAL_LIST', './samples/list', 'List of validation data without file extension.') -flags.DEFINE_string('VAL_LOCATION', './samples/Images/', 'Location of validation data. Could be included in the data list.') -flags.DEFINE_string('VAL_TRANS', './samples/Labels/', 'Location of validation data transcriptions') +flags.DEFINE_string('VAL_LIST', './samples/list', + 'List of validation data without file extension.') +flags.DEFINE_string('VAL_LOCATION', './samples/Images/', + 'Location of validation data. Could be included in the data list.') +flags.DEFINE_string('VAL_TRANS', './samples/Labels/', + 'Location of validation data transcriptions') ###################################### -#Test data configuration +# Test data configuration flags.DEFINE_integer('TEST_NB', 20, 'Number of test images to process') -flags.DEFINE_string('TEST_LIST', './samples/list', 'List of test data without file extension.') -flags.DEFINE_string('TEST_LOCATION', './samples/Images/', 'Location of test data. Could be included in the data list.') -flags.DEFINE_boolean('WriteDecodedToFile', True, 'Write the decoded text to file or stdout?') +flags.DEFINE_string('TEST_LIST', './samples/list', + 'List of test data without file extension.') +flags.DEFINE_string('TEST_LOCATION', './samples/Images/', + 'Location of test data. Could be included in the data list.') +flags.DEFINE_boolean('WriteDecodedToFile', True, + 'Write the decoded text to file or stdout?') ###################################### -#Classes information -flags.DEFINE_string('CHAR_LIST', './samples/CHAR_LIST', 'Sorted list of classes/characters. First one must be ') +# Classes information +flags.DEFINE_string('CHAR_LIST', './samples/CHAR_LIST', + 'Sorted list of classes/characters. First one must be ') ###################################### -#Model and logs files and directories -flags.DEFINE_string('SaveDir', './model', 'Directory where model checkpoints are saved') +# Model and logs files and directories +flags.DEFINE_string('SaveDir', './model', + 'Directory where model checkpoints are saved') flags.DEFINE_string('ModelName', 'model.ckpt', 'Name of the model checkpoints') flags.DEFINE_string('LogFile', './log', 'Log file') -flags.DEFINE_string('LogDir', './summary', 'Directory to store Tensorflow summary information') -flags.DEFINE_string('Probs', './Probs', 'Directory to store posteriors for WFST decoder') +flags.DEFINE_string('LogDir', './summary', + 'Directory to store Tensorflow summary information') +flags.DEFINE_string('Probs', './Probs', + 'Directory to store posteriors for WFST decoder') ###################################### -#CNN parameters +# CNN parameters flags.DEFINE_boolean('LeakyReLU', True, 'Use Leaky ReLU or ReLU') ###################################### -#RNN parameters -flags.DEFINE_integer('NUnits', 256, 'Number of LSTM units per forward/backward layer') +# RNN parameters +flags.DEFINE_integer( + 'NUnits', 256, 'Number of LSTM units per forward/backward layer') flags.DEFINE_integer('NLayers', 3, 'Number of BLSTM layers') ###################################### -#Training parameters -flags.DEFINE_integer('StartingEpoch', 0, 'The epoch number to start training from') # = 0 to train from scratch, != 0 to resume from the latest checkpoint +# Training parameters +# = 0 to train from scratch, != 0 to resume from the latest checkpoint +flags.DEFINE_integer('StartingEpoch', 0, + 'The epoch number to start training from') flags.DEFINE_float('LearningRate', 0.0005, 'Learning rate') -flags.DEFINE_integer('BatchSize', 10, 'Batch size') #This is actually the number of images to process each iteration -flags.DEFINE_boolean('RandomBatches', True, 'Randomize the order of batches each epoch') +# This is actually the number of images to process each iteration +flags.DEFINE_integer('BatchSize', 10, 'Batch size') +flags.DEFINE_boolean('RandomBatches', True, + 'Randomize the order of batches each epoch') flags.DEFINE_integer('MaxGradientNorm', 5, 'Maximum gradient norm') flags.DEFINE_integer('SaveEachNEpochs', 1, 'Save model each n epochs') flags.DEFINE_integer('NEpochs', 1000000, 'Run the training for n epochs') -flags.DEFINE_integer('TrainThreshold', 20, 'Stop the training after n epochs with no improvement on validation') +flags.DEFINE_integer('TrainThreshold', 20, + 'Stop the training after n epochs with no improvement on validation') cfg = flags.FLAGS -if (os.path.exists(cfg.SaveDir) == False): os.makedirs(cfg.SaveDir) -if (os.path.exists(cfg.LogDir) == False): os.makedirs(cfg.LogDir) - +if (os.path.exists(cfg.SaveDir) == False): + os.makedirs(cfg.SaveDir) +if (os.path.exists(cfg.LogDir) == False): + os.makedirs(cfg.LogDir) diff --git a/rnn.py b/rnn.py index d1cd406..f11b33f 100644 --- a/rnn.py +++ b/rnn.py @@ -6,95 +6,110 @@ #!/usr/bin/python -import tensorflow.compat.v1 as tf -tf.compat.v1.disable_eager_execution() +from util import LoadClasses +from config import cfg +from cnn import FV, NFeatures import tensorflow as tf2 - import numpy as np import math +import tensorflow.compat.v1 as tf + +tf.compat.v1.disable_eager_execution() -from config import cfg -from util import LoadClasses -from cnn import FV -from cnn import NFeatures Classes = LoadClasses(cfg.CHAR_LIST) NClasses = len(Classes) -def RNN(Inputs, SeqLens, Scope): - - with tf.variable_scope(Scope): - - ################################################################ - #Construct batch sequences for LSTM - - maxLen = tf.reduce_max(SeqLens, 0) - n = 0; offset = 0 - ndxs = tf.reshape(tf.range(offset, SeqLens[n] + offset), [SeqLens[n], 1]) - res = tf.gather_nd(Inputs, [ndxs]) - res = tf.reshape(res, [-1]) - zero_padding = tf.zeros([NFeatures * maxLen] - tf.shape(res), dtype=res.dtype) - a_padded = tf.concat([res, zero_padding], 0) - result = tf.reshape(a_padded, [maxLen, NFeatures]) - Inputs2 = result - - for n in range(1, cfg.BatchSize): - offset = tf.cumsum(SeqLens)[n-1] - ndxs = tf.reshape(tf.range(offset, SeqLens[n]+offset), [SeqLens[n], 1]) - res = tf.gather_nd(Inputs, [ndxs]) - res = tf.reshape(res, [-1]) - zero_padding = tf.zeros([NFeatures * maxLen] - tf.shape(res), dtype=res.dtype) - a_padded = tf.concat([res, zero_padding], 0) - result = tf.reshape(a_padded, [maxLen, NFeatures]) - Inputs2 = tf.concat([Inputs2, result], 0) - - n = 0 - ndxs = tf.reshape(tf.range(n, cfg.BatchSize * maxLen, maxLen), [cfg.BatchSize, 1]) - Inputs = tf.gather_nd(Inputs2, [ndxs]) - - i = tf.constant(1) - - def condition(i, prev): return tf.less(i, maxLen) - - def body(i, prev): - ndxs = tf.reshape(tf.range(i, cfg.BatchSize * maxLen, maxLen), [cfg.BatchSize, 1]) - result = tf.gather_nd(Inputs2, [ndxs]) - next = tf.concat([prev, result], 0) - return [tf.add(i, 1), next] - - i, Inputs = tf.while_loop(condition, body, [i, Inputs], shape_invariants=[i.get_shape(), tf.TensorShape([None, cfg.BatchSize, NFeatures])]) - - ############################################################### - #Construct LSTM layers - - #initializer = tf.contrib.layers.xavier_initializer() - initializer = tf2.initializers.GlorotUniform() - - stacked_rnn_forward = [] - for i in range(cfg.NLayers): - stacked_rnn_forward.append(tf.nn.rnn_cell.LSTMCell(num_units=cfg.NUnits, initializer=initializer, use_peepholes=True, state_is_tuple=True)) - forward = tf.nn.rnn_cell.MultiRNNCell(stacked_rnn_forward, state_is_tuple=True) - - stacked_rnn_backward = [] - for i in range(cfg.NLayers): - stacked_rnn_backward.append(tf.nn.rnn_cell.LSTMCell(num_units=cfg.NUnits, initializer=initializer, use_peepholes=True, state_is_tuple=True)) - backward = tf.nn.rnn_cell.MultiRNNCell(stacked_rnn_backward, state_is_tuple=True) - - [fw_out, bw_out], _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=forward, cell_bw=backward, inputs=Inputs, time_major=True, dtype=tf.float32,sequence_length=tf.cast(SeqLens, tf.int64)) - - # Reshaping forward, and backward outputs for affine transformation - fw_out = tf.reshape(fw_out,[-1, cfg.NUnits]) - bw_out = tf.reshape(bw_out,[-1, cfg.NUnits]) - - # Linear Layer params - W_fw = tf.Variable(tf.truncated_normal(shape=[cfg.NUnits, NClasses], stddev=np.sqrt(2.0 / cfg.NUnits), dtype=tf.float32), dtype=tf.float32) - W_bw = tf.Variable(tf.truncated_normal(shape=[cfg.NUnits, NClasses], stddev=np.sqrt(2.0 / cfg.NUnits), dtype=tf.float32), dtype=tf.float32) - b_out = tf.constant(0.1,shape=[NClasses], dtype=tf.float32) - - # Perform an affine transformation - logits = tf.add( tf.add( tf.matmul(fw_out,W_fw), tf.matmul(bw_out,W_bw) ), b_out ) - - return tf.reshape(logits, [-1, cfg.BatchSize, NClasses]) +def RNN(Inputs, SeqLens, Scope): + with tf.variable_scope(Scope): + + ################################################################ + # Construct batch sequences for LSTM + + maxLen = tf.reduce_max(SeqLens, 0) + + n = 0 + offset = 0 + ndxs = tf.reshape( + tf.range(offset, SeqLens[n] + offset), [SeqLens[n], 1]) + res = tf.gather_nd(Inputs, [ndxs]) + res = tf.reshape(res, [-1]) + zero_padding = tf.zeros( + [NFeatures * maxLen] - tf.shape(res), dtype=res.dtype) + a_padded = tf.concat([res, zero_padding], 0) + result = tf.reshape(a_padded, [maxLen, NFeatures]) + Inputs2 = result + + for n in range(1, cfg.BatchSize): + offset = tf.cumsum(SeqLens)[n-1] + ndxs = tf.reshape( + tf.range(offset, SeqLens[n]+offset), [SeqLens[n], 1]) + res = tf.gather_nd(Inputs, [ndxs]) + res = tf.reshape(res, [-1]) + zero_padding = tf.zeros( + [NFeatures * maxLen] - tf.shape(res), dtype=res.dtype) + a_padded = tf.concat([res, zero_padding], 0) + result = tf.reshape(a_padded, [maxLen, NFeatures]) + Inputs2 = tf.concat([Inputs2, result], 0) + + n = 0 + ndxs = tf.reshape(tf.range(n, cfg.BatchSize * maxLen, + maxLen), [cfg.BatchSize, 1]) + Inputs = tf.gather_nd(Inputs2, [ndxs]) + + i = tf.constant(1) + + def condition(i, prev): return tf.less(i, maxLen) + + def body(i, prev): + ndxs = tf.reshape(tf.range(i, cfg.BatchSize * + maxLen, maxLen), [cfg.BatchSize, 1]) + result = tf.gather_nd(Inputs2, [ndxs]) + next = tf.concat([prev, result], 0) + return [tf.add(i, 1), next] + + i, Inputs = tf.while_loop(condition, body, [i, Inputs], shape_invariants=[ + i.get_shape(), tf.TensorShape([None, cfg.BatchSize, NFeatures])]) + + ############################################################### + # Construct LSTM layers + + # initializer = tf.contrib.layers.xavier_initializer() + initializer = tf2.initializers.GlorotUniform() + + stacked_rnn_forward = [] + for i in range(cfg.NLayers): + stacked_rnn_forward.append(tf.nn.rnn_cell.LSTMCell( + num_units=cfg.NUnits, initializer=initializer, use_peepholes=True, state_is_tuple=True)) + forward = tf.nn.rnn_cell.MultiRNNCell( + stacked_rnn_forward, state_is_tuple=True) + + stacked_rnn_backward = [] + for i in range(cfg.NLayers): + stacked_rnn_backward.append(tf.nn.rnn_cell.LSTMCell( + num_units=cfg.NUnits, initializer=initializer, use_peepholes=True, state_is_tuple=True)) + backward = tf.nn.rnn_cell.MultiRNNCell( + stacked_rnn_backward, state_is_tuple=True) + + [fw_out, bw_out], _ = tf.nn.bidirectional_dynamic_rnn( + cell_fw=forward, cell_bw=backward, inputs=Inputs, time_major=True, dtype=tf.float32, sequence_length=tf.cast(SeqLens, tf.int64)) + + # Reshaping forward, and backward outputs for affine transformation + fw_out = tf.reshape(fw_out, [-1, cfg.NUnits]) + bw_out = tf.reshape(bw_out, [-1, cfg.NUnits]) + + # Linear Layer params + W_fw = tf.Variable(tf.truncated_normal(shape=[cfg.NUnits, NClasses], stddev=np.sqrt( + 2.0 / cfg.NUnits), dtype=tf.float32), dtype=tf.float32) + W_bw = tf.Variable(tf.truncated_normal(shape=[cfg.NUnits, NClasses], stddev=np.sqrt( + 2.0 / cfg.NUnits), dtype=tf.float32), dtype=tf.float32) + b_out = tf.constant(0.1, shape=[NClasses], dtype=tf.float32) + + # Perform an affine transformation + logits = tf.add(tf.add(tf.matmul(fw_out, W_fw), + tf.matmul(bw_out, W_bw)), b_out) + + return tf.reshape(logits, [-1, cfg.BatchSize, NClasses]) diff --git a/test.py b/test.py index 6e5fe64..8caced7 100644 --- a/test.py +++ b/test.py @@ -1,4 +1,17 @@ from __future__ import print_function +from util import LoadClasses, LoadList, LoadModel, ReadData +from rnn import RNN +from config import cfg +from cnn import CNN, WND_HEIGHT, WND_WIDTH, MPoolLayers_H +import numpy as np +import cv2 +import sys +import os +import math +import codecs + +import tensorflow.compat.v1 as tf + ### # Copyright 2018 Edgard Chammas. All Rights Reserved. # Licensed under the Creative Commons Attribution-NonCommercial International Public License, Version 4.0. @@ -7,36 +20,18 @@ #!/usr/bin/python -import tensorflow.compat.v1 as tf tf.compat.v1.disable_eager_execution() -import sys -import os -import cv2 -import numpy as np -import codecs -import math try: - reload(sys) # Python 2 - sys.setdefaultencoding('utf8') + reload(sys) # Python 2 + sys.setdefaultencoding('utf8') except NameError: - pass # Python 3 - -from config import cfg -from util import LoadClasses -from util import LoadModel -from util import ReadData -from util import LoadList -from cnn import CNN -from cnn import WND_HEIGHT -from cnn import WND_WIDTH -from cnn import MPoolLayers_H -from rnn import RNN + pass # Python 3 if cfg.WriteDecodedToFile == True: - DecodeLog = codecs.open("decoded.txt", "w", "utf-8") + DecodeLog = codecs.open("decoded.txt", "w", "utf-8") Classes = LoadClasses(cfg.CHAR_LIST) @@ -63,8 +58,9 @@ # CTC Beam Search Decoder to decode pred string from the prob map decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, SeqLens) -#Reading test data... -InputListTest, SeqLensTest, _ = ReadData(cfg.TEST_LOCATION, cfg.TEST_LIST, cfg.TEST_NB, WND_HEIGHT, WND_WIDTH, WND_SHIFT, VEC_PER_WND, '') +# Reading test data... +InputListTest, SeqLensTest, _ = ReadData( + cfg.TEST_LOCATION, cfg.TEST_LIST, cfg.TEST_NB, WND_HEIGHT, WND_WIDTH, WND_SHIFT, VEC_PER_WND, '') print('Initializing...') @@ -75,65 +71,69 @@ LoadModel(session, cfg.SaveDir+'/') try: - session.run(tf.assign(phase_train, False)) - - randIxs = range(0, len(InputListTest)) - - start, end = (0, cfg.BatchSize) - - batch = 0 - while end <= len(InputListTest): - batchInputs = [] - batchSeqLengths = [] - for batchI, origI in enumerate(randIxs[start:end]): - batchInputs.extend(InputListTest[origI]) - batchSeqLengths.append(SeqLensTest[origI]) - - feed = {x: batchInputs, SeqLens: batchSeqLengths} - del batchInputs, batchSeqLengths - - Decoded = session.run([decoded], feed_dict=feed)[0] - del feed - - trans = session.run(tf.sparse_tensor_to_dense(Decoded[0])) - - for i in range(0, cfg.BatchSize): - - fileIndex = cfg.BatchSize * batch + i - filename = FilesList[fileIndex].strip() - decodedStr = " " - - for j in range(0, len(trans[i])): - if trans[i][j] == 0: - if (j != (len(trans[i]) - 1)): - if trans[i][j+1] == 0: break - else: decodedStr = "%s%s" % (decodedStr, Classes[trans[i][j]]) - else: - break - else: - if trans[i][j] == (NClasses - 2): - if (j != 0): decodedStr = "%s " % (decodedStr) - else: continue - else: - decodedStr = "%s%s" % (decodedStr, Classes[trans[i][j]]) - - decodedStr = decodedStr.replace("", " ") - - decodedStr = filename + decodedStr[:] + "\n" - if cfg.WriteDecodedToFile == True: DecodeLog.write(decodedStr) - else: print(decodedStr, end=' ') - - start += cfg.BatchSize - end += cfg.BatchSize - batch += 1 - - DecodeLog.close() + session.run(tf.assign(phase_train, False)) + + randIxs = range(0, len(InputListTest)) + + start, end = (0, cfg.BatchSize) + + batch = 0 + while end <= len(InputListTest): + batchInputs = [] + batchSeqLengths = [] + for batchI, origI in enumerate(randIxs[start:end]): + batchInputs.extend(InputListTest[origI]) + batchSeqLengths.append(SeqLensTest[origI]) + + feed = {x: batchInputs, SeqLens: batchSeqLengths} + del batchInputs, batchSeqLengths + + Decoded = session.run([decoded], feed_dict=feed)[0] + del feed + + trans = session.run(tf.sparse_tensor_to_dense(Decoded[0])) + + for i in range(0, cfg.BatchSize): + + fileIndex = cfg.BatchSize * batch + i + filename = FilesList[fileIndex].strip() + decodedStr = " " + + for j in range(0, len(trans[i])): + if trans[i][j] == 0: + if (j != (len(trans[i]) - 1)): + if trans[i][j+1] == 0: + break + else: + decodedStr = f"{decodedStr}{Classes[trans[i][j]]}" + else: + break + else: + if trans[i][j] == (NClasses - 2): + if (j != 0): + decodedStr = f"{decodedStr} " + else: + continue + else: + decodedStr = f"{decodedStr}{Classes[trans[i][j]]}" + + decodedStr = decodedStr.replace("", " ") + + decodedStr = filename + decodedStr[:] + "\n" + if cfg.WriteDecodedToFile == True: + DecodeLog.write(decodedStr) + else: + print(decodedStr, end=' ') + + start += cfg.BatchSize + end += cfg.BatchSize + batch += 1 + + DecodeLog.close() except (KeyboardInterrupt, SystemExit, Exception) as e: - print("[Error/Interruption] %s" % str(e)) - print("Clossing TF Session...") - session.close() - print("Terminating Program...") - sys.exit(0) - - + print(f"[Error/Interruption] {str(e)}") + print("Clossing TF Session...") + session.close() + print("Terminating Program...") + sys.exit(0) diff --git a/train.py b/train.py index 9232de3..d314591 100644 --- a/train.py +++ b/train.py @@ -6,33 +6,26 @@ #!/usr/bin/python -import tensorflow.compat.v1 as tf -tf.compat.v1.disable_eager_execution() - -import sys -import cv2 +from util import LoadModel, ReadData, SaveModel, target_list_to_sparse_tensor +from rnn import RNN +from config import cfg +from cnn import CNN, WND_HEIGHT, WND_SHIFT, WND_WIDTH, CNNLight, MPoolLayers_H import numpy as np -import codecs +import cv2 +import sys import math +import codecs +import tensorflow.compat.v1 as tf + +tf.compat.v1.disable_eager_execution() -from config import cfg -from util import LoadModel -from util import SaveModel -from util import ReadData -from util import target_list_to_sparse_tensor -from cnn import CNN -from cnn import CNNLight -from cnn import WND_HEIGHT -from cnn import WND_WIDTH -from cnn import WND_SHIFT -from cnn import MPoolLayers_H -from rnn import RNN VEC_PER_WND = WND_WIDTH / math.pow(2, MPoolLayers_H) nTimesNoProgress = 0 -currTrainLoss = 1e6; currValLoss = 1e6 +currTrainLoss = 1e6 +currValLoss = 1e6 totalIter = cfg.TRAIN_NB / cfg.BatchSize @@ -46,7 +39,7 @@ x_expanded = tf.expand_dims(x, 3) -#Inputs = CNNLight(x_expanded, phase_train, 'CNN_1') +# Inputs = CNNLight(x_expanded, phase_train, 'CNN_1') Inputs = CNN(x_expanded, phase_train, 'CNN_1') logits = RNN(Inputs, SeqLens, 'RNN_1') @@ -54,7 +47,7 @@ # Target params indices = tf.placeholder(dtype=tf.int64, shape=[None, 2]) values = tf.placeholder(dtype=tf.int32, shape=[None]) -shape = tf.placeholder(dtype=tf.int64,shape=[2]) +shape = tf.placeholder(dtype=tf.int64, shape=[2]) # Make targets targets = tf.SparseTensor(indices, values, shape) @@ -71,33 +64,46 @@ predicted = tf.to_int32(decoded[0]) -error_rate = tf.reduce_sum(tf.edit_distance(predicted, targets, normalize=False)) / tf.to_float(tf.size(targets.values)) +error_rate = tf.reduce_sum(tf.edit_distance( + predicted, targets, normalize=False)) / tf.to_float(tf.size(targets.values)) TrainError_s = tf.summary.scalar('TrainError', error_rate) tvars = tf.trainable_variables() -grad, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), cfg.MaxGradientNorm) +grad, _ = tf.clip_by_global_norm( + tf.gradients(loss, tvars), cfg.MaxGradientNorm) optimizer = tf.train.AdamOptimizer(learning_rate=cfg.LearningRate) train_step = optimizer.apply_gradients(zip(grad, tvars)) -#These values are used to draw performance graphs. Updated after each epoch. -OverallTrainingLoss = tf.Variable(0, name='OverallTrainingLoss', dtype=tf.float32) -OverallTrainingError = tf.Variable(0, name='OverallTrainingError', dtype=tf.float32) -OverallValidationLoss = tf.Variable(0, name='OverallValidationLoss', dtype=tf.float32) -OverallValidationError = tf.Variable(0, name='OverallValidationError', dtype=tf.float32) -OverallTrainingLoss_s = tf.summary.scalar('OverallTrainingLoss', OverallTrainingLoss) -OverallTrainingError_s = tf.summary.scalar('OverallTrainingError', OverallTrainingError) -OverallValidationLoss_s = tf.summary.scalar('OverallValidationLoss', OverallValidationLoss) -OverallValidationError_s = tf.summary.scalar('OverallValidationError', OverallValidationError) - -#Reading training data... -inputList, seqLens, targetList = ReadData(cfg.TRAIN_LOCATION, cfg.TRAIN_LIST, cfg.TRAIN_NB, WND_HEIGHT, WND_WIDTH, WND_SHIFT, VEC_PER_WND, cfg.TRAIN_TRANS) - -#Reading validation data... -if (cfg.VAL_NB > 0): inputListVal, seqLensVal, targetListVal = ReadData(cfg.VAL_LOCATION, cfg.VAL_LIST, cfg.VAL_NB, WND_HEIGHT, WND_WIDTH, WND_SHIFT, VEC_PER_WND, cfg.VAL_TRANS) +# These values are used to draw performance graphs. Updated after each epoch. +OverallTrainingLoss = tf.Variable( + 0, name='OverallTrainingLoss', dtype=tf.float32) +OverallTrainingError = tf.Variable( + 0, name='OverallTrainingError', dtype=tf.float32) +OverallValidationLoss = tf.Variable( + 0, name='OverallValidationLoss', dtype=tf.float32) +OverallValidationError = tf.Variable( + 0, name='OverallValidationError', dtype=tf.float32) +OverallTrainingLoss_s = tf.summary.scalar( + 'OverallTrainingLoss', OverallTrainingLoss) +OverallTrainingError_s = tf.summary.scalar( + 'OverallTrainingError', OverallTrainingError) +OverallValidationLoss_s = tf.summary.scalar( + 'OverallValidationLoss', OverallValidationLoss) +OverallValidationError_s = tf.summary.scalar( + 'OverallValidationError', OverallValidationError) + +# Reading training data... +inputList, seqLens, targetList = ReadData( + cfg.TRAIN_LOCATION, cfg.TRAIN_LIST, cfg.TRAIN_NB, WND_HEIGHT, WND_WIDTH, WND_SHIFT, VEC_PER_WND, cfg.TRAIN_TRANS) + +# Reading validation data... +if (cfg.VAL_NB > 0): + inputListVal, seqLensVal, targetListVal = ReadData( + cfg.VAL_LOCATION, cfg.VAL_LIST, cfg.VAL_NB, WND_HEIGHT, WND_WIDTH, WND_SHIFT, VEC_PER_WND, cfg.VAL_TRANS) # Starting everything... LogFile.write("Initializing...\n\n") @@ -109,170 +115,189 @@ LocalTrainSummary = tf.summary.merge([TrainLoss_s, TrainError_s]) -OverallSummary = tf.summary.merge([OverallTrainingLoss_s, OverallTrainingError_s, OverallValidationLoss_s, OverallValidationError_s]) +OverallSummary = tf.summary.merge( + [OverallTrainingLoss_s, OverallTrainingError_s, OverallValidationLoss_s, OverallValidationError_s]) SummaryWriter = tf.summary.FileWriter(cfg.LogDir, session.graph) -if cfg.StartingEpoch != 0: LoadModel(session, cfg.SaveDir+'/') +if cfg.StartingEpoch != 0: + LoadModel(session, cfg.SaveDir+'/') try: - for epoch in range(cfg.StartingEpoch, cfg.NEpochs): - - LogFile.write("######################################################\n") - LogFile.write("Training Data\n") - LogFile.flush() + for epoch in range(cfg.StartingEpoch, cfg.NEpochs): - TrainingLoss = [] - TrainingError = [] + LogFile.write( + "######################################################\n") + LogFile.write("Training Data\n") + LogFile.flush() - if cfg.RandomBatches == True: randIxs = np.random.permutation(len(inputList)) - else: randIxs = range(0, len(inputList)) + TrainingLoss = [] + TrainingError = [] - start, end = (0, cfg.BatchSize) + if cfg.RandomBatches == True: + randIxs = np.random.permutation(len(inputList)) + else: + randIxs = range(0, len(inputList)) - session.run(tf.assign(phase_train, True)) + start, end = (0, cfg.BatchSize) - batch = 0 - while end <= len(inputList): + session.run(tf.assign(phase_train, True)) - batchInputs = [] - batchTargetList = [] - batchSeqLengths = [] + batch = 0 + while end <= len(inputList): - for batchI, origI in enumerate(randIxs[start:end]): - batchInputs.extend(inputList[origI]) - batchTargetList.append(targetList[origI]) - batchSeqLengths.append(seqLens[origI]) + batchInputs = [] + batchTargetList = [] + batchSeqLengths = [] - batchTargetSparse = target_list_to_sparse_tensor(batchTargetList) - batchTargetIxs, batchTargetVals, batchTargetShape = batchTargetSparse + for batchI, origI in enumerate(randIxs[start:end]): + batchInputs.extend(inputList[origI]) + batchTargetList.append(targetList[origI]) + batchSeqLengths.append(seqLens[origI]) - feed = {x: batchInputs, SeqLens: batchSeqLengths, indices: batchTargetIxs, values: batchTargetVals, shape: batchTargetShape} - del batchInputs, batchTargetIxs, batchTargetVals, batchTargetShape, batchSeqLengths + batchTargetSparse = target_list_to_sparse_tensor(batchTargetList) + batchTargetIxs, batchTargetVals, batchTargetShape = batchTargetSparse - _, summary, Losses, Loss, Error = session.run([train_step, LocalTrainSummary, losses, loss, error_rate], feed_dict=feed) - del feed + feed = {x: batchInputs, SeqLens: batchSeqLengths, indices: batchTargetIxs, + values: batchTargetVals, shape: batchTargetShape} + del batchInputs, batchTargetIxs, batchTargetVals, batchTargetShape, batchSeqLengths - SummaryWriter.add_summary(summary, epoch*totalIter + batch) - SummaryWriter.flush() - - numberOfInfElements = np.count_nonzero(np.isinf(Losses)) - if numberOfInfElements > 0: - LogFile.write("WARNING: INF VALUE(S) FOUND!\n") - LogFile.write("%s\n" % (batchTargetList[np.where(np.isinf(Losses)==True)[0][0]])) - LogFile.write("Losses\n") - LogFile.flush() - Losses = filter(lambda v: ~np.isinf(v), Losses) - Loss = np.mean(Losses) + _, summary, Losses, Loss, Error = session.run( + [train_step, LocalTrainSummary, losses, loss, error_rate], feed_dict=feed) + del feed - TrainingLoss.append(Loss) - TrainingError.append(Error) + SummaryWriter.add_summary(summary, epoch*totalIter + batch) + SummaryWriter.flush() - LogFile.write("Epoch %d, Batch: %d, Loss: %.6f, Error: %.6f, " % (epoch, batch, Loss, Error)) + numberOfInfElements = np.count_nonzero(np.isinf(Losses)) + if numberOfInfElements > 0: + LogFile.write("WARNING: INF VALUE(S) FOUND!\n") + LogFile.write( + f"{batchTargetList[np.where(np.isinf(Losses) == True)[0][0]]}\n") + LogFile.write("Losses\n") + LogFile.flush() + Losses = filter(lambda v: ~np.isinf(v), Losses) + Loss = np.mean(Losses) - if currTrainLoss < Loss: LogFile.write("Bad\n") - else: LogFile.write("Good\n") + TrainingLoss.append(Loss) + TrainingError.append(Error) - LogFile.flush() + LogFile.write("Epoch %d, Batch: %d, Loss: %.6f, Error: %.6f, " % ( + epoch, batch, Loss, Error)) - start += cfg.BatchSize - end += cfg.BatchSize - batch += 1 + if currTrainLoss < Loss: + LogFile.write("Bad\n") + else: + LogFile.write("Good\n") - TrainingLoss = np.mean(TrainingLoss) - TrainingError = np.mean(TrainingError) + LogFile.flush() - LogFile.write("Training loss: %.6f, Training error: %.6f\n" % (TrainingLoss, TrainingError) ) + start += cfg.BatchSize + end += cfg.BatchSize + batch += 1 - if TrainingLoss < currTrainLoss: - currTrainLoss = TrainingLoss - LogFile.write("Training imporving.\n") - else: - LogFile.write("Training not imporving.\n") + TrainingLoss = np.mean(TrainingLoss) + TrainingError = np.mean(TrainingError) - LogFile.flush() + LogFile.write( + f"Training loss: {TrainingLoss:.6f}, Training error: {TrainingError:.6f}\n") - if (epoch + 1) % cfg.SaveEachNEpochs == 0: - SaveModel(session, cfg.SaveDir+'/'+cfg.ModelName, epoch) + if TrainingLoss < currTrainLoss: + currTrainLoss = TrainingLoss + LogFile.write("Training imporving.\n") + else: + LogFile.write("Training not imporving.\n") - if (cfg.VAL_NB > 0): + LogFile.flush() - LogFile.write("\nValidation Data\n"); - LogFile.flush() + if (epoch + 1) % cfg.SaveEachNEpochs == 0: + SaveModel(session, cfg.SaveDir+'/'+cfg.ModelName, epoch) - session.run(tf.assign(phase_train, False)) + if (cfg.VAL_NB > 0): - ValidationError = [] - ValidationLoss = [] + LogFile.write("\nValidation Data\n") + LogFile.flush() - randIxs = range(0, len(inputListVal)) - start, end = (0, cfg.BatchSize) + session.run(tf.assign(phase_train, False)) - batch = 0 - while end <= len(inputListVal): + ValidationError = [] + ValidationLoss = [] - batchInputs = [] - batchTargetList = [] - batchSeqLengths = [] + randIxs = range(0, len(inputListVal)) + start, end = (0, cfg.BatchSize) - for batchI, origI in enumerate(randIxs[start:end]): - batchInputs.extend(inputListVal[origI]) - batchTargetList.append(targetListVal[origI]) - batchSeqLengths.append(seqLensVal[origI]) + batch = 0 + while end <= len(inputListVal): - batchTargetSparse = target_list_to_sparse_tensor(batchTargetList) - batchTargetIxs, batchTargetVals, batchTargetShape = batchTargetSparse - - feed = {x: batchInputs, SeqLens: batchSeqLengths, indices: batchTargetIxs, values: batchTargetVals, shape: batchTargetShape} - del batchInputs, batchTargetIxs, batchTargetVals, batchTargetShape, batchSeqLengths + batchInputs = [] + batchTargetList = [] + batchSeqLengths = [] - Loss, Error = session.run([loss, error_rate], feed_dict=feed) - del feed + for batchI, origI in enumerate(randIxs[start:end]): + batchInputs.extend(inputListVal[origI]) + batchTargetList.append(targetListVal[origI]) + batchSeqLengths.append(seqLensVal[origI]) - ValidationError.append(Error) - ValidationLoss.append(Loss) + batchTargetSparse = target_list_to_sparse_tensor( + batchTargetList) + batchTargetIxs, batchTargetVals, batchTargetShape = batchTargetSparse - LogFile.write("Batch: %d, Loss: %.6f, Error: %.6f\n" % (batch, Loss, Error)) - LogFile.flush() + feed = {x: batchInputs, SeqLens: batchSeqLengths, indices: batchTargetIxs, + values: batchTargetVals, shape: batchTargetShape} + del batchInputs, batchTargetIxs, batchTargetVals, batchTargetShape, batchSeqLengths - start += cfg.BatchSize - end += cfg.BatchSize - batch += 1 + Loss, Error = session.run([loss, error_rate], feed_dict=feed) + del feed - ValidationLoss = np.mean(ValidationLoss) - ValidationError = np.mean(ValidationError) + ValidationError.append(Error) + ValidationLoss.append(Loss) - LogFile.write("Validation loss: %.6f, Validation error: %.6f\n" % (ValidationLoss, ValidationError)) - LogFile.flush() + LogFile.write("Batch: %d, Loss: %.6f, Error: %.6f\n" % + (batch, Loss, Error)) + LogFile.flush() - feed = {OverallTrainingLoss: TrainingLoss, OverallTrainingError: TrainingError, OverallValidationLoss: ValidationLoss, OverallValidationError: ValidationError} - - SummaryWriter.add_summary(session.run([OverallSummary], feed_dict = feed)[0], epoch) - SummaryWriter.flush() - del feed + start += cfg.BatchSize + end += cfg.BatchSize + batch += 1 - if ValidationLoss < currValLoss: - LogFile.write("Validation improving.\n") - nTimesNoProgress = 0 - currValLoss = ValidationLoss - else: - LogFile.write("Validation not improving.\n") - nTimesNoProgress = nTimesNoProgress + 1 - if nTimesNoProgress == cfg.TrainThreshold: - session.close() - LogFile.write("No progress on validation. Terminating program.\n") - sys.exit(0) + ValidationLoss = np.mean(ValidationLoss) + ValidationError = np.mean(ValidationError) - LogFile.write("######################################################\n\n") + LogFile.write( + f"Validation loss: {ValidationLoss:.6f}, Validation error: {ValidationError:.6f}\n") + LogFile.flush() - LogFile.flush() + feed = {OverallTrainingLoss: TrainingLoss, OverallTrainingError: TrainingError, + OverallValidationLoss: ValidationLoss, OverallValidationError: ValidationError} -except (KeyboardInterrupt, SystemExit, Exception) as e: - print("[Error/Interruption] %s\n" % str(e)) - LogFile.write("[Error/Interruption] %s\n" % str(e)) - LogFile.write("Clossing TF Session...\n") - session.close() - LogFile.write("Terminating Program...\n") - LogFile.close() - sys.exit(0) + SummaryWriter.add_summary(session.run( + [OverallSummary], feed_dict=feed)[0], epoch) + SummaryWriter.flush() + del feed + + if ValidationLoss < currValLoss: + LogFile.write("Validation improving.\n") + nTimesNoProgress = 0 + currValLoss = ValidationLoss + else: + LogFile.write("Validation not improving.\n") + nTimesNoProgress = nTimesNoProgress + 1 + if nTimesNoProgress == cfg.TrainThreshold: + session.close() + LogFile.write( + "No progress on validation. Terminating program.\n") + sys.exit(0) + + LogFile.write( + "######################################################\n\n") + LogFile.flush() + +except (KeyboardInterrupt, SystemExit, Exception) as e: + print(f"[Error/Interruption] {str(e)}\n") + LogFile.write(f"[Error/Interruption] {str(e)}\n") + LogFile.write("Clossing TF Session...\n") + session.close() + LogFile.write("Terminating Program...\n") + LogFile.close() + sys.exit(0) diff --git a/util.py b/util.py index 7013248..da4a297 100644 --- a/util.py +++ b/util.py @@ -6,26 +6,31 @@ #!/usr/bin/python -import tensorflow.compat.v1 as tf -tf.compat.v1.disable_eager_execution() - +from config import cfg import numpy as np import cv2 -import math import os +import math import codecs -from config import cfg +import tensorflow.compat.v1 as tf + +tf.compat.v1.disable_eager_execution() + def LoadList(path): with open(path) as vlist: return vlist.readlines() -#Ref: https://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow +# Ref: https://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow + + def batch_norm_conv(x, n_out, phase_train): with tf.variable_scope('bn'): - beta = tf.Variable(tf.constant(0.0, shape=[n_out]), name='beta', trainable=True) - gamma = tf.Variable(tf.constant(1.0, shape=[n_out]), name='gamma', trainable=True) - batch_mean, batch_var = tf.nn.moments(x, [0,1,2], name='moments') + beta = tf.Variable(tf.constant( + 0.0, shape=[n_out]), name='beta', trainable=True) + gamma = tf.Variable(tf.constant( + 1.0, shape=[n_out]), name='gamma', trainable=True) + batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments') ema = tf.train.ExponentialMovingAverage(decay=0.5) def mean_var_with_update(): @@ -38,17 +43,22 @@ def mean_var_with_update(): normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3) return normed + def weight_variable(shape): - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial) + initial = tf.truncated_normal(shape, stddev=0.1) + return tf.Variable(initial) + def conv2d(x, W, stride=(1, 1), padding='SAME'): - return tf.nn.conv2d(x, W, strides=[1, stride[0], stride[1], 1], padding=padding) + return tf.nn.conv2d(x, W, strides=[1, stride[0], stride[1], 1], padding=padding) + def max_pool(x, ksize=(2, 2), stride=(2, 2)): - return tf.nn.max_pool(x, ksize=[1, ksize[0], ksize[1], 1], strides=[1, stride[0], stride[1], 1], padding='SAME') + return tf.nn.max_pool(x, ksize=[1, ksize[0], ksize[1], 1], strides=[1, stride[0], stride[1], 1], padding='SAME') + +# Ref: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/ctc/ctc_loss_op_test.py + -#Ref: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/ctc/ctc_loss_op_test.py def target_list_to_sparse_tensor(targetList): indices = [] vals = [] @@ -60,15 +70,18 @@ def target_list_to_sparse_tensor(targetList): shape = [len(targetList), np.asarray(indices).max(0)[1]+1] return (np.array(indices), np.array(vals), np.array(shape)) + def LoadClasses(path): data = {} with codecs.open(path, 'r', encoding='utf-8') as cF: - data = cF.read().split('\n') + data = cF.read().split('\n') return data + def LoadList(path): - with open(path) as vlist: - return vlist.readlines() + with open(path) as vlist: + return vlist.readlines() + def LoadModel(session, path): saver = tf.train.Saver() @@ -81,132 +94,143 @@ def LoadModel(session, path): print('No checkpoint found') exit() + def SaveModel(session, filename, epoch): saver = tf.train.Saver() saver.save(session, filename, global_step=epoch) -def ReadData(filesLocation, filesList, numberOfFiles, WND_HEIGHT, WND_WIDTH, WND_SHIFT, VEC_PER_WND, transDir=''): - seqLens = [] - inputList = [] - targetList = [] +def ReadData(filesLocation, filesList, numberOfFiles, WND_HEIGHT, WND_WIDTH, WND_SHIFT, VEC_PER_WND, transDir=''): - with open(filesList) as listHandler: + seqLens = [] + inputList = [] + targetList = [] - imgNbr = 0 - imageFiles = listHandler.readlines()[0:numberOfFiles] + with open(filesList) as listHandler: - for imageFile in imageFiles: + imgNbr = 0 + imageFiles = listHandler.readlines()[0:numberOfFiles] - if filesLocation != '': tfile = imageFile.strip('\n') - else: tfile = os.path.basename(imageFile.strip('\n')) + for imageFile in imageFiles: - ################################################################ - # Adding transcriptions + if filesLocation != '': + tfile = imageFile.strip('\n') + else: + tfile = os.path.basename(imageFile.strip('\n')) - if transDir != '': + ################################################################ + # Adding transcriptions - targetFile = transDir + "/" + tfile + cfg.LabelFileType + if transDir != '': - with open(targetFile) as f: + targetFile = transDir + "/" + tfile + cfg.LabelFileType - data = f.readlines() + with open(targetFile) as f: - if len(data) == 0: - targetList.append([]) - else: - for i in range(len(data)): - targetData = np.fromstring(data[i], dtype=np.uint16, sep=' ') - targetList.append(targetData) + data = f.readlines() - ################################################################ - # Gathering the length of each sequence + if len(data) == 0: + targetList.append([]) + else: + for i in range(len(data)): + targetData = np.fromstring( + data[i], dtype=np.uint16, sep=' ') + targetList.append(targetData) - if filesLocation != '': imageFilePath = filesLocation + "/" + tfile + cfg.ImageFileType - else: imageFilePath = imageFile.strip('\n') + cfg.ImageFileType + ################################################################ + # Gathering the length of each sequence - print ("Reading " + imageFilePath) + if filesLocation != '': + imageFilePath = filesLocation + "/" + tfile + cfg.ImageFileType + else: + imageFilePath = imageFile.strip('\n') + cfg.ImageFileType - image = cv2.imread(imageFilePath, cv2.IMREAD_GRAYSCALE) + print("Reading " + imageFilePath) - h, w = np.shape(image) + image = cv2.imread(imageFilePath, cv2.IMREAD_GRAYSCALE) - if(h > WND_HEIGHT): factor = WND_HEIGHT/float(h) - else: factor = 1.0 + h, w = np.shape(image) - image = cv2.resize(image, None, fx=factor, fy=factor, interpolation = cv2.INTER_CUBIC) + if (h > WND_HEIGHT): + factor = WND_HEIGHT/float(h) + else: + factor = 1.0 - h, w = np.shape(image) + image = cv2.resize(image, None, fx=factor, + fy=factor, interpolation=cv2.INTER_CUBIC) - winId = 0 - wpd = 0 - while True: + h, w = np.shape(image) - s = (winId * WND_SHIFT) - e = s + WND_WIDTH + winId = 0 + wpd = 0 + while True: - if e > w: - sl = (winId+1) * VEC_PER_WND + s = (winId * WND_SHIFT) + e = s + WND_WIDTH - if transDir != '': - #Fix for small sequences - if(len(targetList[imgNbr]) > sl): - diff = len(targetList[imgNbr]) - sl - wpd = int(math.ceil(float(diff) / VEC_PER_WND)) - sl = sl + wpd * VEC_PER_WND + if e > w: + sl = (winId+1) * VEC_PER_WND - seqLens.append(sl) + if transDir != '': + # Fix for small sequences + if (len(targetList[imgNbr]) > sl): + diff = len(targetList[imgNbr]) - sl + wpd = int(math.ceil(float(diff) / VEC_PER_WND)) + sl = sl + wpd * VEC_PER_WND - break + seqLens.append(sl) - winId = winId + 1 + break - ################################################################ - # Adding features + winId = winId + 1 - featuresSet = [] + ################################################################ + # Adding features - winId = 0 - while True: + featuresSet = [] - s = (winId * WND_SHIFT) - e = s + WND_WIDTH + winId = 0 + while True: - if e > w: - pad = np.ones((h, (e - w)), np.uint8)*255 - wnd = image[:h,s:w] - wnd = np.append(wnd, pad, axis=1) + s = (winId * WND_SHIFT) + e = s + WND_WIDTH - if h < WND_HEIGHT: - pad = np.ones(((WND_HEIGHT - h), WND_WIDTH), np.uint8)*255 - wnd = np.append(pad, wnd, axis=0) + if e > w: + pad = np.ones((h, (e - w)), np.uint8)*255 + wnd = image[:h, s:w] + wnd = np.append(wnd, pad, axis=1) - featuresSet.append(wnd) + if h < WND_HEIGHT: + pad = np.ones( + ((WND_HEIGHT - h), WND_WIDTH), np.uint8)*255 + wnd = np.append(pad, wnd, axis=0) - #Fix for small sequences - pad = np.ones((WND_HEIGHT, WND_WIDTH), np.uint8)*255 + featuresSet.append(wnd) - for i in range(wpd): featuresSet.append(pad) + # Fix for small sequences + pad = np.ones((WND_HEIGHT, WND_WIDTH), np.uint8)*255 - break + for i in range(wpd): + featuresSet.append(pad) - wnd = image[:h,s:e] + break - if h < WND_HEIGHT: - pad = np.ones(((WND_HEIGHT - h), WND_WIDTH), np.uint8)*255 - wnd = np.append(pad, wnd, axis=0) + wnd = image[:h, s:e] - featuresSet.append(wnd) - winId = winId + 1 + if h < WND_HEIGHT: + pad = np.ones(((WND_HEIGHT - h), WND_WIDTH), np.uint8)*255 + wnd = np.append(pad, wnd, axis=0) - ################################################################ - inputList.append(featuresSet) + featuresSet.append(wnd) + winId = winId + 1 - imgNbr = imgNbr + 1 - ################################################################ + ################################################################ + inputList.append(featuresSet) - if transDir != '': - assert len(inputList) == len(targetList) + imgNbr = imgNbr + 1 + ################################################################ - return inputList, seqLens, targetList + if transDir != '': + assert len(inputList) == len(targetList) + return inputList, seqLens, targetList