diff --git a/README.md b/README.md
index 7bafccb..5b1c7d1 100755
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # Tensorflow Chatbot
-Tensorflow Chatbot Demo by @Sirajology on [Youtube](https://youtu.be/SJDEOWLHYVo)
+Chatbot built with TensorFlow and trained on the Cornell Movie-Dialogs Corpus.
 
 Overview
 ============
@@ -36,17 +36,24 @@ then run the code like so
 
 ``python execute.py``
 
+To run it with the UI in your web browser:
 
-Challenge
-===========
+- install Flask in your venv
+- open execute.py
+
+	comment out this: _conf_ints = [ (key, int(value)) for key,value in parser.items('ints') ]
+	uncomment the assignment below it for _conf_ints
+
+	comment out this: _conf_floats = [ (key, float(value)) for key,value in parser.items('floats') ]
+	uncomment the assignment below it for _conf_floats
+
+	comment out this: _conf_strings = [ (key, str(value)) for key,value in parser.items('strings') ]
+	uncomment the assignment below it for _conf_strings
 
-The challenge for this video is write an entirely different script using [TF Learn](http://tflearn.org/) to generate Lord of the Ring style sentences. Check out this very similar [example](https://github.com/tflearn/tflearn/blob/master/examples/nlp/lstm_generator_shakespeare.py), it uses TF Learn to generate Shakespeare-style sentences. Train your model on Lord of the rings text to do something similar! And play around with the hyperparameters to get a more accurate result. Post your GitHub link in the video comments and I'll judge it!
-### Due date: December 8th
+- run ui/app.py
 
-Also see this issue, some people have found this discussion helpful
-https://github.com/llSourcell/tensorflow_chatbot/issues/3
 
 Credits
 ===========
-Credit for the vast majority of code here goes to [suriyadeepan](https://github.com/suriyadeepan). I've merely created a wrapper to get people started.
+Credit for the vast majority of code here goes to Siraj and [suriyadeepan](https://github.com/suriyadeepan).
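For readers following the README steps above, the comment/uncomment swap looks like this in execute.py. This is a sketch of one of the three assignments; the hard-coded values mirror the defaults in seq2seq.ini, as the execute.py hunk below shows:

```python
# Default: read hyperparameters from the .ini file -- comment this out for UI mode:
#_conf_ints = [(key, int(value)) for key, value in parser.items('ints')]

# Hard-coded fallback (values mirror seq2seq.ini) -- uncomment this for UI mode:
_conf_ints = [('enc_vocab_size', 20000), ('dec_vocab_size', 20000),
              ('num_layers', 3), ('layer_size', 256),
              ('max_train_data_size', 0), ('batch_size', 64),
              ('steps_per_checkpoint', 300)]
```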
diff --git a/execute.py b/execute.py
index 1ce1dc9..135e6a1 100755
--- a/execute.py
+++ b/execute.py
@@ -40,19 +40,25 @@ reload(sys).setdefaultencoding('utf-8')
 
 try:
-    from ConfigParser import SafeConfigParser
+    from configparser import ConfigParser
 except:
-    from configparser import SafeConfigParser # In Python 3, ConfigParser has been renamed to configparser for PEP 8 compliance.
+    from configparser import ConfigParser # In Python 3, ConfigParser has been renamed to configparser for PEP 8 compliance.
 
 gConfig = {}
 
-def get_config(config_file='seq2seq.ini'):
-    parser = SafeConfigParser()
+def get_config(config_file='/Users/ajay/Desktop/tensorflow_chatbot/seq2seq.ini'):
+    parser = ConfigParser()
     parser.read(config_file)
     # get the ints, floats and strings
     _conf_ints = [ (key, int(value)) for key,value in parser.items('ints') ]
+    #_conf_ints = [('enc_vocab_size', 20000), ('dec_vocab_size', 20000), ('num_layers', 3), ('layer_size', 256),
+    #('max_train_data_size', 0), ('batch_size', 64), ('steps_per_checkpoint', 300)]
     _conf_floats = [ (key, float(value)) for key,value in parser.items('floats') ]
+    #_conf_floats = [('learning_rate', 0.5), ('learning_rate_decay_factor', 0.99), ('max_gradient_norm', 5.0)]
     _conf_strings = [ (key, str(value)) for key,value in parser.items('strings') ]
+    #_conf_strings = [('mode', 'test'), ('train_enc', 'data/train.enc'), ('train_dec',
+    #'data/train.dec'), ('test_enc','data/test.enc')
+    #, ('test_dec', 'data/test.dec'), ('working_directory', '/Users/ajay/Desktop/tensorflow_chatbot/working_dir')]
     return dict(_conf_ints + _conf_floats + _conf_strings)
 
 # We use a number of buckets and pad to the closest one for efficiency.
diff --git a/neuralconvo.ini b/neuralconvo.ini
index 0048351..388b023 100755
--- a/neuralconvo.ini
+++ b/neuralconvo.ini
@@ -4,9 +4,9 @@ mode = train
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
 # folder where checkpoints, vocabulary, temporary data will be stored
-working_directory = working_dir/
+working_directory = /Users/ajay/Desktop/tensorflow_chatbot/working_dir
 [ints]
 # vocabulary size
 # 20,000 is a reasonable size
diff --git a/seq2seq.ini b/seq2seq.ini
index 392f9da..21767f9 100755
--- a/seq2seq.ini
+++ b/seq2seq.ini
@@ -4,9 +4,9 @@ mode = train
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
 # folder where checkpoints, vocabulary, temporary data will be stored
-working_directory = working_dir/
+working_directory = /Users/ajay/Desktop/tensorflow_chatbot/working_dir
 [ints]
 # vocabulary size
 # 20,000 is a reasonable size
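Note that after the execute.py change above, both branches of the try/except import the same Python 3 module, so the except clause no longer provides a Python 2 fallback. If Python 2 support still matters, a shim along these lines (a sketch, not what this patch does) keeps both interpreters working:

```python
try:
    # Python 3: the module was renamed to configparser for PEP 8 compliance.
    from configparser import ConfigParser
except ImportError:
    # Python 2 fallback: alias the old class to the new name.
    from ConfigParser import SafeConfigParser as ConfigParser
```

Also note the hard-coded absolute default for config_file (and working_directory in the .ini files) ties the repo to one machine; a relative path like the original 'seq2seq.ini' is more portable.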
diff --git a/seq2seq_model.py b/seq2seq_model.py
index 5b9f39b..53f4d42 100755
--- a/seq2seq_model.py
+++ b/seq2seq_model.py
@@ -19,23 +19,16 @@
 from __future__ import division
 from __future__ import print_function
 
+import copy
 import random
 
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 import tensorflow as tf
-#from tensorflow.models.rnn.translate import data_utils
-#fixes: File "execute.py", line 31, in <module>
-#  import seq2seq_model
-#  File "C:\PYTHONCODE\Tensorflow\chatbot\tensorflow_chatbot\seq2seq_model.py",
-#  line 28, in <module>
-#  from tensorflow.models.rnn.translate import data_utils
-#ModuleNotFoundError: No module named 'tensorflow.models'
 import data_utils
 
-
 class Seq2SeqModel(object):
   """Sequence-to-sequence model with attention and for multiple buckets.
@@ -51,10 +44,20 @@ class Seq2SeqModel(object):
     http://arxiv.org/abs/1412.2007
   """
 
-  def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
-               num_layers, max_gradient_norm, batch_size, learning_rate,
-               learning_rate_decay_factor, use_lstm=False,
-               num_samples=512, forward_only=False):
+  def __init__(self,
+               source_vocab_size,
+               target_vocab_size,
+               buckets,
+               size,
+               num_layers,
+               max_gradient_norm,
+               batch_size,
+               learning_rate,
+               learning_rate_decay_factor,
+               use_lstm=False,
+               num_samples=512,
+               forward_only=False,
+               dtype=tf.float32):
     """Create the model.
 
     Args:
@@ -76,12 +79,14 @@ def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
       use_lstm: if true, we use LSTM cells instead of GRU cells.
       num_samples: number of samples for sampled softmax.
       forward_only: if set, we do not construct the backward pass in the model.
+      dtype: the data type to use to store internal variables.
     """
     self.source_vocab_size = source_vocab_size
     self.target_vocab_size = target_vocab_size
     self.buckets = buckets
     self.batch_size = batch_size
-    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
+    self.learning_rate = tf.Variable(
+        float(learning_rate), trainable=False, dtype=dtype)
     self.learning_rate_decay_op = self.learning_rate.assign(
         self.learning_rate * learning_rate_decay_factor)
     self.global_step = tf.Variable(0, trainable=False)
@@ -91,34 +96,52 @@ def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
     softmax_loss_function = None
     # Sampled softmax only makes sense if we sample less than vocabulary size.
     if num_samples > 0 and num_samples < self.target_vocab_size:
-      w = tf.get_variable("proj_w", [size, self.target_vocab_size])
-      w_t = tf.transpose(w)
-      b = tf.get_variable("proj_b", [self.target_vocab_size])
+      w_t = tf.get_variable("proj_w", [self.target_vocab_size, size], dtype=dtype)
+      w = tf.transpose(w_t)
+      b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=dtype)
       output_projection = (w, b)
 
-      def sampled_loss(inputs, labels):
+      def sampled_loss(labels, logits):
         labels = tf.reshape(labels, [-1, 1])
-        return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples,
-                                          self.target_vocab_size)
+        # We need to compute the sampled_softmax_loss using 32bit floats to
+        # avoid numerical instabilities.
+        local_w_t = tf.cast(w_t, tf.float32)
+        local_b = tf.cast(b, tf.float32)
+        local_inputs = tf.cast(logits, tf.float32)
+        return tf.cast(
+            tf.nn.sampled_softmax_loss(
+                weights=local_w_t,
+                biases=local_b,
+                labels=labels,
+                inputs=local_inputs,
+                num_sampled=num_samples,
+                num_classes=self.target_vocab_size),
+            dtype)
       softmax_loss_function = sampled_loss
 
     # Create the internal multi-layer cell for our RNN.
-    single_cell = tf.nn.rnn_cell.GRUCell(size)
+    def single_cell():
+      return tf.contrib.rnn.GRUCell(size)
     if use_lstm:
-      single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
-    cell = single_cell
+      def single_cell():
+        return tf.contrib.rnn.BasicLSTMCell(size)
+    cell = single_cell()
     if num_layers > 1:
-      cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)
+      cell = tf.contrib.rnn.MultiRNNCell([single_cell() for _ in range(num_layers)])
 
     # The seq2seq function: we use embedding for the input and attention.
-    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
-      return tf.nn.seq2seq.embedding_attention_seq2seq(
-          encoder_inputs, decoder_inputs, cell,
-          num_encoder_symbols=source_vocab_size,
-          num_decoder_symbols=target_vocab_size,
-          embedding_size=size,
-          output_projection=output_projection,
-          feed_previous=do_decode)
+    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode=False):
+      tmp_cell = copy.deepcopy(cell)  # new: each bucket graph gets its own cell copy
+      return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
+          encoder_inputs,
+          decoder_inputs,
+          tmp_cell,  # new
+          num_encoder_symbols=source_vocab_size,
+          num_decoder_symbols=target_vocab_size,
+          embedding_size=size,
+          output_projection=output_projection,
+          feed_previous=do_decode,
+          dtype=dtype)
 
     # Feeds for inputs.
     self.encoder_inputs = []
@@ -130,7 +153,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
     for i in xrange(buckets[-1][1] + 1):
       self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                                 name="decoder{0}".format(i)))
-      self.target_weights.append(tf.placeholder(tf.float32, shape=[None],
+      self.target_weights.append(tf.placeholder(dtype, shape=[None],
                                                 name="weight{0}".format(i)))
 
     # Our targets are decoder inputs shifted by one.
@@ -139,7 +162,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
 
     # Training outputs and losses.
     if forward_only:
-      self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+      self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
           self.encoder_inputs, self.decoder_inputs, targets,
           self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
           softmax_loss_function=softmax_loss_function)
@@ -151,7 +174,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
             for output in self.outputs[b]
         ]
     else:
-      self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+      self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
          self.encoder_inputs, self.decoder_inputs, targets,
          self.target_weights, buckets,
          lambda x, y: seq2seq_f(x, y, False),
@@ -171,7 +194,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
       self.updates.append(opt.apply_gradients(
           zip(clipped_gradients, params), global_step=self.global_step))
 
-    self.saver = tf.train.Saver(tf.all_variables())
+    self.saver = tf.train.Saver(tf.global_variables())
 
   def step(self, session, encoder_inputs, decoder_inputs, target_weights,
            bucket_id, forward_only):
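The seq2seq_model.py hunks above track the TensorFlow 1.x API migration: tf.nn.rnn_cell and tf.nn.seq2seq moved to tf.contrib.rnn and tf.contrib.legacy_seq2seq, tf.nn.sampled_softmax_loss switched to keyword arguments with labels before inputs, and tf.all_variables became tf.global_variables. The copy.deepcopy(cell) in seq2seq_f works around legacy_seq2seq reusing one cell object across bucket graphs. The same per-instance rule motivates the single_cell() factory; a minimal sketch of that pattern (helper names here are illustrative, assuming a TF 1.x install):

```python
import tensorflow as tf

def make_cell(size, use_lstm=False):
    # Factory: build a fresh cell object on every call.
    if use_lstm:
        return tf.contrib.rnn.BasicLSTMCell(size)
    return tf.contrib.rnn.GRUCell(size)

def make_multi_cell(size, num_layers, use_lstm=False):
    # TF >= 1.0 wants a distinct cell instance per layer;
    # [cell] * num_layers would make every layer share one object,
    # which can fail or silently share weights.
    if num_layers > 1:
        return tf.contrib.rnn.MultiRNNCell(
            [make_cell(size, use_lstm) for _ in range(num_layers)])
    return make_cell(size, use_lstm)
```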
@@ -270,20 +293,20 @@ def get_batch(self, data, bucket_id):
     batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []
 
     # Batch encoder inputs are just re-indexed encoder_inputs.
-    for length_idx in xrange(encoder_size):
+    for length_idx in range(encoder_size):
       batch_encoder_inputs.append(
           np.array([encoder_inputs[batch_idx][length_idx]
-                    for batch_idx in xrange(self.batch_size)], dtype=np.int32))
+                    for batch_idx in range(self.batch_size)], dtype=np.int32))
 
     # Batch decoder inputs are re-indexed decoder_inputs, we create weights.
-    for length_idx in xrange(decoder_size):
+    for length_idx in range(decoder_size):
       batch_decoder_inputs.append(
           np.array([decoder_inputs[batch_idx][length_idx]
-                    for batch_idx in xrange(self.batch_size)], dtype=np.int32))
+                    for batch_idx in range(self.batch_size)], dtype=np.int32))
 
       # Create target_weights to be 0 for targets that are padding.
       batch_weight = np.ones(self.batch_size, dtype=np.float32)
-      for batch_idx in xrange(self.batch_size):
+      for batch_idx in range(self.batch_size):
         # We set weight to 0 if the corresponding target is a PAD symbol.
         # The corresponding target is decoder_input shifted by 1 forward.
         if length_idx < decoder_size - 1:
@@ -291,4 +314,4 @@ def get_batch(self, data, bucket_id):
         if length_idx == decoder_size - 1 or target == data_utils.PAD_ID:
           batch_weight[batch_idx] = 0.0
       batch_weights.append(batch_weight)
-    return batch_encoder_inputs, batch_decoder_inputs, batch_weights
+    return batch_encoder_inputs, batch_decoder_inputs, batch_weights
\ No newline at end of file
diff --git a/seq2seq_serve.ini b/seq2seq_serve.ini
index fe09704..69af8fb 100755
--- a/seq2seq_serve.ini
+++ b/seq2seq_serve.ini
@@ -4,9 +4,9 @@ mode = serve
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
 # folder where checkpoints, vocabulary, temporary data will be stored
-working_directory = working_dir/
+working_directory = /Users/ajay/Desktop/tensorflow_chatbot/working_dir
 [ints]
 # vocabulary size
 # 20,000 is a reasonable size
diff --git a/ui/app.py b/ui/app.py
index fd09a09..73dd494 100755
--- a/ui/app.py
+++ b/ui/app.py
@@ -1,7 +1,7 @@
 from flask import Flask, render_template, request
 from flask import jsonify
 
-app = Flask(__name__,static_url_path="/static")
+app = Flask(__name__, static_url_path="/static")
 
 #############
 # Routing
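The ui/app.py hunk above only touches the Flask constructor, but it is the entry point the README's "run ui/app.py" step refers to. For orientation, a minimal routing sketch in the same style; the route paths, template name, and get_bot_reply hook are hypothetical placeholders, not the repository's actual endpoints:

```python
from flask import Flask, render_template, request, jsonify

app = Flask(__name__, static_url_path="/static")

@app.route('/')
def index():
    # Serve the chat page (template name is a placeholder).
    return render_template('index.html')

@app.route('/message', methods=['POST'])
def message():
    # get_bot_reply would wrap the trained seq2seq model's decode step;
    # it is assumed here, not defined in this patch.
    user_text = request.form['msg']
    return jsonify({'text': get_bot_reply(user_text)})

if __name__ == '__main__':
    app.run(debug=True)
```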