@@ -2,9 +2,12 @@
 import tensorflow as tf
 from tensorflow import keras
 # from tensorflow.keras import layers
-# TODO: error : ValueError: Error when checking input: expected embedding_1_input to have 2 dimensions, but got array with shape (196024, 40, 55)
-# from keras.optimizers import RMSprop
+# from keras.callbacks import LambdaCallback
+import random
+import sys
+import io
 
+# one hot encoding / vectorizing
 with open('1984.txt', 'r') as file:
     text = file.read().lower()
 print('text length', len(text))
@@ -13,6 +16,7 @@
 char_indices = dict((c, i) for i, c in enumerate(chars))
 indices_char = dict((i, c) for i, c in enumerate(chars))
 
+# create our time steps
 maxlen = 40
 step = 3
 sentences = []
@@ -27,23 +31,60 @@
         x[i, t, char_indices[char]] = 1
     y[i, char_indices[next_chars[i]]] = 1
 
-model = keras.Sequential()
+# model = keras.Sequential()
 # Add an Embedding layer expecting input vocab of size 1000, and
 # output embedding dimension of size 64.
-model.add(keras.layers.Embedding(input_dim=1000, output_dim=64))
-
-# Add a RNN layer with 128 internal units.
-model.add(keras.layers.SimpleRNN(128))
-
-# Add a Dense layer with 10 units.
-model.add(keras.layers.Dense(10))
+# model.add(keras.layers.Embedding(input_dim=1000, output_dim=64))
 
-model.summary()
+# Add an LSTM layer with 128 internal units.
+model = keras.Sequential()
+model.add(keras.layers.LSTM(128, input_shape=(maxlen, len(chars))))
+model.add(keras.layers.Dense(len(chars), activation='softmax'))
 
 # predict some text
 
 optimizer = keras.optimizers.RMSprop(lr=0.01)
 model.compile(loss='categorical_crossentropy', optimizer=optimizer)
+def sample(preds, temperature=1.0):
+    # helper function to sample an index from a probability array
+    preds = np.asarray(preds).astype('float64')
+    preds = np.log(preds) / temperature
+    exp_preds = np.exp(preds)
+    preds = exp_preds / np.sum(exp_preds)
+    probas = np.random.multinomial(1, preds, 1)
+    return np.argmax(probas)
+
+def on_epoch_end(epoch, _):
+    # Function invoked at end of each epoch. Prints generated text.
+    print()
+    print('----- Generating text after Epoch: %d' % epoch)
+
+    start_index = random.randint(0, len(text) - maxlen - 1)
+    for diversity in [0.2, 0.5, 1.0, 1.2]:
+        print('----- diversity:', diversity)
+
+        generated = ''
+        sentence = text[start_index: start_index + maxlen]
+        generated += sentence
+        print('----- Generating with seed: "' + sentence + '"')
+        sys.stdout.write(generated)
+
+        for i in range(400):
+            x_pred = np.zeros((1, maxlen, len(chars)))
+            for t, char in enumerate(sentence):
+                x_pred[0, t, char_indices[char]] = 1.
+
+            preds = model.predict(x_pred, verbose=0)[0]
+            next_index = sample(preds, diversity)
+            next_char = indices_char[next_index]
+
+            generated += next_char
+            sentence = sentence[1:] + next_char
 
+            sys.stdout.write(next_char)
+            sys.stdout.flush()
+        print()
 
-model.fit(x, y, batch_size=128, epochs=5)
+print_callback = keras.callbacks.LambdaCallback(on_epoch_end=on_epoch_end)
+model.fit(x, y, batch_size=128, epochs=5,
+          callbacks=[print_callback])
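A note on the model change: the TODO removed at the top of the diff records a ValueError from feeding the one-hot array of shape (196024, 40, 55), i.e. (samples, maxlen, len(chars)), into an Embedding layer, which expects 2-D integer-index input. The commit fixes this by dropping the Embedding/SimpleRNN stack and giving the LSTM the one-hot frames directly via input_shape=(maxlen, len(chars)). If the Embedding route were wanted instead, a rough sketch (not part of this commit; it reuses sentences, next_chars, chars and char_indices from the script above, and the emb_* names are illustrative) would encode each character as an integer index and use sparse targets:

# Hypothetical alternative, not in the commit: Embedding over integer-coded
# characters instead of one-hot vectors.
x_int = np.zeros((len(sentences), maxlen), dtype='int32')
y_int = np.zeros((len(sentences),), dtype='int32')
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x_int[i, t] = char_indices[char]      # character index, not one-hot
    y_int[i] = char_indices[next_chars[i]]

emb_model = keras.Sequential()
emb_model.add(keras.layers.Embedding(input_dim=len(chars), output_dim=64))
emb_model.add(keras.layers.LSTM(128))
emb_model.add(keras.layers.Dense(len(chars), activation='softmax'))
emb_model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop')
# emb_model.fit(x_int, y_int, batch_size=128, epochs=5)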
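One small version note: RMSprop(lr=0.01) uses the older argument name. Newer tf.keras releases prefer learning_rate and warn about lr (and some recent versions no longer accept it at all). If the script targets a current TensorFlow, the equivalent line would be:

optimizer = keras.optimizers.RMSprop(learning_rate=0.01)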
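The sample() helper added here implements temperature ("diversity") sampling: the predicted distribution is re-weighted as p**(1/temperature) and renormalized before one character index is drawn, so low temperatures concentrate probability on the most likely character and high temperatures flatten the distribution. A small standalone check of that rescaling, using toy numbers rather than real model output:

import numpy as np

p = np.array([0.5, 0.3, 0.2])      # toy character distribution
for t in (0.2, 1.0, 1.2):
    q = np.exp(np.log(p) / t)      # same rescaling sample() applies
    q /= q.sum()
    print(t, np.round(q, 2))
# 0.2 -> [0.92 0.07 0.01]  almost always the top character
# 1.0 -> [0.5  0.3  0.2 ]  the model's own distribution
# 1.2 -> [0.47 0.31 0.22]  flatter, more surprising choices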