-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcode.py
More file actions
90 lines (81 loc) · 2.84 KB
/
code.py
File metadata and controls
90 lines (81 loc) · 2.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
# Fetch the Nietzsche corpus through Keras' get_file helper and build the
# character vocabulary that the one-hot encoding below is based on.
path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")

# Read inside a context manager so the file handle is closed as soon as the
# text is in memory, instead of being left open until garbage collection.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used -- confirm this matches the downloaded file on your system.
with open(path) as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Sorted list of the distinct characters; a character's position in this list
# is the index used for it in the one-hot vectors.
chars = sorted(set(text))
print('total chars:', len(chars))

# Lookup tables in both directions: character -> index and index -> character.
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}
# Cut the corpus into overlapping windows: every `step` characters, take a
# window of `maxlen` characters as an input sequence and remember the single
# character that immediately follows it as the prediction target.
maxlen = 40
step = 3

# The range stops at len(text) - maxlen so that text[start + maxlen] is always
# a valid index.
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))
print('Vectorization...')
# One-hot encode the training data.
#   X[i, t, k] is True when character t of sequence i has vocabulary index k.
#   Y[i, k]    is True when the character following sequence i has index k.
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in 1.24, and it named exactly this type.
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
Y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    # Mark the vocabulary index of every character in the input window.
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = True
    # Mark the vocabulary index of the target character (once per sequence).
    Y[i, char_indices[next_chars[i]]] = True
print('Build model...')
# Network: one LSTM layer with 128 units reading (maxlen, len(chars)) one-hot
# input, followed by a Dense layer with one output per vocabulary character
# and a softmax activation over those outputs.
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

# NOTE(review): recent Keras releases spell this argument `learning_rate`;
# `lr` is kept because it is the spelling this file was written against --
# confirm against the installed Keras version.
optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

# Train for a single pass over the data. `epochs` replaces the Keras 1
# argument name `nb_epoch`, which was renamed in Keras 2.
model.fit(X, Y, batch_size=128, epochs=1)
# def sample(preds, temperature=1.0):
#     # helper function to sample an index from a probability array
#     preds = np.asarray(preds).astype('float64')
#     preds = np.log(preds) / temperature
#     exp_preds = np.exp(preds)
#     preds = exp_preds / np.sum(exp_preds)
#     probas = np.random.multinomial(1, preds, 1)
#     return np.argmax(probas)
#
# for iteration in range(1, 60):
#     print()
#     print('-' * 50)
#     print('Iteration', iteration)
#     model.fit(X, Y, batch_size=128, nb_epoch=1)
#
#     start_index = random.randint(0, len(text) - maxlen - 1)
#
#     for diversity in [0.2, 0.5, 1.0, 1.2]:
#         print()
#         print('----- diversity:', diversity)
#
#         generated = ''
#         sentence = text[start_index: start_index + maxlen]
#         generated += sentence
#         print('----- Generating with seed: "' + sentence + '"')
#         sys.stdout.write(generated)
#
#         for i in range(400):
#             x = np.zeros((1, maxlen, len(chars)))
#             for t, char in enumerate(sentence):
#                 x[0, t, char_indices[char]] = 1.
#
#             preds = model.predict(x, verbose=0)[0]
#             next_index = sample(preds, diversity)
#             next_char = indices_char[next_index]
#
#             generated += next_char
#             sentence = sentence[1:] + next_char
#
#             sys.stdout.write(next_char)
#             sys.stdout.flush()
#         print()