-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclassification.py
More file actions
121 lines (96 loc) · 3.26 KB
/
classification.py
File metadata and controls
121 lines (96 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from __future__ import absolute_import, division, print_function
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import os
import matplotlib.pyplot as plt
# Read in data and shuffle the row order. sample(frac=1) returns a NEW
# shuffled frame — the original code discarded the result, so the data
# was never actually shuffled; reassign it to fix that.
df = pd.read_csv("data.csv")
df = df.sample(frac=1).reset_index(drop=True)

# Split data into features and labels. .values makes Y a plain ndarray,
# reshaped to a column vector.
Y = df["time"].values.reshape(df.shape[0], 1)

# Drop the label, the identifier, and columns "1".."7". Positional `axis`
# arguments to DataFrame.drop were removed in pandas 2.0, so use the
# `columns=` keyword instead of drop(name, 1).
df = df.drop(columns=["time", "bus_id"])
df = df.drop(columns=[str(i) for i in range(1, 8)])

dataset = df.values
dataset = dataset.astype(float)
# NOTE(review): column 0 of the remaining frame is skipped here —
# presumably a non-feature column; confirm against data.csv's layout.
X = dataset[:, 1:dataset.shape[1]]

# Bucket the continuous arrival time into classes: 5 edges -> digitize
# yields indices 0..5, one-hot encoded for categorical cross-entropy.
bins = [76200, 76500, 76800, 77100, 77400]
Y = keras.utils.to_categorical(np.digitize(Y, bins, True))

# Split into training and test groups (80/20).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

# Feature normalization: standardize using training-set statistics only,
# so no information leaks from the test set into the scaling.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std
print(mean)
print(std)
# Defines the model
def build_model(input_dim=None):
    """Build and compile the time-bucket classifier.

    Args:
        input_dim: Number of input features. Defaults to the feature
            count of the global X_train, preserving the original
            behavior for existing callers.

    Returns:
        A compiled keras.Sequential model whose 6-unit softmax output
        matches the 6 classes produced by np.digitize with 5 bin edges.
    """
    # Previously the input shape was read from the global X_train inside
    # the layer definition; parameterizing it removes the hidden global
    # dependency while keeping the no-argument call working.
    if input_dim is None:
        input_dim = X_train.shape[1]
    model = keras.Sequential([
        keras.layers.Dense(8, activation=tf.nn.relu,
                           input_shape=(input_dim,)),
        keras.layers.Dense(5, activation=tf.nn.relu),
        keras.layers.Dense(6, activation=tf.nn.softmax),
    ])
    # NOTE(review): "mae" is an unusual metric for a classifier —
    # accuracy would be more informative — but the plotting code below
    # reads the mean-absolute-error history keys, so it is kept.
    optimizer = tf.train.AdamOptimizer()
    model.compile(loss="categorical_crossentropy",
                  optimizer=optimizer,
                  metrics=["mae"])
    return model
# Uses the model
model = build_model()
# NOTE(review): early_stop is constructed here but never passed to the
# callbacks list of model.fit below, so early stopping never triggers —
# confirm whether it was meant to be included alongside PrintDot.
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=20)
# Display training progress by printing a single dot for each completed epoch
class PrintDot(keras.callbacks.Callback):
    """Compact progress indicator: one '.' per epoch, newline every 100."""

    def on_epoch_end(self, epoch, logs):
        # Start a fresh output row at every 100th epoch, then emit the dot.
        marker = "\n." if epoch % 100 == 0 else "."
        print(marker, end="")
# Total training epochs (no early stopping is wired into fit() below).
EPOCHS = 2500
# Saves the things: checkpoint weights to disk. With period=EPOCHS the
# checkpoint is written once, on the final epoch only.
checkpoint_path = "./checkpoint.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callbacks = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                  save_weights_only=True,
                                                  verbose=1, period=EPOCHS)
# Store training stats; 20% of the training split is held out for
# per-epoch validation. NOTE(review): the early_stop callback defined
# above is not in this callbacks list — confirm whether that is intentional.
history = model.fit(X_train, Y_train, epochs=EPOCHS,
                    validation_split=0.2, verbose=0,
                    callbacks=[PrintDot(), cp_callbacks])
# Persist the full model (architecture + weights) for later reloading.
model.save("model.h5")
# Graph training and cross-validation mean-absolute-error curves.
plt.figure()
plt.xlabel('Epoch')
plt.ylabel('Mean Abs Error')
# The history key for the "mae" metric differs across Keras versions
# ('mean_absolute_error' in older releases, 'mae' in newer ones); resolve
# whichever is present instead of hard-coding one and risking a KeyError.
mae_key = ('mean_absolute_error'
           if 'mean_absolute_error' in history.history else 'mae')
plt.plot(history.epoch, np.array(history.history[mae_key]),
         label='Train Loss')
plt.plot(history.epoch, np.array(history.history['val_' + mae_key]),
         label='Val loss')
plt.legend()
# The original plt.xlim(plt.xlim()) / plt.ylim(plt.ylim()) calls set the
# current limits to themselves — pure no-ops — and were removed.
plt.show()
# model.load_weights("model.h5")
# Runs test points through the trained model; argmax over the softmax
# output recovers the predicted class index for each sample, and the
# one-hot test labels are argmaxed back to indices for comparison.
test_predictions = np.argmax(model.predict(X_test), axis=1)
Y_argmaxed = np.argmax(Y_test, axis=1)

# Predicted vs. true class indices; points on the diagonal are correct.
plt.scatter(Y_argmaxed, test_predictions)
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.axis('equal')
# Reference diagonal; the no-op plt.xlim(plt.xlim())/plt.ylim(plt.ylim())
# calls (set current limits to themselves) were removed.
_ = plt.plot([-100, 100], [-100, 100])
plt.show()

# Histogram of the signed class-index error (prediction minus truth).
error = test_predictions - Y_argmaxed
plt.hist(error, bins=50)
plt.xlabel("Prediction Error")
_ = plt.ylabel("Count")
plt.show()