This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit 6898305
yetiancn authored and apavlo committed

Add first implementation of augmentedNN to predict selectivity (#1473)

* add first implementation of augmentedNN to predict selectivity for range predicates
* add first implementation of augmentedNN to predict selectivity
* add first implementation of augmentedNN to predict selectivity
* add comments and modify variable names
* rename some variables
* create brain/selectivity; create new test file for augmented_nn.
* remove duplicated files
* check if travis is ok

1 parent 1fc8b55 · commit 6898305

14 files changed: +1108 −250 lines

src/brain/modelgen/augmented_nn.py (new file: 115 additions, 0 deletions)

```python
#===----------------------------------------------------------------------===#
#
# Peloton
#
# augmented_nn.py
#
# Identification: src/brain/modelgen/augmented_nn.py
#
# Copyright (c) 2015-2018, Carnegie Mellon University Database Group
#
#===----------------------------------------------------------------------===#

import tensorflow as tf
import functools
import os
import argparse

def lazy_property(function):
    """Memoize a property so its graph nodes are built only once."""
    attribute = '_cache_' + function.__name__

    @property
    @functools.wraps(function)
    def decorator(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)

    return decorator

class AugmentedNN:

    def __init__(self, column_num, order=1, neuron_num=16, lr=0.1, **kwargs):
        tf.reset_default_graph()
        # Each column contributes two input features, hence column_num*2.
        self.data = tf.placeholder(tf.float32, [None, column_num*2], name="data_")
        self.target = tf.placeholder(tf.float32, [None, 1], name="target_")
        self._column_num = column_num
        self._order = order
        self._neuron_num = neuron_num
        self._lr = tf.placeholder_with_default(lr, shape=None,
                                               name="learn_rate_")
        self.tf_init = tf.global_variables_initializer
        # Touch the lazy properties so all ops are added to the graph now.
        self.prediction
        self.loss
        self.optimize

    @staticmethod
    def jump_activation(k):
        """
        An activation function used to learn discontinuous functions:
        f_k(x) = max(0, 1 - e^(-x))^k.
        Reference: https://dl.acm.org/citation.cfm?id=2326898
        """
        def jump_activation_k(x):
            return tf.pow(tf.maximum(0.0, 1 - tf.exp(-x)), k)
        return jump_activation_k

    @lazy_property
    def prediction(self):
        net = self.data
        kernel_init = tf.random_normal_initializer(mean=0.0001, stddev=0.0001)
        with tf.name_scope("hidden_layer"):
            net_shape = tf.shape(net)
            bsz = net_shape[0]

            # One dense sub-layer per activation order, concatenated.
            h1_layers = []
            for i in range(1, self._order + 1):
                h1 = tf.layers.dense(net, self._neuron_num,
                                     activation=self.jump_activation(i),
                                     kernel_initializer=kernel_init)
                h1_layers.append(h1)
            h1_layers = tf.concat(h1_layers, 1)
        with tf.name_scope("output_layer"):
            net = tf.layers.dense(h1_layers, 1,
                                  activation=self.jump_activation(1),
                                  kernel_initializer=kernel_init)
        net = tf.reshape(net, [bsz, -1], name="pred_")
        return net

    @lazy_property
    def loss(self):
        loss = tf.reduce_mean(tf.squared_difference(self.target, self.prediction),
                              name='lossOp_')
        return loss

    @lazy_property
    def optimize(self):
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        optimizer = tf.train.AdagradOptimizer(learning_rate=self._lr)
        return optimizer.apply_gradients(zip(gradients, params),
                                         name="optimizeOp_")

    def write_graph(self, dir):
        fname = "{}.pb".format(self.__repr__())
        abs_path = os.path.join(dir, fname)
        if not os.path.exists(abs_path):
            tf.train.write_graph(tf.get_default_graph(), dir, fname, False)

    def __repr__(self):
        return "augmented_nn"

def main():
    parser = argparse.ArgumentParser(description='AugmentedNN Model Generator')

    parser.add_argument('--column_num', type=int, default=1,
                        help='Number of columns in the range predicate')
    parser.add_argument('--order', type=int, default=3,
                        help='Max order of activation function')
    parser.add_argument('--neuron_num', type=int, default=20,
                        help='Number of neurons in hidden layer')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='Learning rate')
    parser.add_argument('graph_out_path', type=str, nargs='+',
                        help='Path to write graph output')
    args = parser.parse_args()
    model = AugmentedNN(args.column_num, args.order, args.neuron_num, args.lr)
    model.tf_init()
    model.write_graph(' '.join(args.graph_out_path))

if __name__ == '__main__':
    main()
```
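For context, the C++ wrapper below drives this script entirely through its command line. A minimal sketch of generating the serialized graph by hand, under the same flags `ConstructModelArgsString` assembles (the flag values here are just the argparse defaults, not values mandated by the commit):

```python
# Sketch: produce augmented_nn.pb the way the brain's C++ wrapper does.
# Equivalent CLI (mirrors ConstructModelArgsString in augmented_nn.cpp):
#   python augmented_nn.py --column_num 1 --order 3 --neuron_num 20 \
#          --lr 0.001 src/brain/modelgen
from augmented_nn import AugmentedNN  # the module defined above

model = AugmentedNN(column_num=1, order=3, neuron_num=20, lr=0.001)
model.tf_init()                          # adds the variable-init op to the graph
model.write_graph("src/brain/modelgen")  # writes src/brain/modelgen/augmented_nn.pb
```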
src/brain/selectivity/augmented_nn.cpp (new file: 167 additions, 0 deletions)

```cpp
//===----------------------------------------------------------------------===//
//
// Peloton
//
// augmented_nn.cpp
//
// Identification: src/brain/selectivity/augmented_nn.cpp
//
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#include "brain/selectivity/augmented_nn.h"
#include "brain/util/model_util.h"
#include "brain/util/tf_session_entity/tf_session_entity.h"
#include "brain/util/tf_session_entity/tf_session_entity_input.h"
#include "brain/util/tf_session_entity/tf_session_entity_output.h"
#include "util/file_util.h"

namespace peloton {
namespace brain {

AugmentedNN::AugmentedNN(int column_num, int order, int neuron_num,
                         float learn_rate, int batch_size, int epochs)
    : BaseTFModel("src/brain/modelgen", "src/brain/modelgen/augmented_nn.py",
                  "src/brain/modelgen/augmented_nn.pb"),
      column_num_(column_num),
      order_(order),
      neuron_num_(neuron_num),
      learn_rate_(learn_rate),
      batch_size_(batch_size),
      epochs_(epochs) {
  GenerateModel(ConstructModelArgsString());
  // Import the model
  tf_session_entity_->ImportGraph(graph_path_);
  // Initialize the model
  TFInit();
}

std::string AugmentedNN::ConstructModelArgsString() const {
  std::stringstream args_str_builder;
  args_str_builder << " --column_num " << column_num_;
  args_str_builder << " --order " << order_;
  args_str_builder << " --neuron_num " << neuron_num_;
  args_str_builder << " --lr " << learn_rate_;
  args_str_builder << " " << this->modelgen_path_;
  return args_str_builder.str();
}

std::string AugmentedNN::ToString() const {
  std::stringstream model_str_builder;
  model_str_builder << "augmented_nn(";
  model_str_builder << "column_num = " << column_num_;
  model_str_builder << ", order = " << order_;
  model_str_builder << ", neuron_num = " << neuron_num_;
  model_str_builder << ", lr = " << learn_rate_;
  model_str_builder << ", batch_size = " << batch_size_;
  model_str_builder << ")";
  return model_str_builder.str();
}

// Returns a batch: features are every column but the last, target is the last.
void AugmentedNN::GetBatch(const matrix_eig &mat, size_t batch_offset,
                           size_t bsz, matrix_eig &data,
                           matrix_eig &target) {
  size_t row_idx = batch_offset * bsz;
  data = mat.block(row_idx, 0, bsz, mat.cols() - 1);
  target = mat.block(row_idx, mat.cols() - 1, bsz, 1);
}

// Backpropagate once
void AugmentedNN::Fit(const matrix_eig &X, const matrix_eig &y, int bsz) {
  auto data_batch = EigenUtil::Flatten(X);
  auto target_batch = EigenUtil::Flatten(y);
  std::vector<int64_t> dims_data{bsz, X.cols()};
  std::vector<int64_t> dims_target{bsz, 1};
  std::vector<TfFloatIn *> inputs_optimize{
      new TfFloatIn(data_batch.data(), dims_data, "data_"),
      new TfFloatIn(target_batch.data(), dims_target, "target_"),
      new TfFloatIn(learn_rate_, "learn_rate_")};
  tf_session_entity_->Eval(inputs_optimize, "optimizeOp_");
  std::for_each(inputs_optimize.begin(), inputs_optimize.end(), TFIO_Delete);
}

float AugmentedNN::TrainEpoch(const matrix_eig &mat) {
  std::vector<float> losses;
  // Obtain relevant metadata
  int min_allowed_bsz = 1;
  int bsz = std::min((int)mat.rows(), std::max(batch_size_, min_allowed_bsz));
  int number_of_batches = mat.rows() / bsz;
  int num_cols = mat.cols() - 1;

  std::vector<matrix_eig> y_batch, y_hat_batch;
  // Run through each batch and compute loss/apply backprop
  for (int batch_offset = 0; batch_offset < number_of_batches;
       ++batch_offset) {
    matrix_eig data_batch, target_batch;
    GetBatch(mat, batch_offset, bsz, data_batch, target_batch);

    std::vector<int64_t> dims_data{bsz, num_cols};
    std::vector<int64_t> dims_target{bsz, 1};

    Fit(data_batch, target_batch, bsz);

    matrix_eig y_hat_eig = Predict(data_batch, bsz);
    y_hat_batch.push_back(y_hat_eig);
    y_batch.push_back(target_batch);
  }
  matrix_eig y = EigenUtil::VStack(y_batch);
  matrix_eig y_hat = EigenUtil::VStack(y_hat_batch);
  return ModelUtil::MeanSqError(y, y_hat);
}

// X: [bsz, column_num*2] (e.g. [bsz, 2] when column_num == 1)
// return: [bsz, 1]
matrix_eig AugmentedNN::Predict(const matrix_eig &X, int bsz) const {
  auto data_batch = EigenUtil::Flatten(X);
  std::vector<int64_t> dims_data{bsz, X.cols()};
  std::vector<int64_t> dims_target{bsz, 1};

  std::vector<TfFloatIn *> inputs_predict{
      new TfFloatIn(data_batch.data(), dims_data, "data_")};
  auto output_predict = new TfFloatOut(dims_target, "pred_");
  // Obtain predicted values
  auto out = tf_session_entity_->Eval(inputs_predict, output_predict);

  matrix_t y_hat;
  for (int res_idx = 0; res_idx < bsz; res_idx++) {
    vector_t res = {out[res_idx]};
    y_hat.push_back(res);
  }
  std::for_each(inputs_predict.begin(), inputs_predict.end(), TFIO_Delete);
  TFIO_Delete(output_predict);
  return EigenUtil::ToEigenMat(y_hat);
}

float AugmentedNN::ValidateEpoch(const matrix_eig &mat) {
  // Obtain relevant metadata
  int min_allowed_bsz = 1;
  int bsz = std::min((int)mat.rows(), std::max(batch_size_, min_allowed_bsz));
  int number_of_batches = mat.rows() / bsz;
  int num_cols = mat.cols() - 1;

  std::vector<matrix_eig> y_batch, y_hat_batch;
  // Validation: run through each batch and compute loss only (no backprop)
  for (int batch_offset = 0; batch_offset < number_of_batches;
       ++batch_offset) {
    matrix_eig data_batch, target_batch;
    GetBatch(mat, batch_offset, bsz, data_batch, target_batch);

    std::vector<int64_t> dims_data{bsz, num_cols};
    std::vector<int64_t> dims_target{bsz, 1};

    matrix_eig y_hat_eig = Predict(data_batch, bsz);
    y_hat_batch.push_back(y_hat_eig);
    y_batch.push_back(target_batch);
  }
  matrix_eig y = EigenUtil::VStack(y_batch);
  matrix_eig y_hat = EigenUtil::VStack(y_hat_batch);
  return ModelUtil::MeanSqError(y, y_hat);
}

}  // namespace brain
}  // namespace peloton
```
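One thing the diff leaves implicit is the layout of the training matrix that `GetBatch` slices. A hedged NumPy sketch of the convention the code implies — the only things the code fixes are that rows start at `batch_offset * bsz` and that the last column is the target; reading the feature columns as per-column `[lower, upper]` predicate bounds follows from the Python model's `column_num*2`-wide `data_` placeholder, and the numbers below are made up:

```python
import numpy as np

# Implied layout: [num_rows, column_num*2 + 1], last column = target selectivity.
mat = np.array([
    # lo,   hi,   selectivity   (illustrative values)
    [0.10, 0.20, 0.12],
    [0.00, 0.90, 0.85],
    [0.40, 0.45, 0.03],
    [0.25, 0.75, 0.52],
])

def get_batch(mat, batch_offset, bsz):
    # Mirrors AugmentedNN::GetBatch's Eigen block arithmetic.
    row_idx = batch_offset * bsz
    data = mat[row_idx:row_idx + bsz, :-1]    # mat.block(row_idx, 0, bsz, cols-1)
    target = mat[row_idx:row_idx + bsz, -1:]  # mat.block(row_idx, cols-1, bsz, 1)
    return data, target

data, target = get_batch(mat, batch_offset=1, bsz=2)
print(data)    # [[0.40 0.45] [0.25 0.75]]
print(target)  # [[0.03] [0.52]]
```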
src/brain/selectivity/selectivity_defaults.cpp (new file: 27 additions, 0 deletions)

```cpp
//===----------------------------------------------------------------------===//
//
// Peloton
//
// selectivity_defaults.cpp
//
// Identification: src/brain/selectivity/selectivity_defaults.cpp
//
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#include "brain/selectivity/selectivity_defaults.h"

namespace peloton {
namespace brain {

const int AugmentedNNDefaults::COLUMN_NUM = 1;
const int AugmentedNNDefaults::ORDER = 1;
const int AugmentedNNDefaults::NEURON_NUM = 16;
const float AugmentedNNDefaults::LR = 0.1f;
const int AugmentedNNDefaults::BATCH_SIZE = 256;
const int AugmentedNNDefaults::EPOCHS = 600;

}  // namespace brain
}  // namespace peloton
```
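For cross-reference, these defaults line up one-to-one with the Python generator's flags; a sketch of the equivalent instantiation (the mapping is inferred from the `AugmentedNN` constructors above, not stated in the commit):

```python
# Defaults from selectivity_defaults.cpp applied to the Python model above.
model = AugmentedNN(column_num=1,   # AugmentedNNDefaults::COLUMN_NUM
                    order=1,        # AugmentedNNDefaults::ORDER
                    neuron_num=16,  # AugmentedNNDefaults::NEURON_NUM
                    lr=0.1)         # AugmentedNNDefaults::LR
# BATCH_SIZE (256) and EPOCHS (600) parameterize the C++ training loop
# (TrainEpoch / epochs_), not the generated graph itself.
```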
src/brain/workload/workload_defaults.cpp (40 additions, 40 deletions; the removed and re-added lines shown are identical, so in this view the change appears to be whitespace or line endings only)

```cpp
//===----------------------------------------------------------------------===//
//
// Peloton
//
// workload_defaults.cpp
//
// Identification: src/brain/workload/workload_defaults.cpp
//
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#include "brain/workload/workload_defaults.h"

namespace peloton {
namespace brain {

const int CommonWorkloadDefaults::HORIZON = 216;
const int CommonWorkloadDefaults::INTERVAL = 100;
const int CommonWorkloadDefaults::PADDLING_DAYS = 7;
const int CommonWorkloadDefaults::ESTOP_PATIENCE = 10;
const float CommonWorkloadDefaults::ESTOP_DELTA = 0.01f;

const int LSTMWorkloadDefaults::NFEATS = 3;
const int LSTMWorkloadDefaults::NENCODED = 20;
const int LSTMWorkloadDefaults::NHID = 20;
const int LSTMWorkloadDefaults::NLAYERS = 2;
const float LSTMWorkloadDefaults::LR = 0.01f;
const float LSTMWorkloadDefaults::DROPOUT_RATE = 0.5f;
const float LSTMWorkloadDefaults::CLIP_NORM = 0.5f;
const int LSTMWorkloadDefaults::BATCH_SIZE = 12;
const int LSTMWorkloadDefaults::BPTT = 90;
const int LSTMWorkloadDefaults::EPOCHS = 100;

const int LinearRegWorkloadDefaults::BPTT = 90;

const int KernelRegWorkloadDefaults::BPTT = 90;

}  // namespace brain
}  // namespace peloton
```
