cmu-db
diff --git a/‎cmake/Dependencies.cmake
Lines changed: 4 additions & 0 deletions b/‎cmake/Dependencies.cmake
Lines changed: 4 additions & 0 deletions
diff --git a/‎cmake/Modules/FindEigen3.cmake
Lines changed: 97 additions & 0 deletions b/‎cmake/Modules/FindEigen3.cmake
Lines changed: 97 additions & 0 deletions
diff --git a/‎script/installation/packages.sh
Lines changed: 10 additions & 6 deletions b/‎script/installation/packages.sh
Lines changed: 10 additions & 6 deletions
diff --git a/‎src/CMakeLists.txt
Lines changed: 0 additions & 17 deletions b/‎src/CMakeLists.txt
Lines changed: 0 additions & 17 deletions
diff --git a/‎src/brain/modelgen/LSTM.pb
-229 KB b/‎src/brain/modelgen/LSTM.pb
-229 KB
diff --git a/‎src/brain/modelgen/LSTM.py
Lines changed: 146 additions & 0 deletions b/‎src/brain/modelgen/LSTM.py
Lines changed: 146 additions & 0 deletions
@@ -42,6 +42,10 @@ find_library(TFlowC
         PATHS "/usr/local/lib")
 list(APPEND Peloton_LINKER_LIBS ${TFlowC})
 
+# --[ Eigen3
+find_package(Eigen3 REQUIRED)
+include_directories(SYSTEM ${EIGEN3_INCLUDE_DIR})
+
 # ---[ Libevent
 find_package(Libevent REQUIRED)
 include_directories(SYSTEM ${LIBEVENT_INCLUDE_DIRS})
 
@@ -0,0 +1,97 @@
+# - Try to find Eigen3 lib
+#
+# This module supports requiring a minimum version, e.g. you can do
+#   find_package(Eigen3 3.1.2)
+# to require version 3.1.2 or newer of Eigen3.
+#
+# Once done this will define
+#
+#  EIGEN3_FOUND - system has eigen lib with correct version
+#  EIGEN3_INCLUDE_DIR - the eigen include directory
+#  EIGEN3_VERSION - eigen version
+#
+# This module reads hints about search locations from 
+# the following environment variables:
+#
+# EIGEN3_ROOT
+# EIGEN3_ROOT_DIR
+
+# Copyright (c) 2006, 2007 Montel Laurent, <[email protected]>
+# Copyright (c) 2008, 2009 Gael Guennebaud, <[email protected]>
+# Copyright (c) 2009 Benoit Jacob <[email protected]>
+# Redistribution and use is allowed according to the terms of the 2-clause BSD license.
+
+# When the caller of find_package() did not request a version, fall back to
+# 2.91.0. Each component is only filled in if it is unset (or evaluates false),
+# then the three are joined into Eigen3_FIND_VERSION.
+if(NOT Eigen3_FIND_VERSION)
+  if(NOT Eigen3_FIND_VERSION_MAJOR)
+    set(Eigen3_FIND_VERSION_MAJOR 2)
+  endif()
+
+  if(NOT Eigen3_FIND_VERSION_MINOR)
+    set(Eigen3_FIND_VERSION_MINOR 91)
+  endif()
+
+  if(NOT Eigen3_FIND_VERSION_PATCH)
+    set(Eigen3_FIND_VERSION_PATCH 0)
+  endif()
+
+  set(Eigen3_FIND_VERSION
+      "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}")
+endif()
+
+# _eigen3_check_version()
+#
+# Extracts the Eigen version from
+#   ${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h
+# and compares it against ${Eigen3_FIND_VERSION}.
+#
+# This is a macro (not a function) on purpose: the results are written into the
+# scope of the code that invokes it:
+#   EIGEN3_WORLD_VERSION / EIGEN3_MAJOR_VERSION / EIGEN3_MINOR_VERSION
+#   EIGEN3_VERSION     - "<world>.<major>.<minor>"
+#   EIGEN3_VERSION_OK  - TRUE when EIGEN3_VERSION is not less than
+#                        Eigen3_FIND_VERSION, FALSE otherwise (including when
+#                        the version header cannot be found)
+macro(_eigen3_check_version)
+  set(_eigen3_macros_file "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h")
+
+  if(NOT EXISTS "${_eigen3_macros_file}")
+    # file(READ) on a missing file is a hard configure error. A stale or
+    # hand-set EIGEN3_INCLUDE_DIR should be reported as "version not OK" so the
+    # caller's normal found/not-found handling decides what happens next.
+    set(EIGEN3_VERSION_OK FALSE)
+    message(STATUS "Eigen3 version header not found: ${_eigen3_macros_file}")
+  else()
+    file(READ "${_eigen3_macros_file}" _eigen3_version_header)
+
+    # Each MATCH leaves its capture group in CMAKE_MATCH_1, so the value has to
+    # be copied out before the next string(REGEX MATCH ...) call.
+    string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}")
+    set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}")
+    string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}")
+    set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}")
+    string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}")
+    set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}")
+
+    set(EIGEN3_VERSION "${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}")
+
+    # Quote both operands so an empty or unusual value can never change the
+    # number of arguments that if() sees.
+    if("${EIGEN3_VERSION}" VERSION_LESS "${Eigen3_FIND_VERSION}")
+      set(EIGEN3_VERSION_OK FALSE)
+    else()
+      set(EIGEN3_VERSION_OK TRUE)
+    endif()
+
+    if(NOT EIGEN3_VERSION_OK)
+      message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, "
+                     "but at least version ${Eigen3_FIND_VERSION} is required")
+    endif()
+  endif()
+endmacro()
+
+# Locate the Eigen3 include directory unless one is already known (from the
+# cache of a previous configure, or supplied by the user).
+if(NOT EIGEN3_INCLUDE_DIR)
+
+  # search first if an Eigen3Config.cmake is available in the system,
+  # if successful this would set EIGEN3_INCLUDE_DIR and the rest of
+  # the script will work as usual
+  find_package(Eigen3 ${Eigen3_FIND_VERSION} NO_MODULE QUIET)
+
+  # Fall back to looking for Eigen's marker file in the hinted and default
+  # locations.
+  if(NOT EIGEN3_INCLUDE_DIR)
+    find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library
+        HINTS
+        ENV EIGEN3_ROOT
+        ENV EIGEN3_ROOT_DIR
+        PATHS
+        ${CMAKE_INSTALL_PREFIX}/include
+        ${KDE4_INCLUDE_DIR}
+        PATH_SUFFIXES eigen3 eigen
+      )
+  endif()
+
+endif()
+
+if(EIGEN3_INCLUDE_DIR)
+  _eigen3_check_version()
+endif()
+
+# Run the standard result handling on every configure, including when
+# EIGEN3_INCLUDE_DIR came from the cache. Skipping it in that case meant the
+# REQUIRED and QUIET arguments of find_package() were ignored on reconfigure,
+# so a cached Eigen that is too old was silently accepted.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK)
+
+mark_as_advanced(EIGEN3_INCLUDE_DIR)
+
@@ -48,10 +48,10 @@ function install_protobuf3.4.0() {
  if [ "$DISTRIB" == "ubuntu" ]
  then
     sudo apt-get --yes --force-yes remove --purge libprotobuf-dev protobuf-compiler
- elif [ "$DISTRIB" == "fedora" ] 
+ elif [ "$DISTRIB" == "fedora" ]
  then
     sudo dnf -q remove -y protobuf protobuf-devel protobuf-compiler
- else 
+ else
     echo "Only Ubuntu and Fedora is supported currently!"
     return 0
  fi
@@ -72,7 +72,8 @@ function install_tf() {
  TARGET_DIRECTORY="/usr/local"
  # Install Tensorflow Python Binary
  sudo -E pip3 install --upgrade pip
- sudo -E pip3 install --upgrade tensorflow==${TF_VERSION}
+ # Related issue: https://github.com/pypa/pip/issues/3165
+ sudo -E pip3 install tensorflow==${TF_VERSION} --upgrade --ignore-installed six
 
  # Install C-API
  TFCApiURL="https://storage.googleapis.com/tensorflow/libtensorflow/${TFCApiFile}"
@@ -161,7 +162,8 @@ if [ "$DISTRO" = "UBUNTU" ]; then
         libtool \
         make \
         g++ \
-	ant \
+        libeigen3-dev \
+    	ant \
         unzip
     # Install version of protobuf needed by C-API
     install_protobuf3.4.0 "ubuntu"
@@ -194,7 +196,8 @@ elif [ "$DISTRO" = "DEBIAN OS" ]; then
         llvm-dev \
         libedit-dev \
         postgresql-client \
-        libtbb-dev
+        libtbb-dev \
+        libeigen3-dev
 
 ## ------------------------------------------------
 ## FEDORA
@@ -239,7 +242,7 @@ elif [[ "$DISTRO" == *"FEDORA"* ]]; then
         automake \
         libtool
     # Install version of protobuf needed by C-API
-    install_protobuf3.4.0 "fedora"        
+    install_protobuf3.4.0 "fedora"
     # Install tensorflow
     install_tf "$TFCApiFile" "$TF_VERSION" "$LinkerConfigCmd"
 
@@ -343,6 +346,7 @@ elif [ "$DISTRO" = "DARWIN" ]; then
     brew install wget
     brew install python
     brew upgrade python
+    brew install eigen
     # Brew installs correct version of Protobuf(3.5.1 >= 3.4.0)
     # So we can directly install tensorflow
     install_tf "$TFCApiFile" "$TF_VERSION" "$LinkerConfigCmd"
 
@@ -36,23 +36,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/libpg_query/ libpg_query.a)
 
 ##################################################################################
 
-# --[ tensorflow
-# Generate essential model files
-set(MODEL_GEN_DIR ${CMAKE_CURRENT_LIST_DIR}/brain/modelgen)
-set(MODEL_GEN_PY ${MODEL_GEN_DIR}/model_generator.py)
-set(MODEL_GEN_SETTINGS ${MODEL_GEN_DIR}/settings.json)
-set(MODEL_GEN_COMMAND python3 ${MODEL_GEN_PY} ${MODEL_GEN_SETTINGS} ${MODEL_GEN_DIR})
-
-message(STATUS "Tensorflow models being generated")
-execute_process(COMMAND ${MODEL_GEN_COMMAND}
-        RESULT_VARIABLE RETURN_VALUE)
-
-if (NOT RETURN_VALUE EQUAL 0)
-    message(FATAL_ERROR "Failed to generate tensorflow models.")
-endif()
-
-##################################################################################
-
 # --[ Peloton library
 
 # creates 'srcs' lists
 
@@ -0,0 +1,146 @@
+#===----------------------------------------------------------------------===#
+#
+#                         Peloton
+#
+# LSTM.py
+#
+# Identification: src/brain/modelgen/LSTM.py
+#
+# Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+#
+#===----------------------------------------------------------------------===#
+
+import tensorflow as tf
+import functools
+import os
+import argparse
+
+def lazy_property(function):
+    attribute = '_cache_' + function.__name__
+
+    @property
+    @functools.wraps(function)
+    def decorator(self):
+        if not hasattr(self, attribute):
+            setattr(self, attribute, function(self))
+        return getattr(self, attribute)
+
+    return decorator
+
+class LSTM:
+    """Builds a TensorFlow graph: dense layer -> stacked LSTM -> dense layer.
+
+    Constructing an instance resets the default graph and adds the placeholders,
+    prediction, loss and optimizer ops to it; write_graph() then serializes the
+    default graph to disk.
+    """
+
+    def __init__(self, ntoken, ninp, nhid, nlayers, lr=0.001,
+                 dropout_ratio=0.5, clip_norm = 0.5, **kwargs):
+        """
+        :param ntoken: size of the last axis of the "data_"/"target_"
+            placeholders and number of units of the output dense layer
+        :param ninp: number of units of the input dense layer (the LSTM input size)
+        :param nhid: number of hidden units in each LSTM cell
+        :param nlayers: number of stacked LSTM cells
+        :param lr: default value of the "learn_rate_" placeholder, used as the
+            Adam learning rate
+        :param dropout_ratio: default value of the "dropout_ratio_" placeholder.
+            NOTE(review): it is passed as ``output_keep_prob`` in
+            stacked_lstm_cell(), i.e. it acts as a keep probability -- confirm
+            this matches the intended meaning of "ratio"
+        :param clip_norm: default value of the "clip_norm_" placeholder, used as
+            the global-norm clipping threshold for the gradients
+        :param kwargs: accepted but not used in this constructor
+        """
+        # Start from an empty default graph so this model's ops are the only ones in it.
+        tf.reset_default_graph()
+        self.data = tf.placeholder(tf.float32, [None, None, ntoken], name="data_")
+        self.target =  tf.placeholder(tf.float32, [None, None, ntoken], name="target_")
+        self._ntoken = ntoken
+        self._ninp = ninp
+        self._nhid = nhid
+        self._nlayers = nlayers
+        # Setting to defaults known to work well
+        # placeholder_with_default lets a caller feed a different value at run
+        # time while keeping the constructor argument as the fallback.
+        self._lr = tf.placeholder_with_default(lr, shape=None,
+                                               name="learn_rate_")
+        self._dropout_ratio = tf.placeholder_with_default(dropout_ratio, shape=None,
+                                                          name="dropout_ratio_")
+        self._clip_norm = tf.placeholder_with_default(clip_norm, shape=None,
+                                                      name="clip_norm_")
+        # Stores the function itself; it is not called here.
+        self.tf_init = tf.global_variables_initializer
+        # Accessing the lazy properties forces their ops to be added to the
+        # graph now, in this order.
+        self.prediction
+        self.loss
+        self.optimize
+
+
+    @staticmethod
+    def stacked_lstm_cell(num_cells, hid_units, dropout):
+        """Return a MultiRNNCell made of ``num_cells`` dropout-wrapped LSTM cells.
+
+        :param num_cells: number of LSTM cells to stack
+        :param hid_units: number of units in each LSTMCell
+        :param dropout: value passed as ``output_keep_prob`` to each DropoutWrapper
+        """
+        cells = []
+        for i in range(num_cells):
+            cells.append(tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(hid_units),
+                                                       output_keep_prob=dropout,
+                                                       variational_recurrent=False,
+                                                       dtype=tf.float32))
+        return tf.nn.rnn_cell.MultiRNNCell(cells)
+
+    @lazy_property
+    def prediction(self):
+        """Build (once) and return the output tensor named "pred_".
+
+        The input is passed through a dense layer, the stacked LSTM and a second
+        dense layer, then reshaped back to the first two dimensions of
+        ``self.data``.
+        """
+        net = self.data
+        # The same initializer object is used for both dense layers below.
+        kernel_init = tf.random_normal_initializer()
+        with tf.name_scope("input_linear_layer"):
+            # Dynamic sizes of the first two axes; dynamic_rnn below is called
+            # with time_major=False, so presumably axis 0 is the batch and
+            # axis 1 the sequence length.
+            net_shape = tf.shape(net)
+            bsz = net_shape[0]
+            bptt = net_shape[1]
+            # Flatten the first two axes so the dense layer sees 2-D input.
+            net = tf.reshape(self.data, [-1, self._ntoken])
+            net = tf.layers.dense(net, self._ninp,
+                                  activation=tf.nn.leaky_relu,
+                                  kernel_initializer=kernel_init)
+            net = tf.reshape(net, [bsz, bptt, self._ninp])
+        with tf.name_scope("stacked_lstm_cell"):
+            stacked_lstm_cell = self.stacked_lstm_cell(self._nlayers,
+                                                       self._nhid,
+                                                       self._dropout_ratio)
+            # If GPU is present, should use highly optimized CudnnLSTMCell
+            net, _ = tf.nn.dynamic_rnn(stacked_lstm_cell,
+                                          net, dtype=tf.float32,
+                                          time_major=False)
+        with tf.name_scope("output_linear_layer"):
+            net = tf.reshape(net, [-1, self._nhid])
+            net = tf.layers.dense(net, self._ntoken,
+                                  activation=tf.nn.leaky_relu,
+                                  kernel_initializer=kernel_init)
+        # Restore the leading two axes; the op is named so it can be looked up
+        # in the serialized graph.
+        net = tf.reshape(net, [bsz, bptt, -1], name="pred_")
+        return net
+
+    @lazy_property
+    def loss(self):
+        """Build (once) and return the mean squared difference between
+        ``self.target`` and ``self.prediction``; the op is named 'lossOp_'."""
+        loss = tf.reduce_mean(tf.squared_difference(self.target, self.prediction), name='lossOp_')
+        return loss
+
+    @lazy_property
+    def optimize(self):
+        """Build (once) and return the training op named "optimizeOp_".
+
+        Gradients of the loss with respect to all trainable variables are
+        clipped by global norm (``self._clip_norm``) and applied with an Adam
+        optimizer using ``self._lr``.
+        """
+        params = tf.trainable_variables()
+        gradients = tf.gradients(self.loss, params)
+        clipped_gradients, _ = tf.clip_by_global_norm(
+            gradients, self._clip_norm)
+        optimizer = tf.train.AdamOptimizer(learning_rate=self._lr)
+        return optimizer.apply_gradients(zip(clipped_gradients,
+                                             params), name="optimizeOp_")
+
+    def write_graph(self, dir):
+        """Write the default graph to ``<dir>/<repr(self)>.pb``.
+
+        Nothing is written when that file already exists, so an existing file is
+        never overwritten.
+
+        :param dir: directory to write the graph file into
+        """
+        fname = "{}.pb".format(self.__repr__())
+        abs_path = os.path.join(dir, fname)
+        if not os.path.exists(abs_path):
+            # The trailing False is the fourth positional argument (as_text in
+            # the TF 1.x signature).
+            tf.train.write_graph(tf.get_default_graph(),
+                                 dir, fname, False)
+
+
+    def __repr__(self):
+        """Return the model name; write_graph() uses it as the file stem."""
+        return "LSTM"
+
+def main():
+    parser = argparse.ArgumentParser(description='LSTM Model Generator')
+
+
+    parser.add_argument('--nfeats', type=int, default=3, help='Input feature length(input to encoder/linear layer)')
+    parser.add_argument('--nencoded', type=int, default=20, help='Encoded feature length(input to LSTM)')
+    parser.add_argument('--nhid', type=int, default=20, help='Number of LSTM Hidden units')
+    parser.add_argument('--nlayers', type=int, default=2, help='Number of LSTM layers')
+    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')
+    parser.add_argument('--dropout_ratio', type=float, default=0.5, help='Dropout ratio')
+    parser.add_argument('--clip_norm', type=float, default=0.5, help='Clip Norm')
+    parser.add_argument('graph_out_path', type=str, help='Path to write graph output', nargs='+')
+    args = parser.parse_args()
+    model = LSTM(args.nfeats, args.nencoded, args.nhid,
+                       args.nlayers, args.lr, args.dropout_ratio,
+                       args.clip_norm)
+    model.tf_init()
+    model.write_graph(' '.join(args.graph_out_path))
+
+if __name__ == '__main__':
+    main()