This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit 45c0515

Merge branch 'master' into invalid-argument
2 parents: 72b0744 + e20d519

File tree: 10 files changed, +365 −17 lines


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -306,6 +306,7 @@ docs/dev
 # Protobuf files
 *.pb.cc
 *.pb.h
+*.pb

 # Third party
 third_party/gflags/

cmake/Dependencies.cmake

Lines changed: 6 additions & 0 deletions
@@ -31,6 +31,12 @@ include(cmake/CapnProtoMacros.cmake)
 # ---[ Google-protobuf
 include(cmake/ProtoBuf.cmake)

+# --[ tensorflow
+find_library(TFlowC
+             NAMES tensorflow
+             PATHS "/usr/local/lib")
+list(APPEND Peloton_LINKER_LIBS ${TFlowC})
+
 # ---[ Libevent
 find_package(Libevent REQUIRED)
 include_directories(SYSTEM ${LIBEVENT_INCLUDE_DIRS})

cmake/ProtoBuf.cmake

Lines changed: 1 addition & 2 deletions
@@ -1,7 +1,6 @@
 # Finds Google Protocol Buffers library and compilers and extends
 # the standard cmake script with version and python generation support
-
-find_package( Protobuf REQUIRED )
+find_package( Protobuf REQUIRED)
 include_directories(SYSTEM ${PROTOBUF_INCLUDE_DIR})
 list(APPEND Peloton_LINKER_LIBS ${PROTOBUF_LIBRARIES})

script/installation/packages.sh

Lines changed: 91 additions & 13 deletions
@@ -17,6 +17,7 @@
 ## * OSX
 ## =================================================================

+
 # Determine OS platform
 UNAME=$(uname | tr "[:upper:]" "[:lower:]")
 # If Linux, try to determine specific distribution

@@ -36,6 +37,50 @@ fi
 unset UNAME
 DISTRO=$(echo $DISTRO | tr "[:lower:]" "[:upper:]")
 TMPDIR=/tmp
+TF_VERSION="1.4.0"
+TF_TYPE="cpu"
+
+
+function install_protobuf3.4.0() {
+    # Install Relevant tooling
+    # Remove any old versions of protobuf
+    DISTRIB=$1 # ubuntu/fedora
+    if [ "$DISTRIB" == "ubuntu" ]
+    then
+        sudo apt-get --yes --force-yes remove --purge libprotobuf-dev protobuf-compiler
+    elif [ "$DISTRIB" == "fedora" ]
+    then
+        sudo dnf -q remove -y protobuf protobuf-devel protobuf-compiler
+    else
+        echo "Only Ubuntu and Fedora is supported currently!"
+        return 0
+    fi
+    CWD=`pwd`
+    cd $TMPDIR
+    wget -O protobuf-cpp-3.4.0.tar.gz https://github.com/google/protobuf/releases/download/v3.4.0/protobuf-cpp-3.4.0.tar.gz
+    tar -xzf protobuf-cpp-3.4.0.tar.gz
+    cd protobuf-3.4.0
+    ./autogen.sh && ./configure && make -j4 && sudo make install && sudo ldconfig
+    # Do cleanup
+    cd $CWD
+}
+
+# Utility function for installing tensorflow components of python/C++
+function install_tf() {
+    TFCApiFile=$1
+    TFBinaryURL=$2
+    LinkerConfigCmd=$3
+    TARGET_DIRECTORY="/usr/local"
+    # Install Tensorflow Python Binary
+    sudo pip3 install --upgrade ${TFBinaryURL}
+
+    # Install C-API
+    TFCApiURL="https://storage.googleapis.com/tensorflow/libtensorflow/${TFCApiFile}"
+    wget -O $TFCApiFile $TFCApiURL
+    sudo tar -C $TARGET_DIRECTORY -xzf $TFCApiFile
+    # Configure the Linker
+    eval $LinkerConfigCmd
+}

 ## ------------------------------------------------
 ## UBUNTU

@@ -75,13 +120,19 @@ if [ "$DISTRO" = "UBUNTU" ]; then
     if [ "$MAJOR_VER" == "14" ]; then
         PKG_CMAKE="cmake3"
         FORCE_Y="--force-yes"
+        TFBinaryURL="https://storage.googleapis.com/tensorflow/linux/${TF_TYPE}/tensorflow-${TF_VERSION}-cp34-cp34m-linux_x86_64.whl"
+    fi
+    if [ "$MAJOR_VER" == "16" ]; then
+        TFBinaryURL="https://storage.googleapis.com/tensorflow/linux/${TF_TYPE}/tensorflow-${TF_VERSION}-cp35-cp35m-linux_x86_64.whl"
     fi
     # Fix for llvm on Ubuntu 17.x
     if [ "$MAJOR_VER" == "17" ]; then
         PKG_LLVM="llvm-3.9"
         PKG_CLANG="clang-3.8"
+        TFBinaryURL="https://storage.googleapis.com/tensorflow/linux/${TF_TYPE}/tensorflow-${TF_VERSION}-cp35-cp35m-linux_x86_64.whl"
     fi
-
+    TFCApiFile="libtensorflow-${TF_TYPE}-linux-x86_64-${TF_VERSION}.tar.gz"
+    LinkerConfigCmd="sudo ldconfig"
     sudo apt-get -qq $FORCE_Y --ignore-missing -y install \
         $PKG_CMAKE \
         $PKG_LLVM \

@@ -92,9 +143,7 @@ if [ "$DISTRO" = "UBUNTU" ]; then
         flex \
         valgrind \
         lcov \
-        protobuf-compiler \
         libgflags-dev \
-        libprotobuf-dev \
         libevent-dev \
         libboost-dev \
         libboost-thread-dev \

@@ -103,7 +152,19 @@ if [ "$DISTRO" = "UBUNTU" ]; then
         libpqxx-dev \
         libedit-dev \
         libssl-dev \
-        postgresql-client
+        postgresql-client \
+        python3-pip \
+        curl \
+        autoconf \
+        automake \
+        libtool \
+        make \
+        g++ \
+        unzip
+    # Install version of protobuf needed by C-API
+    install_protobuf3.4.0 "ubuntu"
+    # Install tensorflow
+    install_tf "$TFCApiFile" "$TFBinaryURL" "$LinkerConfigCmd"

 ## ------------------------------------------------
 ## DEBIAN

@@ -140,14 +201,15 @@ elif [[ "$DISTRO" == *"FEDORA"* ]]; then
         26) LLVM="llvm";;
         *) LLVM="llvm4.0";;
     esac
-
+    TFCApiFile="libtensorflow-${TF_TYPE}-linux-x86_64-${TF_VERSION}.tar.gz"
+    TFBinaryURL="https://storage.googleapis.com/tensorflow/linux/${TF_TYPE}/tensorflow-${TF_VERSION}-cp36-cp36m-linux_x86_64.whl"
+    LinkerConfigCmd="sudo ldconfig"
     sudo dnf -q install -y \
         git \
         gcc-c++ \
         make \
         cmake \
         gflags-devel \
-        protobuf-devel \
         bison \
         flex \
         libevent-devel \

@@ -166,7 +228,16 @@ elif [[ "$DISTRO" == *"FEDORA"* ]]; then
         libasan \
         libtsan \
         libubsan \
-        libatomic
+        libatomic \
+        python3-pip \
+        curl \
+        autoconf \
+        automake \
+        libtool
+    # Install version of protobuf needed by C-API
+    install_protobuf3.4.0 "fedora"
+    # Install tensorflow
+    install_tf "$TFCApiFile" "$TFBinaryURL" "$LinkerConfigCmd"

 ## ------------------------------------------------
 ## REDHAT

@@ -194,18 +265,17 @@ elif [[ "$DISTRO" == *"REDHAT"* ]] && [[ "${DISTRO_VER%.*}" == "7" ]]; then
         fi
         popd; popd
         return 0
-    }
+    }

-    # Package download paths
+    #Package download paths
     PKGS=(
        "https://github.com/schuhschuh/gflags/archive/v2.0.tar.gz"
     )
-
-    # Add EPEL repository first
+    #Add EPEL repository first
     sudo yum -q -y install epel-release
     sudo yum -q -y upgrade epel-release

-    # Simple installations via yum
+    #Simple installations via yum
     sudo yum -q -y install \
         git \
         gcc-c++ \

@@ -245,7 +315,9 @@ elif [ "$DISTRO" = "DARWIN" ]; then
         echo "Installing homebrew..."
         ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
     fi
-
+    TFBinaryURL="https://storage.googleapis.com/tensorflow/mac/${TF_TYPE}/tensorflow-${TF_VERSION}-py3-none-any.whl"
+    TFCApiFile="libtensorflow-${TF_TYPE}-darwin-x86_64-${TF_VERSION}.tar.gz"
+    LinkerConfigCmd="sudo update_dyld_shared_cache"
     brew install git
     brew install cmake
     brew install gflags

@@ -260,6 +332,12 @@ elif [ "$DISTRO" = "DARWIN" ]; then
     brew install libedit
     brew install [email protected]
     brew install postgresql
+    brew install curl
+    brew install python
+    brew upgrade python
+    # Brew installs correct version of Protobuf(3.5.1 >= 3.4.0)
+    # So we can directly install tensorflow
+    install_tf "$TFCApiFile" "$TFBinaryURL" "$LinkerConfigCmd"

 ## ------------------------------------------------
 ## UNKNOWN
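
For reference, a quick way to sanity-check both halves of the TensorFlow setup that packages.sh performs (the pip3 wheel and the C API tarball) is the small Python sketch below. It is not part of this commit; the shared-library name assumes Linux, where the tarball unpacks libtensorflow.so under /usr/local/lib (on macOS it would be libtensorflow.dylib).

    # Hypothetical post-install check; not part of packages.sh.
    import ctypes
    import tensorflow as tf

    print(tf.__version__)            # expect "1.4.0", matching TF_VERSION above
    ctypes.CDLL("libtensorflow.so")  # raises OSError if the C API was not unpacked or the linker cache was not refreshed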

src/CMakeLists.txt

Lines changed: 18 additions & 2 deletions
@@ -1,8 +1,8 @@
 ##################################################################################
-# SRC CMAKELISTS
+#SRC CMAKELISTS
 ##################################################################################

-# --[ Cap'nProto library
+#--[Cap'nProto library

 file(GLOB capnp_files include/capnp/*.capnp)
 if (capnp_files)

@@ -34,6 +34,22 @@ peloton_default_properties(peloton-proto)

 add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/libpg_query/ libpg_query.a)

+##################################################################################
+
+# --[ tensorflow
+# Generate essential model files
+set(MODEL_GEN_DIR ${CMAKE_CURRENT_LIST_DIR}/brain/modelgen)
+set(MODEL_GEN_PY ${MODEL_GEN_DIR}/model_generator.py)
+set(MODEL_GEN_SETTINGS ${MODEL_GEN_DIR}/settings.json)
+set(MODEL_GEN_COMMAND python3 ${MODEL_GEN_PY} ${MODEL_GEN_SETTINGS} ${MODEL_GEN_DIR})
+
+message(STATUS "Tensorflow models being generated")
+execute_process(COMMAND ${MODEL_GEN_COMMAND}
+                RESULT_VARIABLE RETURN_VALUE)
+
+if (NOT RETURN_VALUE EQUAL 0)
+    message(FATAL_ERROR "Failed to generate tensorflow models.")
+endif()

 ##################################################################################
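
The model-generation step above runs src/brain/modelgen/model_generator.py with settings.json and an output directory, but neither file appears in this diff. A minimal sketch of what such a generator presumably does is shown below; the settings schema and the main() signature are assumptions for illustration, not the project's actual code.

    # Hypothetical sketch of model_generator.py (schema assumed; not shown in this commit).
    import json
    import sys

    from LSTM_Model import LSTM_Model

    def main(settings_path, out_dir):
        with open(settings_path) as f:
            settings = json.load(f)             # assumed: {"LSTM": {"ntoken": ..., "ninp": ..., "nhid": ..., "nlayers": ...}}
        model = LSTM_Model(**settings["LSTM"])  # builds the graph in the default TF graph
        model.write_graph(out_dir)              # serializes it as LSTM.pb (see LSTM_Model.py below)

    if __name__ == "__main__":
        main(sys.argv[1], sys.argv[2])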

src/brain/modelgen/LSTM_Model.py

Lines changed: 129 additions & 0 deletions
@@ -0,0 +1,129 @@
#===----------------------------------------------------------------------===#
#
# Peloton
#
# LSTM_Model.py
#
# Identification: src/brain/modelgen/LSTM_Model.py
#
# Copyright (c) 2015-2018, Carnegie Mellon University Database Group
#
#===----------------------------------------------------------------------===#

import tensorflow as tf
import functools
import os
import argparse

def lazy_property(function):
    attribute = '_cache_' + function.__name__

    @property
    @functools.wraps(function)
    def decorator(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)

    return decorator

class LSTM_Model:
    """Container module with an encoder, a recurrent module, and a decoder.
    Encoder-decoder LSTM is useful for Seq2Seq prediction problems,
    i.e. where we have a sequence as input (previous workload info) and want to predict a sequence
    as output (workload prediction 10 days later for 1 entire day).
    Learning rate used: https://www.tensorflow.org/tutorials/seq2seq - Check Gradient computation & optimization
    It's provided: The value of learning_rate is usually in the range 0.0001 to 0.001
    and can be set to decrease as training progresses.
    """

    def __init__(self, ntoken, ninp, nhid, nlayers, lr=0.001,
                 dropout_ratio=0.5, clip_norm=0.5, **kwargs):
        """
        :param ntoken: #features(input to encoder)
        :param ninp: input_size to LSTM(output of encoder)
        :param nhid: hidden layers in LSTM
        :param nlayers:
        :param dropout:
        :param tie_weights:
        """
        tf.reset_default_graph()
        self.data = tf.placeholder(tf.float32, [None, None, ntoken], name="data_")
        self.target = tf.placeholder(tf.float32, [None, None, ntoken], name="target_")
        self._ntoken = ntoken
        self._ninp = ninp
        self._nhid = nhid
        self._nlayers = nlayers
        # Setting to defaults known to work well
        self._lr = tf.placeholder_with_default(lr, shape=None,
                                               name="learn_rate_")
        self._dropout_ratio = tf.placeholder_with_default(dropout_ratio, shape=None,
                                                          name="dropout_ratio_")
        self._clip_norm = tf.placeholder_with_default(clip_norm, shape=None,
                                                      name="clip_norm_")
        self.tf_init = tf.global_variables_initializer
        self.prediction
        self.loss
        self.optimize

    @lazy_property
    def prediction(self):
        # Recurrent network.
        # Define weights
        weights = {
            'encoder': tf.Variable(tf.random_normal([self._ntoken, self._ninp]), name="enc_wt"),
            'decoder': tf.Variable(tf.random_normal([self._nhid, self._ntoken]), name="dec_wt")
        }
        biases = {
            'encoder': tf.Variable(tf.random_normal([self._ninp]), name="enc_bias"),
            'decoder': tf.Variable(tf.random_normal([self._ntoken]), name="dec_bias")
        }
        # Reshape inputs to feed to encoder
        bptt = tf.shape(self.data)[0]
        bsz = tf.shape(self.data)[1]
        input = tf.reshape(self.data, [bptt * bsz, self._ntoken])
        # Apply encoder to get workload-embeddings
        emb = tf.matmul(input, weights["encoder"]) + biases["encoder"]
        # Reshape embeddings to feed to Stacked LSTM
        emb = tf.reshape(emb, [bptt, bsz, self._ninp])
        stacked_lstm_cell = tf.nn.rnn_cell.\
            MultiRNNCell([tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(self._nhid),
                                                        output_keep_prob=self._dropout_ratio)
                          for _ in range(self._nlayers)])
        # time_major true => time steps x batch size x features.
        # time_major false => batch_size x time_steps x features
        output, _ = tf.nn.dynamic_rnn(stacked_lstm_cell,
                                      emb, dtype=tf.float32,
                                      time_major=False)
        # Apply decoder to get output
        decoder_in = tf.reshape(output, [bptt * bsz, self._nhid])
        decoded = tf.matmul(decoder_in, weights["decoder"]) + biases["decoder"]
        pred = tf.reshape(decoded, [bptt, bsz, -1], name="pred_")
        return pred

    @lazy_property
    def loss(self):
        loss = tf.reduce_mean(tf.squared_difference(self.target, self.prediction), name='lossOp_')
        return loss

    @lazy_property
    def optimize(self):
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, self._clip_norm)
        optimizer = tf.train.AdamOptimizer(learning_rate=self._lr)
        return optimizer.apply_gradients(zip(clipped_gradients,
                                             params), name="optimizeOp_")

    def write_graph(self, dir):
        fname = "{}.pb".format(self.__repr__())
        abs_path = os.path.join(dir, fname)
        if not os.path.exists(abs_path):
            tf.train.write_graph(tf.get_default_graph(),
                                 dir, fname, False)


    def __repr__(self):
        return "LSTM"
