diff --git a/Dockerfile-build-python27-lambda b/Dockerfile-build-python27-lambda
new file mode 100644
index 0000000..e5395c7
--- /dev/null
+++ b/Dockerfile-build-python27-lambda
@@ -0,0 +1,82 @@
+# Dockerfile for building RDKit artifacts.
+# This image contains all aspects of RDKit plus the build system.
+# Its purpose is to create the RDKit artifacts that will be deployed to AWS Lambda.
+
+# The LambCI project provides a number of Docker images that closely mimic the
+# AWS Lambda environment. More details here: https://github.com/lambci/docker-lambda
+FROM lambci/lambda:build-python2.7
+
+# AWS Lambda is based on CentOS, which only provides Boost 1.53 and does not work with RDKit.
+# For the time being, exclude installing the boost packages and instead build from source.
+# We can revert this once updated boost packages are available.
+# The yum cache clean-up is chained with '&&' so that it runs as a command in this same layer.
+RUN yum install -y --enablerepo=epel --setopt=tsflags=nodocs --setopt=override_install_langs=en_US.utf8 \
+    readline-devel \
+    zlib-devel \
+    bzip2-devel \
+    sqlite-devel \
+    @development \
+    cmake3 \
+    python27-devel \
+    python27-numpy \
+    #boost\
+    #boost-python\
+    #boost-devel\
+    eigen3 \
+    eigen3-devel \
+    swig \
+    git && \
+    yum clean all && \
+    rm -rf /var/cache/yum
+
+
+# Here we build boost from source (-f makes curl fail on an HTTP error instead of saving the error page).
+# This will be unnecessary once updated boost packages are available.
+WORKDIR /root/boost
+RUN curl -fL -o boost.tgz https://sourceforge.net/projects/boost/files/boost/1.56.0/boost_1_56_0.tar.gz && \
+    tar xfz boost.tgz && \
+    rm -f boost.tgz && \
+    cd boost_1_56_0 && \
+    ./bootstrap.sh --with-libraries=python,serialization && \
+    ./b2 install
+
+# Tar up the built libraries (which have been installed into /usr/local/lib)
+# as we'll need them for the runtime images.
+# They need to go in /usr/lib64
+WORKDIR /usr/local/lib
+RUN tar cvfz /root/boost-1.56.0.tgz libboost*so.*
+
+WORKDIR /
+
+# Clone the RDKit repo and do the build
+ARG RDKIT_BRANCH=master
+RUN git clone -b $RDKIT_BRANCH --single-branch https://github.com/rdkit/rdkit.git
+
+ENV RDBASE=/rdkit
+ENV JAVA_HOME=/usr/lib/jvm/java
+ENV CLASSPATH=$RDBASE/Code/JavaWrappers/gmwrapper/org.RDKit.jar
+
+RUN mkdir $RDBASE/build
+WORKDIR $RDBASE/build
+
+RUN cmake3 -Wno-dev \
+    -DBOOST_ROOT=/root/boost/boost_1_56_0 \
+    -DLIB_SUFFIX=64 \
+    -DRDK_INSTALL_INTREE=OFF \
+    -DRDK_BUILD_INCHI_SUPPORT=ON \
+    -DRDK_BUILD_AVALON_SUPPORT=ON \
+    -DRDK_BUILD_PYTHON_WRAPPERS=ON \
+    -DRDK_BUILD_SWIG_WRAPPERS=OFF \
+    -DCMAKE_INSTALL_PREFIX=/usr \
+    ..
+
+RUN nproc=$(getconf _NPROCESSORS_ONLN) \
+    && make -j $(( nproc > 2 ? nproc - 2 : 1 )) \
+# && make install\
+    && cpack3 -G RPM
+
+ENV CLASSPATH=$RDBASE/Code/JavaWrappers/gmwrapper/org.RDKit.jar
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$RDBASE/lib:$RDBASE/Code/JavaWrappers/gmwrapper:/usr/lib/x86_64-linux-gnu
+ENV PYTHONPATH=$PYTHONPATH:$RDBASE
+WORKDIR $RDBASE
+
diff --git a/Dockerfile-build-python36-lambda b/Dockerfile-build-python36-lambda
new file mode 100644
index 0000000..5ef4ff8
--- /dev/null
+++ b/Dockerfile-build-python36-lambda
@@ -0,0 +1,94 @@
+# Dockerfile for building RDKit artifacts.
+# This image contains all aspects of RDKit plus the build system.
+# Its purpose is to create the RDKit artifacts that will be deployed to AWS Lambda.
+
+# The LambCI project provides a number of Docker images that closely mimic the
+# AWS Lambda environment. More details here: https://github.com/lambci/docker-lambda
+FROM lambci/lambda:build-python3.6
+
+# AWS Lambda is based on CentOS, which only provides Boost 1.53 and does not work with RDKit.
+# For the time being, exclude installing the boost packages and instead build from source.
+# We can revert this once updated boost packages are available.
+# The yum cache clean-up is chained with '&&' so that it runs as a command in this same layer.
+RUN yum install -y --enablerepo=epel --setopt=tsflags=nodocs --setopt=override_install_langs=en_US.utf8 \
+    readline-devel \
+    zlib-devel \
+    bzip2-devel \
+    sqlite-devel \
+    @development \
+    cmake3 \
+    #boost\
+    #boost-python\
+    #boost-devel\
+    eigen3 \
+    eigen3-devel \
+    swig \
+    git && \
+    yum clean all && \
+    rm -rf /var/cache/yum
+
+# Unlike the 'build-python2.7' image, this LambCI image does not install Python from
+# the Amazon repo and instead builds from source. The Python base is /var/lang/bin/python3.6,
+# which must be aliased in order for the RDKit build and packaging scripts to locate
+# the interpreter.
+RUN ln -sf /var/lang/bin/python3.6 /usr/bin/python
+RUN ln -sf /var/lang/bin/python3.6 /usr/bin/python3
+RUN ln -s /var/lang/include/python3.6m /var/lang/include/python3.6
+
+# Numpy for Python 3.6 is not available from the Amazon repo (no pip cache is kept in the layer).
+RUN pip install --no-cache-dir numpy
+
+# Here we build boost from source (-f makes curl fail on an HTTP error instead of saving the error page).
+# This will be unnecessary once updated boost packages are available.
+WORKDIR /root/boost
+RUN curl -fL -o boost.tgz https://sourceforge.net/projects/boost/files/boost/1.56.0/boost_1_56_0.tar.gz && \
+    tar xfz boost.tgz && \
+    rm -f boost.tgz && \
+    cd boost_1_56_0 && \
+    ./bootstrap.sh --with-libraries=python,serialization && \
+    ./b2 install
+
+# Tar up the built libraries (which have been installed into /usr/local/lib)
+# as we'll need them for the runtime images.
+# They need to go in /usr/lib64
+WORKDIR /usr/local/lib
+RUN tar cvfz /root/boost-1.56.0.tgz libboost*so.*
+
+WORKDIR /
+
+# Clone the RDKit repo and do the build
+ARG RDKIT_BRANCH=master
+RUN git clone -b $RDKIT_BRANCH --single-branch https://github.com/rdkit/rdkit.git
+
+ENV RDBASE=/rdkit
+ENV JAVA_HOME=/usr/lib/jvm/java
+ENV CLASSPATH=$RDBASE/Code/JavaWrappers/gmwrapper/org.RDKit.jar
+
+RUN mkdir $RDBASE/build
+WORKDIR $RDBASE/build
+
+# Extra options here to let the RDKit build know which Python install to use
+RUN cmake3 -Wno-dev \
+    -DPYTHON_EXECUTABLE=/var/lang/bin/python3.6 \
+    -DPYTHON_INCLUDE_DIR=/var/lang/include/python3.6 \
+    -DPYTHON_LIBRARY=/var/lang/lib/python3.6/config-3.6m-x86_64-linux-gnu/libpython3.6m.a \
+    -DBOOST_ROOT=/root/boost/boost_1_56_0 \
+    -DLIB_SUFFIX=64 \
+    -DRDK_INSTALL_INTREE=OFF \
+    -DRDK_BUILD_INCHI_SUPPORT=ON \
+    -DRDK_BUILD_AVALON_SUPPORT=ON \
+    -DRDK_BUILD_PYTHON_WRAPPERS=ON \
+    -DRDK_BUILD_SWIG_WRAPPERS=OFF \
+    -DCMAKE_INSTALL_PREFIX=/usr \
+    ..
+
+RUN nproc=$(getconf _NPROCESSORS_ONLN) \
+    && make -j $(( nproc > 2 ? nproc - 2 : 1 )) \
+# && make install\
+    && cpack3 -G RPM
+
+ENV CLASSPATH=$RDBASE/Code/JavaWrappers/gmwrapper/org.RDKit.jar
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$RDBASE/lib:$RDBASE/Code/JavaWrappers/gmwrapper:/usr/lib/x86_64-linux-gnu
+ENV PYTHONPATH=$PYTHONPATH:$RDBASE
+WORKDIR $RDBASE
+
diff --git a/Dockerfile-python27-lambda b/Dockerfile-python27-lambda
new file mode 100644
index 0000000..4b5d1ec
--- /dev/null
+++ b/Dockerfile-python27-lambda
@@ -0,0 +1,38 @@
+# Dockerfile for packaging a Python based RDKit implementation to deploy
+# as an AWS Lambda layer. See comments in build-python27-lambda.sh for
+# details.
+
+FROM lambci/lambda:build-python2.7
+# The yum cache clean-up is chained with '&&' so that it runs as a command in this same layer.
+RUN yum install -y --setopt=tsflags=nodocs --setopt=override_install_langs=en_US.utf8 \
+    python27-numpy \
+    sqlite \
+    sqlite-devel && \
+    yum clean -y all && \
+    rm -rf /var/cache/yum
+
+ARG DOCKER_TAG=latest
+
+COPY artifacts/python27-lambda/$DOCKER_TAG/rpms/RDKit-*-Linux-Runtime.rpm artifacts/python27-lambda/$DOCKER_TAG/rpms/RDKit-*-Linux-Python.rpm artifacts/python27-lambda/$DOCKER_TAG/boost/boost-1.56.0.tgz /tmp/
+RUN cd /usr/lib64 && tar xvfz /tmp/boost-1.56.0.tgz && rm /tmp/boost-1.56.0.tgz && ls -l /usr/lib64/*boost*
+RUN rpm --nodeps -iv /tmp/*.rpm && rm -f /tmp/*.rpm
+
+# Copy all files for the lambda layer.
+# Note this does NOT include the NumPy dependency - instead this should
+# be provided by using this layer together with the public AWS SciPy1x layer when
+# creating a lambda function.
+RUN mkdir -p /tmp/layer/lib && mkdir -p /tmp/layer/python
+RUN cp -av /usr/lib64/python2.7/dist-packages/rdkit /tmp/layer/python
+RUN cp -av /usr/lib64/libRDKit* /tmp/layer/lib
+RUN cp -av /usr/lib64/libboost_* /tmp/layer/lib
+RUN cp -av /usr/lib64/libsqlite3* /tmp/layer/lib
+
+# Assemble the lambda layer as a zipfile
+WORKDIR /tmp/layer
+RUN zip --symlinks -rv rdkit-python27.zip *
+
+WORKDIR /
+
+# add the rdkit user
+RUN useradd -u 1000 -U -G 0 rdkit
+USER 1000
diff --git a/Dockerfile-python36-lambda b/Dockerfile-python36-lambda
new file mode 100644
index 0000000..40bfa85
--- /dev/null
+++ b/Dockerfile-python36-lambda
@@ -0,0 +1,41 @@
+# Dockerfile for packaging a Python based RDKit implementation to deploy
+# as an AWS Lambda layer. See comments in build-python36-lambda.sh for
+# details.
+
+FROM lambci/lambda:build-python3.6
+# The yum cache clean-up is chained with '&&' so that it runs as a command in this same layer.
+RUN yum install -y --setopt=tsflags=nodocs --setopt=override_install_langs=en_US.utf8 \
+    sqlite \
+    sqlite-devel && \
+    yum clean -y all && \
+    rm -rf /var/cache/yum
+
+RUN pip install --no-cache-dir numpy
+RUN ln -s /usr/lib/python3.6/site-packages/rdkit /var/lang/lib/python3.6/site-packages/rdkit
+
+ARG DOCKER_TAG=latest
+
+COPY artifacts/python36-lambda/$DOCKER_TAG/rpms/RDKit-*-Linux-Runtime.rpm artifacts/python36-lambda/$DOCKER_TAG/rpms/RDKit-*-Linux-Python.rpm artifacts/python36-lambda/$DOCKER_TAG/boost/boost-1.56.0.tgz /tmp/
+RUN cd /usr/lib64 && tar xvfz /tmp/boost-1.56.0.tgz && rm /tmp/boost-1.56.0.tgz && ls -l /usr/lib64/*boost*
+RUN rpm --nodeps -iv /tmp/*.rpm && rm -f /tmp/*.rpm
+
+# Copy all files for the lambda layer
+# Note that this DOES include the NumPy dependency - attempts to use
+# the layer built from this image with the AWS provided Python 3.6
+# SciPy1x layer were not successful.
+RUN mkdir -p /tmp/layer/lib && mkdir -p /tmp/layer/python
+RUN cp -av /usr/lib/python3.6/site-packages/rdkit /tmp/layer/python
+RUN cp -av /var/lang/lib/python3.6/site-packages/numpy /tmp/layer/python
+RUN cp -av /usr/lib64/libRDKit* /tmp/layer/lib
+RUN cp -av /usr/lib64/libboost_* /tmp/layer/lib
+RUN cp -av /usr/lib64/libsqlite3* /tmp/layer/lib
+
+# Assemble the lambda layer as a zipfile
+WORKDIR /tmp/layer
+RUN zip --symlinks -rv rdkit-python36.zip *
+
+WORKDIR /
+
+# add the rdkit user
+RUN useradd -u 1000 -U -G 0 rdkit
+USER 1000
diff --git a/build-python27-lambda.sh b/build-python27-lambda.sh
new file mode 100755
index 0000000..e402804
--- /dev/null
+++ b/build-python27-lambda.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# AWS Lambda based build
+#
+# NOTE - this is a bit of a hack as CentOS 7 comes with boost version 1.53, and several
+# of the required libraries require this version, but recent RDKit builds (since the
+# switch to modern C++) require boost 1.56 or later.
+# The solution is to build boost binaries for 1.56 so that RDKit can be built against
+# those and to copy those binaries into the destination image, and to use the --nodeps
+# option when rpm installing the RDKit RPMs.
+# The resulting image has both versions of boost in /usr/lib64 and RDKit seems to be
+# quite happy with this.
+#
+# Credit to Paolo Tosco for helping to work out a strategy for this.
+
+set -ex
+
+source params.sh
+
+# build RDKit
+docker build --no-cache -f Dockerfile-build-python27-lambda \
+    -t "$BASE/rdkit-build-python27-lambda:$DOCKER_TAG" \
+    --build-arg "RDKIT_BRANCH=$GIT_BRANCH" .
+
+# copy the packages (no -it: cp needs neither stdin nor a TTY, so this also runs from CI)
+rm -rf "artifacts/python27-lambda/$DOCKER_TAG"
+mkdir -p "artifacts/python27-lambda/$DOCKER_TAG/rpms"
+mkdir -p "artifacts/python27-lambda/$DOCKER_TAG/java"
+mkdir -p "artifacts/python27-lambda/$DOCKER_TAG/boost"
+mkdir -p "artifacts/python27-lambda/$DOCKER_TAG/layer"
+docker run --rm -u "$(id -u)" \
+    -v "$PWD/artifacts/python27-lambda/$DOCKER_TAG:/tohere:Z" \
+    "$BASE/rdkit-build-python27-lambda:$DOCKER_TAG" bash -c 'cp build/*.rpm /tohere/rpms && cp /root/boost-1.56.0.tgz /tohere/boost'
+
+# build image for python
+docker build --no-cache -f Dockerfile-python27-lambda \
+    -t "$BASE/rdkit-python27-lambda:$DOCKER_TAG" \
+    --build-arg "DOCKER_TAG=$DOCKER_TAG" .
+echo "Built image $BASE/rdkit-python27-lambda:$DOCKER_TAG"
+
+# copy the assembled layer (no -it: cp needs neither stdin nor a TTY)
+docker run --rm -u "$(id -u)" \
+    -v "$PWD/artifacts/python27-lambda/$DOCKER_TAG:/tohere:Z" \
+    "$BASE/rdkit-python27-lambda:$DOCKER_TAG" bash -c 'cp /tmp/layer/rdkit-python27.zip /tohere/layer'
+
diff --git a/build-python36-lambda.sh b/build-python36-lambda.sh
new file mode 100755
index 0000000..ffe5703
--- /dev/null
+++ b/build-python36-lambda.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# AWS Lambda based build
+#
+# NOTE - this is a bit of a hack as CentOS 7 comes with boost version 1.53, and several
+# of the required libraries require this version, but recent RDKit builds (since the
+# switch to modern C++) require boost 1.56 or later.
+# The solution is to build boost binaries for 1.56 so that RDKit can be built against
+# those and to copy those binaries into the destination image, and to use the --nodeps
+# option when rpm installing the RDKit RPMs.
+# The resulting image has both versions of boost in /usr/lib64 and RDKit seems to be
+# quite happy with this.
+#
+# Credit to Paolo Tosco for helping to work out a strategy for this.
+
+set -ex
+
+source params.sh
+
+# build RDKit
+docker build --no-cache -f Dockerfile-build-python36-lambda \
+    -t "$BASE/rdkit-build-python36-lambda:$DOCKER_TAG" \
+    --build-arg "RDKIT_BRANCH=$GIT_BRANCH" .
+
+# copy the packages (no -it: cp needs neither stdin nor a TTY, so this also runs from CI)
+rm -rf "artifacts/python36-lambda/$DOCKER_TAG"
+mkdir -p "artifacts/python36-lambda/$DOCKER_TAG/rpms"
+mkdir -p "artifacts/python36-lambda/$DOCKER_TAG/java"
+mkdir -p "artifacts/python36-lambda/$DOCKER_TAG/boost"
+mkdir -p "artifacts/python36-lambda/$DOCKER_TAG/layer"
+docker run --rm -u "$(id -u)" \
+    -v "$PWD/artifacts/python36-lambda/$DOCKER_TAG:/tohere:Z" \
+    "$BASE/rdkit-build-python36-lambda:$DOCKER_TAG" bash -c 'cp build/*.rpm /tohere/rpms && cp /root/boost-1.56.0.tgz /tohere/boost'
+
+# build image for python
+docker build --no-cache -f Dockerfile-python36-lambda \
+    -t "$BASE/rdkit-python36-lambda:$DOCKER_TAG" \
+    --build-arg "DOCKER_TAG=$DOCKER_TAG" .
+echo "Built image $BASE/rdkit-python36-lambda:$DOCKER_TAG"
+
+# copy the assembled layer (no -it: cp needs neither stdin nor a TTY)
+docker run --rm -u "$(id -u)" \
+    -v "$PWD/artifacts/python36-lambda/$DOCKER_TAG:/tohere:Z" \
+    "$BASE/rdkit-python36-lambda:$DOCKER_TAG" bash -c 'cp /tmp/layer/rdkit-python36.zip /tohere/layer'
+
diff --git a/params.sh b/params.sh
index 210618e..e777969 100644
--- a/params.sh
+++ b/params.sh
@@ -1,7 +1,7 @@
-export DOCKER_TAG=latest
+export DOCKER_TAG=Release_2018_09_3
 export GIT_REPO=https://github.com/rdkit/rdkit.git
-export GIT_BRANCH=master
-unset GIT_TAG
+export GIT_BRANCH=Release_2018_09_3
+export GIT_TAG=$DOCKER_TAG
 export BASE=informaticsmatters
 
-echo "DOCKER_TAG=$DOCKER_TAG GIT_BRANCH=$GIT_BRANCH no tag"
+echo "DOCKER_TAG=$DOCKER_TAG GIT_BRANCH=$GIT_BRANCH GIT_TAG=$GIT_TAG"