Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions Dockerfile-build-python27-lambda
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Dockerfile for building RDKit artifacts.
# This image contains all aspects of RDKit plus the build system.
# Its purpose is to create the RDKit artifacts that will be deployed to AWS Lambda.

# The LambCI project provides a number of Docker images that closely mimic the
# AWS Lambda environment. More details here: https://github.com/lambci/docker-lambda
FROM lambci/lambda:build-python2.7

# AWS Lambda is based on CentOS, which only provides Boost 1.53 and does not work with RDKit.
# For the time being the distro boost packages (boost, boost-python, boost-devel) are
# deliberately NOT installed here; boost is built from source instead.
# We can revert this once updated boost packages are available.
#
# Every step is chained with '&&' so that 'yum clean all' runs as a command of its own
# instead of being parsed as additional package names for 'yum install'.
RUN yum install -y --enablerepo=epel --setopt=tsflags=nodocs --setopt=override_install_langs=en_US.utf8 \
      @development \
      bzip2-devel \
      cmake3 \
      eigen3 \
      eigen3-devel \
      git \
      python27-devel \
      python27-numpy \
      readline-devel \
      sqlite-devel \
      swig \
      zlib-devel \
 && yum clean all \
 && rm -rf /var/cache/yum


# Here we build boost from source.
# This will be unnecessary once updated boost packages are available.
# Only the python and serialization libraries are built (bootstrap's --with-libraries).
# The download uses HTTPS and 'curl -f' so that an HTTP error aborts the build
# instead of saving an error page as the archive.
# NOTE(review): the tarball is not checksum-verified - consider pinning a sha256.
WORKDIR /root/boost
RUN curl -fSL -o boost.tgz https://sourceforge.net/projects/boost/files/boost/1.56.0/boost_1_56_0.tar.gz \
 && tar xfz boost.tgz \
 && rm -f boost.tgz \
 && cd boost_1_56_0 \
 && ./bootstrap.sh --with-libraries=python,serialization \
 && ./b2 install

# Tar up the built libraries (which have been installed into /usr/local/lib)
# as we'll need them for the runtime images.
# They need to go in /usr/lib64
WORKDIR /usr/local/lib
RUN tar cvfz /root/boost-1.56.0.tgz libboost*so.*

WORKDIR /

# Fetch the requested RDKit branch only (override with --build-arg RDKIT_BRANCH=...).
ARG RDKIT_BRANCH=master
RUN git clone --single-branch -b $RDKIT_BRANCH https://github.com/rdkit/rdkit.git

# Hack to build cartridge packages; can be removed once this code hits the repo.
COPY patch_pgsql_rpm.patch /rdkit
WORKDIR /rdkit
RUN patch -p1 < patch_pgsql_rpm.patch

# RDBASE is set in its own instruction so the following ENV can expand it.
ENV RDBASE=/rdkit
ENV JAVA_HOME=/usr/lib/jvm/java \
    CLASSPATH=$RDBASE/Code/JavaWrappers/gmwrapper/org.RDKit.jar

# WORKDIR creates the out-of-tree build directory when it does not exist yet.
WORKDIR $RDBASE/build

# Configure the build against the boost tree unpacked in /root/boost.
RUN cmake3 -Wno-dev \
      -DBOOST_ROOT=/root/boost/boost_1_56_0 \
      -DLIB_SUFFIX=64 \
      -DRDK_INSTALL_INTREE=OFF \
      -DRDK_BUILD_INCHI_SUPPORT=ON \
      -DRDK_BUILD_AVALON_SUPPORT=ON \
      -DRDK_BUILD_PYTHON_WRAPPERS=ON \
      -DRDK_BUILD_SWIG_WRAPPERS=OFF \
      -DCMAKE_INSTALL_PREFIX=/usr \
      ..

# Compile with two cores left free (a single job when two or fewer are online),
# then package the build tree as RPMs. 'make install' is left disabled here.
RUN ncpu=$(getconf _NPROCESSORS_ONLN) \
 && njobs=$(( ncpu > 2 ? ncpu - 2 : 1 )) \
 && make -j $njobs \
 && cpack3 -G RPM

# Search paths pointing at the RDKit source/build tree.
ENV CLASSPATH=$RDBASE/Code/JavaWrappers/gmwrapper/org.RDKit.jar \
    LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$RDBASE/lib:$RDBASE/Code/JavaWrappers/gmwrapper:/usr/lib/x86_64-linux-gnu \
    PYTHONPATH=$PYTHONPATH:$RDBASE
WORKDIR $RDBASE

98 changes: 98 additions & 0 deletions Dockerfile-build-python36-lambda
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Dockerfile for building RDKit artifacts.
# This image contains all aspects of RDKit plus the build system.
# Its purpose is to create the RDKit artifacts that will be deployed to AWS Lambda.

# The LambCI project provides a number of Docker images that closely mimic the
# AWS Lambda environment. More details here: https://github.com/lambci/docker-lambda
FROM lambci/lambda:build-python3.6

# AWS Lambda is based on CentOS, which only provides Boost 1.53 and does not work with RDKit.
# For the time being the distro boost packages (boost, boost-python, boost-devel) are
# deliberately NOT installed here; boost is built from source instead.
# We can revert this once updated boost packages are available.
#
# Every step is chained with '&&' so that 'yum clean all' runs as a command of its own
# instead of being parsed as additional package names for 'yum install'.
RUN yum install -y --enablerepo=epel --setopt=tsflags=nodocs --setopt=override_install_langs=en_US.utf8 \
      @development \
      bzip2-devel \
      cmake3 \
      eigen3 \
      eigen3-devel \
      git \
      readline-devel \
      sqlite-devel \
      swig \
      zlib-devel \
 && yum clean all \
 && rm -rf /var/cache/yum

# Unlike the 'build-python2.7' image, this LambCI image does not install Python from
# the Amazon repo and instead builds from source. The Python base is /var/lang/bin/python3.6,
# which must be aliased in order for the RDKit build and packaging scripts to locate
# the interpreter. The include directory is aliased as well (python3.6m -> python3.6).
RUN ln -sf /var/lang/bin/python3.6 /usr/bin/python \
 && ln -sf /var/lang/bin/python3.6 /usr/bin/python3 \
 && ln -s /var/lang/include/python3.6m /var/lang/include/python3.6

# Numpy for Python 3.6 is not available from the Amazon repo.
# --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): numpy is unpinned - consider pinning the same version in the runtime image.
RUN pip install --no-cache-dir numpy

# Here we build boost from source.
# This will be unnecessary once updated boost packages are available.
# Only the python and serialization libraries are built (bootstrap's --with-libraries).
# The download uses HTTPS and 'curl -f' so that an HTTP error aborts the build
# instead of saving an error page as the archive.
# NOTE(review): the tarball is not checksum-verified - consider pinning a sha256.
WORKDIR /root/boost
RUN curl -fSL -o boost.tgz https://sourceforge.net/projects/boost/files/boost/1.56.0/boost_1_56_0.tar.gz \
 && tar xfz boost.tgz \
 && rm -f boost.tgz \
 && cd boost_1_56_0 \
 && ./bootstrap.sh --with-libraries=python,serialization \
 && ./b2 install

# Tar up the built libraries (which have been installed into /usr/local/lib)
# as we'll need them for the runtime images.
# They need to go in /usr/lib64
WORKDIR /usr/local/lib
RUN tar cvfz /root/boost-1.56.0.tgz libboost*so.*

WORKDIR /

# Fetch the requested RDKit branch only (override with --build-arg RDKIT_BRANCH=...).
ARG RDKIT_BRANCH=master
RUN git clone --single-branch -b $RDKIT_BRANCH https://github.com/rdkit/rdkit.git

# Hack to build cartridge packages; can be removed once this code hits the repo.
COPY patch_pgsql_rpm.patch /rdkit
WORKDIR /rdkit
RUN patch -p1 < patch_pgsql_rpm.patch

# RDBASE is set in its own instruction so the following ENV can expand it.
ENV RDBASE=/rdkit
ENV JAVA_HOME=/usr/lib/jvm/java \
    CLASSPATH=$RDBASE/Code/JavaWrappers/gmwrapper/org.RDKit.jar

# WORKDIR creates the out-of-tree build directory when it does not exist yet.
WORKDIR $RDBASE/build

# The PYTHON_* options tell the RDKit build which Python install to use.
RUN cmake3 -Wno-dev \
      -DPYTHON_EXECUTABLE=/var/lang/bin/python3.6 \
      -DPYTHON_INCLUDE_DIR=/var/lang/include/python3.6 \
      -DPYTHON_LIBRARY=/var/lang/lib/python3.6/config-3.6m-x86_64-linux-gnu/libpython3.6m.a \
      -DBOOST_ROOT=/root/boost/boost_1_56_0 \
      -DLIB_SUFFIX=64 \
      -DRDK_INSTALL_INTREE=OFF \
      -DRDK_BUILD_INCHI_SUPPORT=ON \
      -DRDK_BUILD_AVALON_SUPPORT=ON \
      -DRDK_BUILD_PYTHON_WRAPPERS=ON \
      -DRDK_BUILD_SWIG_WRAPPERS=OFF \
      -DCMAKE_INSTALL_PREFIX=/usr \
      ..

# Compile with two cores left free (a single job when two or fewer are online),
# then package the build tree as RPMs. 'make install' is left disabled here.
RUN ncpu=$(getconf _NPROCESSORS_ONLN) \
 && njobs=$(( ncpu > 2 ? ncpu - 2 : 1 )) \
 && make -j $njobs \
 && cpack3 -G RPM

# Search paths pointing at the RDKit source/build tree.
ENV CLASSPATH=$RDBASE/Code/JavaWrappers/gmwrapper/org.RDKit.jar \
    LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$RDBASE/lib:$RDBASE/Code/JavaWrappers/gmwrapper:/usr/lib/x86_64-linux-gnu \
    PYTHONPATH=$PYTHONPATH:$RDBASE
WORKDIR $RDBASE

38 changes: 38 additions & 0 deletions Dockerfile-python27-lambda
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Dockerfile for packaging a Python based RDKit implementation to deploy
# as an AWS Lambda layer. See comments in build-python27-lambda.sh for
# details.

FROM lambci/lambda:build-python2.7

# Runtime dependencies for the layer image.
# Every step is chained with '&&' so that 'yum clean' runs as a command of its own
# instead of being parsed as additional package names for 'yum install'.
RUN yum install -y --setopt=tsflags=nodocs --setopt=override_install_langs=en_US.utf8 \
      python27-numpy \
      sqlite \
      sqlite-devel \
 && yum clean -y all \
 && rm -rf /var/cache/yum

# Tag of the artifact directory to package (override with --build-arg DOCKER_TAG=...).
ARG DOCKER_TAG=latest

# Bring in the RDKit runtime/python RPMs and the boost 1.56 libraries tarball.
COPY artifacts/python27-lambda/$DOCKER_TAG/rpms/RDKit-*-Linux-Runtime.rpm \
     artifacts/python27-lambda/$DOCKER_TAG/rpms/RDKit-*-Linux-Python.rpm \
     artifacts/python27-lambda/$DOCKER_TAG/boost/boost-1.56.0.tgz \
     /tmp/

# Unpack the boost libraries into /usr/lib64 and list them in the build log.
RUN tar xvfz /tmp/boost-1.56.0.tgz -C /usr/lib64 \
 && rm /tmp/boost-1.56.0.tgz \
 && ls -l /usr/lib64/*boost*

# Install the RPMs without dependency checks (--nodeps), then drop them.
RUN rpm --nodeps -iv /tmp/*.rpm \
 && rm -f /tmp/*.rpm

# Stage the lambda layer contents under /tmp/layer (python/ and lib/).
# Note this does NOT include the NumPy dependency - instead this should
# be provided by using this layer together with the public AWS SciPy1x layer when
# creating a lambda function.
RUN mkdir -p /tmp/layer/lib /tmp/layer/python \
 && cp -av /usr/lib64/python2.7/dist-packages/rdkit /tmp/layer/python \
 && cp -av /usr/lib64/libRDKit* /tmp/layer/lib \
 && cp -av /usr/lib64/libboost_* /tmp/layer/lib \
 && cp -av /usr/lib64/libsqlite3* /tmp/layer/lib

# Zip the staged tree; --symlinks stores symbolic links as links.
WORKDIR /tmp/layer
RUN zip --symlinks -rv rdkit-python27.zip *

WORKDIR /

# Create the rdkit user (uid 1000, supplementary group 0) and run as it.
RUN useradd -u 1000 -U -G 0 rdkit
USER 1000
41 changes: 41 additions & 0 deletions Dockerfile-python36-lambda
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Dockerfile for packaging a Python based RDKit implementation to deploy
# as an AWS Lambda layer. See comments in build-python36-lambda.sh for
# details.

FROM lambci/lambda:build-python3.6

# Runtime dependencies for the layer image.
# Every step is chained with '&&' so that 'yum clean' runs as a command of its own
# instead of being parsed as additional package names for 'yum install'.
RUN yum install -y --setopt=tsflags=nodocs --setopt=override_install_langs=en_US.utf8 \
      sqlite \
      sqlite-devel \
 && yum clean -y all \
 && rm -rf /var/cache/yum

# --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): numpy is unpinned - consider pinning the version used by the build image.
RUN pip install --no-cache-dir numpy

# Expose the rdkit package under the /var/lang site-packages directory.
# The link target only appears once the RDKit RPMs are installed later in this file.
RUN ln -s /usr/lib/python3.6/site-packages/rdkit /var/lang/lib/python3.6/site-packages/rdkit

# Tag of the artifact directory to package (override with --build-arg DOCKER_TAG=...).
ARG DOCKER_TAG=latest

# Bring in the RDKit runtime/python RPMs and the boost 1.56 libraries tarball.
COPY artifacts/python36-lambda/$DOCKER_TAG/rpms/RDKit-*-Linux-Runtime.rpm \
     artifacts/python36-lambda/$DOCKER_TAG/rpms/RDKit-*-Linux-Python.rpm \
     artifacts/python36-lambda/$DOCKER_TAG/boost/boost-1.56.0.tgz \
     /tmp/

# Unpack the boost libraries into /usr/lib64 and list them in the build log.
RUN tar xvfz /tmp/boost-1.56.0.tgz -C /usr/lib64 \
 && rm /tmp/boost-1.56.0.tgz \
 && ls -l /usr/lib64/*boost*

# Install the RPMs without dependency checks (--nodeps), then drop them.
RUN rpm --nodeps -iv /tmp/*.rpm \
 && rm -f /tmp/*.rpm

# Stage the lambda layer contents under /tmp/layer (python/ and lib/).
# Note that this DOES include the NumPy dependency - attempts to use
# the layer built from this image with the AWS provided Python 3.6
# SciPy1x layer were not successful.
RUN mkdir -p /tmp/layer/lib /tmp/layer/python \
 && cp -av /usr/lib/python3.6/site-packages/rdkit /tmp/layer/python \
 && cp -av /var/lang/lib/python3.6/site-packages/numpy /tmp/layer/python \
 && cp -av /usr/lib64/libRDKit* /tmp/layer/lib \
 && cp -av /usr/lib64/libboost_* /tmp/layer/lib \
 && cp -av /usr/lib64/libsqlite3* /tmp/layer/lib

# Zip the staged tree; --symlinks stores symbolic links as links.
WORKDIR /tmp/layer
RUN zip --symlinks -rv rdkit-python36.zip *

WORKDIR /

# Create the rdkit user (uid 1000, supplementary group 0) and run as it.
RUN useradd -u 1000 -U -G 0 rdkit
USER 1000
45 changes: 45 additions & 0 deletions build-python27-lambda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash
#
# AWS Lambda based build
#
# NOTE - this is a bit of a hack as Centos7 comes with boost version 1.53, and several
# of the required libraries require this version, but recent RDKit builds (since the
# switch to modern C++) require boost 1.56 or later.
# The solution is to build boost binaries for 1.56 so that RDKit can be built against
# those and to copy those binaries into the destination image, and to use the --nodeps
# option when rpm installing the RDKit RPMs.
# The resulting image has both versions of boost in /usr/lib64 and RDKit seems to be
# quite happy with this.
#
# Credit to Paolo Tosco for helping to work out a strategy for this.

set -ex

source params.sh

# Fail early if params.sh did not provide the values used below; an empty
# DOCKER_TAG would otherwise make the 'rm -rf' remove the whole python27-lambda tree.
: "${BASE:?BASE must be set by params.sh}"
: "${DOCKER_TAG:?DOCKER_TAG must be set by params.sh}"
: "${GIT_BRANCH:?GIT_BRANCH must be set by params.sh}"

# Host directory that receives everything copied out of the containers.
ARTIFACTS="artifacts/python27-lambda/$DOCKER_TAG"

# build RDKit
docker build --no-cache -f Dockerfile-build-python27-lambda \
  -t "$BASE/rdkit-build-python27-lambda:$DOCKER_TAG" \
  --build-arg RDKIT_BRANCH="$GIT_BRANCH" .

# copy the packages
rm -rf "$ARTIFACTS"
mkdir -p "$ARTIFACTS/rpms" "$ARTIFACTS/java" "$ARTIFACTS/boost" "$ARTIFACTS/layer"
# No -it: the copy needs neither stdin nor a TTY, and -t fails when none is attached (e.g. CI).
docker run --rm -u "$(id -u)" \
  -v "$PWD/$ARTIFACTS:/tohere:Z" \
  "$BASE/rdkit-build-python27-lambda:$DOCKER_TAG" bash -c 'cp build/*.rpm /tohere/rpms && cp /root/boost-1.56.0.tgz /tohere/boost'

# build image for python
docker build --no-cache -f Dockerfile-python27-lambda \
  -t "$BASE/rdkit-python27-lambda:$DOCKER_TAG" \
  --build-arg DOCKER_TAG="$DOCKER_TAG" .
# Report the name actually used for the tag above rather than a hard-coded organisation.
echo "Built image $BASE/rdkit-python27-lambda:$DOCKER_TAG"

# copy the assembled layer
docker run --rm -u "$(id -u)" \
  -v "$PWD/$ARTIFACTS:/tohere:Z" \
  "$BASE/rdkit-python27-lambda:$DOCKER_TAG" bash -c 'cp /tmp/layer/rdkit-python27.zip /tohere/layer'

45 changes: 45 additions & 0 deletions build-python36-lambda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash
#
# AWS Lambda based build
#
# NOTE - this is a bit of a hack as Centos7 comes with boost version 1.53, and several
# of the required libraries require this version, but recent RDKit builds (since the
# switch to modern C++) require boost 1.56 or later.
# The solution is to build boost binaries for 1.56 so that RDKit can be built against
# those and to copy those binaries into the destination image, and to use the --nodeps
# option when rpm installing the RDKit RPMs.
# The resulting image has both versions of boost in /usr/lib64 and RDKit seems to be
# quite happy with this.
#
# Credit to Paolo Tosco for helping to work out a strategy for this.

set -ex

source params.sh

# Fail early if params.sh did not provide the values used below; an empty
# DOCKER_TAG would otherwise make the 'rm -rf' remove the whole python36-lambda tree.
: "${BASE:?BASE must be set by params.sh}"
: "${DOCKER_TAG:?DOCKER_TAG must be set by params.sh}"
: "${GIT_BRANCH:?GIT_BRANCH must be set by params.sh}"

# Host directory that receives everything copied out of the containers.
ARTIFACTS="artifacts/python36-lambda/$DOCKER_TAG"

# build RDKit
docker build --no-cache -f Dockerfile-build-python36-lambda \
  -t "$BASE/rdkit-build-python36-lambda:$DOCKER_TAG" \
  --build-arg RDKIT_BRANCH="$GIT_BRANCH" .

# copy the packages
rm -rf "$ARTIFACTS"
mkdir -p "$ARTIFACTS/rpms" "$ARTIFACTS/java" "$ARTIFACTS/boost" "$ARTIFACTS/layer"
# No -it: the copy needs neither stdin nor a TTY, and -t fails when none is attached (e.g. CI).
docker run --rm -u "$(id -u)" \
  -v "$PWD/$ARTIFACTS:/tohere:Z" \
  "$BASE/rdkit-build-python36-lambda:$DOCKER_TAG" bash -c 'cp build/*.rpm /tohere/rpms && cp /root/boost-1.56.0.tgz /tohere/boost'

# build image for python
docker build --no-cache -f Dockerfile-python36-lambda \
  -t "$BASE/rdkit-python36-lambda:$DOCKER_TAG" \
  --build-arg DOCKER_TAG="$DOCKER_TAG" .
# Report the name actually used for the tag above rather than a hard-coded organisation.
echo "Built image $BASE/rdkit-python36-lambda:$DOCKER_TAG"

# copy the assembled layer
docker run --rm -u "$(id -u)" \
  -v "$PWD/$ARTIFACTS:/tohere:Z" \
  "$BASE/rdkit-python36-lambda:$DOCKER_TAG" bash -c 'cp /tmp/layer/rdkit-python36.zip /tohere/layer'

8 changes: 4 additions & 4 deletions params.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Build parameters; this file is sourced by the build-*.sh scripts.
# NOTE(review): this span looks like a rendered diff hunk with removed and added lines
# interleaved. Read top to bottom, each later assignment overrides the earlier one -
# confirm which lines belong in the final file.
export DOCKER_TAG=latest
export DOCKER_TAG=Release_2018_09_3
export GIT_REPO=https://github.com/rdkit/rdkit.git
export GIT_BRANCH=master
unset GIT_TAG
export GIT_BRANCH=Release_2018_09_3
export GIT_TAG=$DOCKER_TAG

# Image namespace used as the prefix for the tags built by the scripts.
export BASE=informaticsmatters
echo "DOCKER_TAG=$DOCKER_TAG GIT_BRANCH=$GIT_BRANCH no tag"
echo "DOCKER_TAG=$DOCKER_TAG GIT_BRANCH=$GIT_BRANCH GIT_TAG=$GIT_TAG"