1
1
FROM lablup/common-base:20.07-py36-cuda10.1
2
2
3
- # Install TensorFlow, Keras, PyTorch and MXNet
4
- ENV TENSORFLOW_VERSION=2.3.0
3
+ ARG TF_BUILD_VERSION=r2.3
4
+ # Install a pinned Bazel release; BAZEL_VERSION selects the installer that is
+ # downloaded from the Bazel GitHub releases page further down.
5
+ ENV BAZEL_VERSION=3.1.0
5
6
6
- ENV PYTHONUNBUFFERED=1 \
7
- LD_LIBRARY_PATH="/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/nvidia/lib64:/usr/include/x86_64-linux-gnu" \
8
- PATH="/usr/local/mpi/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/tensorrt/bin:/usr/local/bin/mecab" \
9
- LANG=C.UTF-8
7
+ # Set up Bazel.
10
8
11
- # Set default shell to /bin/bash
12
- SHELL ["/bin/bash" , "-cu" ]
9
+ # Running bazel inside a `docker build` command causes trouble, cf:
10
+ # https://github.com/bazelbuild/bazel/issues/134
11
+ # The easiest solution is to set up a bazelrc file forcing --batch.
12
+ RUN echo "startup --batch" >>/etc/bazel.bazelrc
13
+ # Similarly, we need to workaround sandboxing issues:
14
+ # https://github.com/bazelbuild/bazel/issues/418
15
+ RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
16
+ >>/etc/bazel.bazelrc
13
17
14
- RUN python3 -m pip install --no-cache-dir --upgrade \
15
- gast==0.3.3 \
16
- 'httplib2<0.18.0,>=0.8' \
17
- 'coverage<5.0,>=4.5' \
18
- mock==3.0.5 \
19
- tensorflow==${TENSORFLOW_VERSION} \
20
- mesh-tensorflow==0.1.16 \
21
- tensorboard
18
+ WORKDIR /
19
+ RUN mkdir /bazel && \
20
+ cd /bazel && \
21
+ curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36" -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
22
+ curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36" -fSsL -o /bazel/LICENSE.txt https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE && \
23
+ chmod +x bazel-*.sh && \
24
+ ./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
25
+ cd / && \
26
+ rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
22
27
23
- RUN python3 -m pip install --no-cache-dir --upgrade \
24
- -q git+https://github.com/google-research/tensorflow_constrained_optimization
25
- RUN python3 -m pip install --no-cache-dir --upgrade \
28
+
29
+ RUN python3 -m pip install --no-cache-dir \
30
+ mesh-tensorflow==0.1.16 \
26
31
cachetools==3.1.0 \
27
- tensorflow-transform==0.22.0 \
32
+ tensorflow-transform==0.23.0 \
28
33
tensorflow_model_analysis==0.22.2 \
29
34
ngraph-tensorflow-bridge==0.18.0 \
30
35
tensorflow-metadata==0.22.2 \
@@ -33,7 +38,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade \
33
38
tensorwatch==0.9.1 \
34
39
tensorflow-hub==0.8.0 \
35
40
tensorflow-addons==0.10.0 \
36
- tensorflow_text==2.2.1 \
41
+ tensorflow_text==2.3.0 \
37
42
neural-structured-learning==1.1.0 \
38
43
tensorflow_constrained_optimization \
39
44
tensorflow-graphics-gpu==1.0.0 \
@@ -46,28 +51,89 @@ RUN python3 -m pip install --no-cache-dir --upgrade \
46
51
tf-slim==1.1.0 \
47
52
tensorflow-plot==0.3.2 \
48
53
tensorflow-lattice==2.0.5 \
49
- tensorflow-io==0.13.0 \
50
- tfx==0.22.0 \
54
+ tensorflow-io==0.15.0 \
55
+ tfx==0.22.1 \
51
56
tfx-bsl==0.22.1
52
57
RUN python3 -m pip install --no-cache-dir \
53
58
tensorflow_ranking==0.3.1 \
54
59
tensorflow-compression==1.3 \
55
60
tensor2tensor==1.15.7 \
56
61
jupyterlab-nvdashboard==0.3.1
57
62
58
- RUN python3 -m pip install --no-cache-dir \
59
- tensorflow==${TENSORFLOW_VERSION}
63
+ # Download and build TensorFlow.
64
+ WORKDIR /tensorflow
60
65
61
- RUN python3 -m pip install --no-cache-dir --extra-index-url \
62
- https://developer.download.nvidia.com/compute/redist \
63
- nvidia-dali-cuda100 && \
64
- python3 -m pip install --no-cache-dir --extra-index-url \
65
- https://developer.download.nvidia.com/compute/redist nvidia-dali-tf-plugin-cuda100
66
+ # Download and build TensorFlow.
67
+ # Enable checking out both tags and branches
68
+ RUN export TAG_PREFIX="v" && \
69
+ echo ${TF_BUILD_VERSION} | grep -q ^${TAG_PREFIX}; \
70
+ if [ $? -eq 0 ]; then \
71
+ git clone --depth=1 https://github.com/tensorflow/tensorflow.git . && \
72
+ git fetch --tags && \
73
+ git checkout ${TF_BUILD_VERSION}; \
74
+ else \
75
+ git clone --depth=1 --branch=${TF_BUILD_VERSION} https://github.com/tensorflow/tensorflow.git . ; \
76
+ fi
77
+
78
+ RUN yes "" | python3 configure.py
79
+ RUN cp .bazelrc /root/.bazelrc
80
+
81
+ # NOTE(review): PYTHON is not defined in the visible part of this file; presumably
+ # it is inherited from the base image. If it is unset, this expands to an empty value.
+ ENV CI_BUILD_PYTHON=${PYTHON}
82
+ ENV WHL_DIR=/tmp/pip3
83
+ # Set bazel build parameters in .bazelrc in parameterized_docker_build.sh
84
+ # Use --copt=-march values to get optimized builds appropriate for the hardware
85
+ # platform of your choice.
86
+ # For ivy-bridge or sandy-bridge
87
+ # --copt=-march="avx" \
88
+ # For haswell, broadwell, or skylake
89
+ # --copt=-march="avx2" \
90
+ COPY .bazelrc /root/.mkl.bazelrc
91
+ RUN echo "import /root/.mkl.bazelrc" >>/root/.bazelrc
92
+
93
+ # ENV TF_NEED_TENSORRT=1
94
+ ENV TF_CUDA_COMPUTE_CAPABILITIES sm_35,sm_37,sm_52,sm_60,sm_61,sm_70,sm_75,compute_70,compute_75,compute_80
95
+
96
+ RUN tensorflow/tools/ci_build/builds/configured GPU \
97
+ bazel --bazelrc=/root/.bazelrc build \
98
+ -c opt \
99
+ --copt=-msse4.1 \
100
+ --copt=-msse4.2 \
101
+ --copt=-mavx \
102
+ --copt=-mavx2 \
103
+ --copt=-mfma \
104
+ --copt=-mfpmath=both \
105
+ --copt=-O3 \
106
+ --copt=-Wformat \
107
+ --copt=-Wformat-security \
108
+ --copt=-fstack-protector \
109
+ --copt=-fPIC \
110
+ --copt=-fpic \
111
+ --config=opt \
112
+ --config=cuda \
113
+ --config=mkl \
114
+ --config=monolithic \
115
+ --config=gdr \
116
+ --config=verbs \
117
+ # --config=ngraph \
118
+ --config=numa \
119
+ --config=v2 \
120
+ --linkopt=-znoexecstack \
121
+ --linkopt=-zrelro \
122
+ --linkopt=-znow \
123
+ --linkopt=-fstack-protector \
124
+ --linkopt -ldl \
125
+ --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
126
+ -k //tensorflow/tools/pip_package:build_pip_package && \
127
+ bazel-bin/tensorflow/tools/pip_package/build_pip_package "${WHL_DIR}" && \
128
+ python3 -m pip --no-cache-dir install --upgrade "${WHL_DIR}"/tensorflow-*.whl
129
+ RUN python3 -m pip --no-cache-dir install \
130
+ tensorboard==2.3 && \
131
+ rm -rf /root/.cache
132
+
133
+ # TODO: clean up the Bazel cache when done (no clean-up step follows this comment yet).
66
134
67
135
# Install Horovod, temporarily using CUDA stubs
68
- RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
69
- HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_GPU_BROADCAST=NCCL \
70
- HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITHOUT_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \
136
+ RUN HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITHOUT_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \
71
137
pip install --no-cache-dir horovod==0.19.5 && \
72
138
ldconfig
73
139
@@ -76,8 +142,25 @@ RUN python3 -m pip install --no-cache-dir \
76
142
nni==1.6 \
77
143
scikit-nni==0.2.1
78
144
145
+ RUN python3 -m pip install --no-cache-dir --extra-index-url \
146
+ https://developer.download.nvidia.com/compute/redist \
147
+ nvidia-dali-cuda110
148
+ # RUN python3 -m pip install --no-cache-dir \
149
+ # --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-tf-plugin-cuda110
150
+ # WORKDIR /tmp
151
+ # RUN git clone --recursive https://github.com/NVIDIA/DALI && \
152
+ # cd DALI && \
153
+ # mkdir build && \
154
+ # cd build && \
155
+ # cmake -D CMAKE_BUILD_TYPE=Release .. && \
156
+ # make -j"$(nproc)" && \
157
+ # cd .. && \
158
+ # python3 -m pip install --no-cache-dir dali/python
159
+
160
+
161
+
79
162
# Install ipython kernelspec
80
- Run python3 -m ipykernel install --display-name "TensorFlow 2.3 on Python 3.6 ( CUDA 10.1) " && \
163
+ # Register the Jupyter kernelspec under a descriptive display name, then print the
+ # generated kernel.json so the result is visible in the build output.
+ RUN python3 -m ipykernel install --display-name "TensorFlow 2.3 on Python 3.6 & CUDA 10.1" && \
81
164
cat /usr/local/share/jupyter/kernels/python3/kernel.json
82
165
83
166
# Backend.AI specifics
0 commit comments