Skip to content

Commit 8a999db

Browse files
authored
chore: Offline source image builds, use hermeto generic fetcher for arrow deps (feast-dev#5318)
chore: Use hermeto generic fetcher for offline arrow source build Signed-off-by: Tommy Hughes <[email protected]>
1 parent 3d14d61 commit 8a999db

13 files changed

+369
-185
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
arrow-thirdparty-offline-versions

sdk/python/feast/infra/feature_servers/multicloud/offline/Dockerfile.builder.arrow

Lines changed: 0 additions & 114 deletions
This file was deleted.

sdk/python/feast/infra/feature_servers/multicloud/offline/Dockerfile.builder.ibis

Lines changed: 0 additions & 8 deletions
This file was deleted.
Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11
FROM registry.access.redhat.com/ubi9/python-311:1
22
ARG RELEASE
33

4+
ENV IBIS_VERSION="9.5.0"
5+
ENV MILVUS_LITE_VERSION="2.4.12"
6+
47
USER 0
58
# Install the OS-level build tooling (runs as root, see USER 0 above).
# The yum cache is cleaned in the same layer so it is not persisted in the image.
RUN yum install -y ninja-build llvm-devel cmake llvm-toolset ncurses-devel rust cargo && \
    yum clean all
69
# yarn and yalc are installed only when the RELEASE build arg is empty or unset.
RUN if [[ -z "${RELEASE}" ]]; then \
      npm install -g yarn yalc && \
      rm -rf .npm; \
    fi
710
USER 1001
811

912
# milvus dep
10-
RUN pip install milvus-lite==2.4.12
13+
# Version comes from the MILVUS_LITE_VERSION ENV declared earlier in this Dockerfile.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN pip install --no-cache-dir milvus-lite==${MILVUS_LITE_VERSION}
14+
15+
# ibis-framework must be built from a clone of its git repository: its build detects the package version from version control and otherwise fails with:
16+
## RuntimeError: Unable to detect version control system. Checked: Git. Not installed: Mercurial, Darcs, Subversion, Bazaar, Fossil, Pijul.
17+
# Clone only the ref named by IBIS_VERSION into ${APP_ROOT}/src/ibis.
RUN git clone \
      --branch "${IBIS_VERSION}" \
      --single-branch \
      https://github.com/ibis-project/ibis \
      "${APP_ROOT}/src/ibis"

sdk/python/feast/infra/feature_servers/multicloud/offline/Dockerfile.sdist

Lines changed: 120 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,123 @@
1-
FROM ibis-builder:latest
1+
FROM yum-builder:dev
2+
3+
ENV APACHE_ARROW_VERSION="17.0.0"
4+
5+
ENV ARROW_HOME=${APP_ROOT}/arrow-dist
6+
ENV LD_LIBRARY_PATH=${ARROW_HOME}/lib:$LD_LIBRARY_PATH
7+
ENV CMAKE_PREFIX_PATH=${ARROW_HOME}:$CMAKE_PREFIX_PATH
8+
ENV THIRD_PARTY_PATH=/tmp/hermeto-generic-output/deps/generic
9+
10+
# configured for Arrow version 17.0.0
11+
RUN mkdir ${APP_ROOT}/src/arrow ${ARROW_HOME} ${APP_ROOT}/src/arrow-build && \
12+
tar xfz ${THIRD_PARTY_PATH}/apache-arrow-${APACHE_ARROW_VERSION}.tar.gz -C ${APP_ROOT}/src/arrow --strip-components=1 && \
13+
export \
14+
# Environment variables for an offline Arrow build
15+
ARROW_ABSL_URL="${THIRD_PARTY_PATH}/absl-20211102.0.tar.gz" \
16+
ARROW_AWS_C_AUTH_URL="${THIRD_PARTY_PATH}/aws-c-auth-v0.6.22.tar.gz" \
17+
ARROW_AWS_C_CAL_URL="${THIRD_PARTY_PATH}/aws-c-cal-v0.5.20.tar.gz" \
18+
ARROW_AWS_C_COMMON_URL="${THIRD_PARTY_PATH}/aws-c-common-v0.8.9.tar.gz" \
19+
ARROW_AWS_C_COMPRESSION_URL="${THIRD_PARTY_PATH}/aws-c-compression-v0.2.16.tar.gz" \
20+
ARROW_AWS_C_EVENT_STREAM_URL="${THIRD_PARTY_PATH}/aws-c-event-stream-v0.2.18.tar.gz" \
21+
ARROW_AWS_C_HTTP_URL="${THIRD_PARTY_PATH}/aws-c-http-v0.7.3.tar.gz" \
22+
ARROW_AWS_C_IO_URL="${THIRD_PARTY_PATH}/aws-c-io-v0.13.14.tar.gz" \
23+
ARROW_AWS_C_MQTT_URL="${THIRD_PARTY_PATH}/aws-c-mqtt-v0.8.4.tar.gz" \
24+
ARROW_AWS_C_S3_URL="${THIRD_PARTY_PATH}/aws-c-s3-v0.2.3.tar.gz" \
25+
ARROW_AWS_C_SDKUTILS_URL="${THIRD_PARTY_PATH}/aws-c-sdkutils-v0.1.6.tar.gz" \
26+
ARROW_AWS_CHECKSUMS_URL="${THIRD_PARTY_PATH}/aws-checksums-v0.1.13.tar.gz" \
27+
ARROW_AWS_CRT_CPP_URL="${THIRD_PARTY_PATH}/aws-crt-cpp-v0.18.16.tar.gz" \
28+
ARROW_AWS_LC_URL="${THIRD_PARTY_PATH}/aws-lc-v1.3.0.tar.gz" \
29+
ARROW_AWSSDK_URL="${THIRD_PARTY_PATH}/aws-sdk-cpp-1.10.55.tar.gz" \
30+
ARROW_BOOST_URL="${THIRD_PARTY_PATH}/boost-1.81.0.tar.gz" \
31+
ARROW_BROTLI_URL="${THIRD_PARTY_PATH}/brotli-v1.0.9.tar.gz" \
32+
ARROW_BZIP2_URL="${THIRD_PARTY_PATH}/bzip2-1.0.8.tar.gz" \
33+
ARROW_CARES_URL="${THIRD_PARTY_PATH}/cares-1.17.2.tar.gz" \
34+
ARROW_CRC32C_URL="${THIRD_PARTY_PATH}/crc32c-1.1.2.tar.gz" \
35+
ARROW_GBENCHMARK_URL="${THIRD_PARTY_PATH}/gbenchmark-v1.8.3.tar.gz" \
36+
ARROW_GFLAGS_URL="${THIRD_PARTY_PATH}/gflags-v2.2.2.tar.gz" \
37+
ARROW_GLOG_URL="${THIRD_PARTY_PATH}/glog-v0.5.0.tar.gz" \
38+
ARROW_GOOGLE_CLOUD_CPP_URL="${THIRD_PARTY_PATH}/google-cloud-cpp-v2.22.0.tar.gz" \
39+
ARROW_GRPC_URL="${THIRD_PARTY_PATH}/grpc-v1.46.3.tar.gz" \
40+
ARROW_GTEST_URL="${THIRD_PARTY_PATH}/gtest-1.11.0.tar.gz" \
41+
ARROW_JEMALLOC_URL="${THIRD_PARTY_PATH}/jemalloc-5.3.0.tar.bz2" \
42+
ARROW_LZ4_URL="${THIRD_PARTY_PATH}/lz4-v1.9.4.tar.gz" \
43+
ARROW_MIMALLOC_URL="${THIRD_PARTY_PATH}/mimalloc-v2.0.6.tar.gz" \
44+
ARROW_NLOHMANN_JSON_URL="${THIRD_PARTY_PATH}/nlohmann-json-v3.10.5.tar.gz" \
45+
ARROW_OPENTELEMETRY_URL="${THIRD_PARTY_PATH}/opentelemetry-cpp-v1.13.0.tar.gz" \
46+
ARROW_OPENTELEMETRY_PROTO_URL="${THIRD_PARTY_PATH}/opentelemetry-proto-v0.17.0.tar.gz" \
47+
ARROW_ORC_URL="${THIRD_PARTY_PATH}/orc-2.0.1.tar.gz" \
48+
ARROW_PROTOBUF_URL="${THIRD_PARTY_PATH}/protobuf-v21.3.tar.gz" \
49+
ARROW_RAPIDJSON_URL="${THIRD_PARTY_PATH}/rapidjson-232389d4f1012dddec4ef84861face2d2ba85709.tar.gz" \
50+
ARROW_RE2_URL="${THIRD_PARTY_PATH}/re2-2022-06-01.tar.gz" \
51+
ARROW_S2N_TLS_URL="${THIRD_PARTY_PATH}/s2n-v1.3.35.tar.gz" \
52+
ARROW_SUBSTRAIT_URL="${THIRD_PARTY_PATH}/substrait-0.44.0.tar.gz" \
53+
ARROW_SNAPPY_URL="${THIRD_PARTY_PATH}/snappy-1.1.10.tar.gz" \
54+
ARROW_THRIFT_URL="${THIRD_PARTY_PATH}/thrift-0.16.0.tar.gz" \
55+
ARROW_UCX_URL="${THIRD_PARTY_PATH}/ucx-1.12.1.tar.gz" \
56+
ARROW_UTF8PROC_URL="${THIRD_PARTY_PATH}/utf8proc-v2.7.0.tar.gz" \
57+
ARROW_XSIMD_URL="${THIRD_PARTY_PATH}/xsimd-13.0.0.tar.gz" \
58+
ARROW_ZLIB_URL="${THIRD_PARTY_PATH}/zlib-1.3.1.tar.gz" \
59+
ARROW_ZSTD_URL="${THIRD_PARTY_PATH}/zstd-1.5.6.tar.gz" \
60+
&& \
61+
cmake \
62+
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
63+
-DARROW_COMPUTE=ON \
64+
-DARROW_ACERO=ON \
65+
-DARROW_WITH_BROTLI=ON \
66+
-DARROW_WITH_BZ2=ON \
67+
-DARROW_CSV=ON \
68+
-DARROW_DATASET=ON \
69+
-DARROW_FILESYSTEM=ON \
70+
-DARROW_FLIGHT=ON \
71+
-DARROW_FLIGHT_SQL=ON \
72+
-DARROW_GANDIVA=ON \
73+
-DARROW_GCS=ON \
74+
-DARROW_HDFS=ON \
75+
-DARROW_JEMALLOC=ON \
76+
-DARROW_JSON=ON \
77+
-DARROW_WITH_LZ4=ON \
78+
-DARROW_MIMALLOC=ON \
79+
-DARROW_PARQUET=ON \
80+
-DARROW_S3=ON \
81+
-DARROW_WITH_SNAPPY=ON \
82+
-DARROW_SUBSTRAIT=ON \
83+
-DARROW_WITH_RE2=ON \
84+
-DARROW_WITH_UTF8PROC=ON \
85+
-DARROW_TENSORFLOW=ON \
86+
-DARROW_WITH_ZLIB=ON \
87+
-DARROW_WITH_ZSTD=ON \
88+
-DARROW_BUILD_SHARED=ON \
89+
-S ${APP_ROOT}/src/arrow/cpp \
90+
-B ${APP_ROOT}/src/arrow-build && \
91+
\
92+
cmake --build ${APP_ROOT}/src/arrow-build --target install && \
93+
\
94+
source /tmp/hermeto.env && \
95+
pip install -r ${APP_ROOT}/src/arrow/python/requirements-wheel-build.txt && \
96+
\
97+
cd ${APP_ROOT}/src/arrow/python && \
98+
PYARROW_PARALLEL=4 python setup.py build_ext --bundle-arrow-cpp bdist_wheel && \
99+
pip install dist/pyarrow-*.whl && \
100+
\
101+
cd ${APP_ROOT}/src && \
102+
rm -rf ${APP_ROOT}/src/arrow-build ${APP_ROOT}/src/arrow ${ARROW_HOME}
103+
104+
RUN python -c "import pyarrow; print(pyarrow.__version__)"
105+
RUN python -c "import pyarrow.lib as _lib; print(_lib.__name__)"
106+
RUN python -c "import pyarrow.parquet as parquet; print(parquet.__name__)"
107+
RUN python -c "import pyarrow.dataset as dataset; print(dataset.__name__)"
108+
RUN python -c "import pyarrow.flight as flight; print(flight.__name__)"
109+
RUN python -c "import pyarrow.substrait as substrait; print(substrait.__name__)"
110+
111+
# A newer numpy was required for the pyarrow wheel build, but the pyarrow module itself can run with an older version ("numpy>=1.16.6").
112+
# feast requires "numpy<2", so here we install numpy 1.x, as well as some other packages which will be needed for the feast build.
113+
RUN source /tmp/hermeto.env && \
114+
pip install "numpy>=1.16.6,<2"
115+
116+
RUN cd ${APP_ROOT}/src/ibis && \
117+
source /tmp/hermeto.env && \
118+
pip install .[duckdb] && \
119+
cd ${APP_ROOT}/src && \
120+
rm -rf ${APP_ROOT}/src/ibis
2121

3122
# This section only necessary when building from local feast source ... e.g. ".[minimal]"
4123
########################

sdk/python/feast/infra/feature_servers/multicloud/offline/Dockerfile.sdist.release

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
1-
FROM yum-builder:latest
1+
FROM yum-builder:release
2+
3+
ENV APACHE_ARROW_VERSION="17.0.0"
24

3-
ENV OFFLINE_BUILD_DIR=offline_build
45
ENV ARROW_HOME=${APP_ROOT}/arrow-dist
56
ENV LD_LIBRARY_PATH=${ARROW_HOME}/lib:$LD_LIBRARY_PATH
67
ENV CMAKE_PREFIX_PATH=${ARROW_HOME}:$CMAKE_PREFIX_PATH
7-
ENV THIRD_PARTY_PATH=/tmp/arrow/cpp/arrow-thirdparty
8-
9-
COPY --chown=default ${OFFLINE_BUILD_DIR}/arrow/cpp/cmake_modules ${APP_ROOT}/src/arrow/cpp/cmake_modules
10-
COPY --chown=default ${OFFLINE_BUILD_DIR}/arrow/python ${APP_ROOT}/src/arrow/python
8+
ENV THIRD_PARTY_PATH=/tmp/hermeto-generic-output/deps/generic
119

1210
# configured for Arrow version 17.0.0
13-
RUN mkdir ${ARROW_HOME} ${APP_ROOT}/src/arrow-build
14-
RUN export \
11+
RUN mkdir ${APP_ROOT}/src/arrow ${ARROW_HOME} ${APP_ROOT}/src/arrow-build && \
12+
tar xfz ${THIRD_PARTY_PATH}/apache-arrow-${APACHE_ARROW_VERSION}.tar.gz -C ${APP_ROOT}/src/arrow --strip-components=1 && \
13+
export \
1514
# Environment variables for an offline Arrow build
1615
ARROW_ABSL_URL="${THIRD_PARTY_PATH}/absl-20211102.0.tar.gz" \
1716
ARROW_AWS_C_AUTH_URL="${THIRD_PARTY_PATH}/aws-c-auth-v0.6.22.tar.gz" \
@@ -87,7 +86,7 @@ RUN export \
8786
-DARROW_WITH_ZLIB=ON \
8887
-DARROW_WITH_ZSTD=ON \
8988
-DARROW_BUILD_SHARED=ON \
90-
-S /tmp/arrow/cpp \
89+
-S ${APP_ROOT}/src/arrow/cpp \
9190
-B ${APP_ROOT}/src/arrow-build && \
9291
\
9392
cmake --build ${APP_ROOT}/src/arrow-build --target install && \
@@ -100,7 +99,7 @@ RUN export \
10099
pip install dist/pyarrow-*.whl && \
101100
\
102101
cd ${APP_ROOT}/src && \
103-
rm -rf ${APP_ROOT}/src/arrow-build ${APP_ROOT}/src/arrow ${APP_ROOT}/arrow-dist
102+
rm -rf ${APP_ROOT}/src/arrow-build ${APP_ROOT}/src/arrow ${ARROW_HOME}
104103

105104
RUN python -c "import pyarrow; print(pyarrow.__version__)"
106105
RUN python -c "import pyarrow.lib as _lib; print(_lib.__name__)"
@@ -114,14 +113,13 @@ RUN python -c "import pyarrow.substrait as substrait; print(substrait.__name__)"
114113
RUN source /tmp/hermeto.env && \
115114
pip install "numpy>=1.16.6,<2"
116115

117-
COPY --chown=default ${OFFLINE_BUILD_DIR}/ibis ${APP_ROOT}/src/ibis
118116
RUN cd ${APP_ROOT}/src/ibis && \
119117
source /tmp/hermeto.env && \
120118
pip install .[duckdb] && \
121119
cd ${APP_ROOT}/src && \
122120
rm -rf ${APP_ROOT}/src/ibis
123121

124-
COPY sdk/python/feast/infra/feature_servers/multicloud/requirements.txt requirements.txt
122+
COPY requirements.txt requirements.txt
125123
RUN source /tmp/hermeto.env && \
126124
pip install -r requirements.txt
127125

0 commit comments

Comments
 (0)