
Commit 72c408d

mosout and crazy-JiangDongHua authored
Multi backends (#33)
* triton version
* add dockerfiles
* refine
* refine
* refine
* Add python gennel backend (#34)
  * add python gennel backend
  * fix
  * fix
  ---------
  Co-authored-by: JiangDongHua <jiangdonghua@oneflow-28>
  Co-authored-by: Mosout <[email protected]>
* fix
* [bugfix] fix entrypoint python path bug
* SD backend (#35)
  * init
  * [bugfix] fix entrypoint python path bug
  * [feature] add sd backend, example and dockerfile
  ---------
  Co-authored-by: JiangDongHua <jiangdonghua@oneflow-28>
* unify dockerfile
* refine cmake

---------

Co-authored-by: Frank J <[email protected]>
Co-authored-by: JiangDongHua <jiangdonghua@oneflow-28>
1 parent 29881f6 commit 72c408d

24 files changed: +1612 −13 lines

CMakeLists.txt

Lines changed: 3 additions & 3 deletions

@@ -20,10 +20,10 @@ option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
 option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
 
 set(THIRD_PARTY_MIRROR "" CACHE STRING "")
-set(TRITON_RELATED_REPO_TAG r21.10 CACHE STRING "Tag for triton-inference-server/(common|core|backend) repo")
+set(TRITON_RELATED_REPO_TAG r23.10 CACHE STRING "Tag for triton-inference-server/(common|core|backend) repo")
 message(STATUS ${TRITON_RELATED_REPO_TAG})
-if(NOT ${TRITON_RELATED_REPO_TAG} STREQUAL "r21.10")
-  message(FATAL_ERROR "Only support triton with tag r21.10.")
+if(NOT ${TRITON_RELATED_REPO_TAG} MATCHES "^(r21.10|r23.10)$")
+  message(FATAL_ERROR "Only support triton with tag r21.10 or r23.10.")
 endif()
 
 list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
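For reference, a build configured against the newly supported tag could look like the following. This is a minimal sketch: only -DTRITON_RELATED_REPO_TAG comes from this diff, the -G Ninja generator matches the invocation in docker/Dockerfile.serving further down, and the build directory is an assumption.

    # Sketch: configure the serving backend against Triton r23.10
    mkdir -p build && cd build
    cmake -DTRITON_RELATED_REPO_TAG=r23.10 -G Ninja ..
    ninja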

ci/build/oneflow-serving.py

Lines changed: 1 addition & 2 deletions

@@ -102,7 +102,7 @@ def __init__(self) -> None:
     def prepare(self):
         self._parse()
 
-        self._unknown.extend(['--strict-model-config', 'false'])
+        self._unknown.extend(['--disable-auto-complete-config'])
         self._unknown_split = []
         for argument in self._unknown:
             self._unknown_split.extend(argument.split('='))
@@ -116,7 +116,6 @@ def prepare(self):
             self._model_repos.append('/models')
             self._unknown.append('--model-store')
             self._unknown.append('/models')
-
         self._launch_command = 'tritonserver ' + ' '.join(self._unknown)
         self._collect_models()
         self._prepare_processor()
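Triton deprecated --strict-model-config in favor of the auto-complete-config options, which is presumably why the wrapper now emits the r23.10-era flag. A sketch of the command line the script assembles; the /models store comes from this file, everything else is illustrative:

    # Roughly what self._launch_command now expands to (sketch)
    tritonserver --disable-auto-complete-config --model-store /models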

ci/build/oneflow_serving_triton_entrypoint.sh

Lines changed: 2 additions & 1 deletion

@@ -43,8 +43,9 @@ echo
 
 # This script can either be a wrapper around arbitrary command lines,
 # or it will simply exec bash if no arguments were given
+export PYTHONPATH=/opt/oneflow/python:$PYTHONPATH
 if [[ $# -eq 0 ]]; then
-    exec oneflow-serving --model-store /models --strict-model-config false
+    exec oneflow-serving --model-store /models
 else
     exec "$@"
 fi
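Because the entrypoint only execs oneflow-serving when no arguments are given, a container can either serve models by default or run an arbitrary command. A hedged sketch; the image name is a placeholder, and the ports are Triton's usual HTTP/gRPC/metrics defaults:

    # No arguments: the entrypoint starts oneflow-serving against /models
    docker run --gpus all --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 \
        -v $PWD/models:/models <serving-image>

    # With arguments: the entrypoint execs them instead
    docker run --rm -it <serving-image> bash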

cmake/triton.cmake

Lines changed: 8 additions & 4 deletions

@@ -11,11 +11,15 @@ if(DEFINED THIRD_PARTY_MIRROR)
 endif()
 
 if(${TRITON_RELATED_REPO_TAG} STREQUAL "r21.10")
-set(REPO_COMMON_MD5 72bf32b638fe6a9e9877630cb099fc1a)
-set(REPO_CORE_MD5 59d97b3e5d40ea58c9f685b6ecb0771a)
-set(REPO_BACKEND_MD5 2ae374cf913fc5b348b6552858fb7e7b)
+  set(REPO_COMMON_MD5 72bf32b638fe6a9e9877630cb099fc1a)
+  set(REPO_CORE_MD5 59d97b3e5d40ea58c9f685b6ecb0771a)
+  set(REPO_BACKEND_MD5 2ae374cf913fc5b348b6552858fb7e7b)
+elseif(${TRITON_RELATED_REPO_TAG} STREQUAL "r23.10")
+  set(REPO_COMMON_MD5 8183efa82f41c4964c26e9b839ef2760)
+  set(REPO_CORE_MD5 ba92d1b9aa5154edb26fc9664224f9ae)
+  set(REPO_BACKEND_MD5 c7a6a21353e8f00e61bd97afd8708c0a)
 else()
-  message(FATAL_ERROR "Only support triton with tag r21.10.")
+  message(FATAL_ERROR "Only support triton with tag r21.10 or r23.10.")
 endif()
 
 
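These MD5 sums pin the downloaded triton-inference-server/common, core, and backend sources for each supported tag. An illustrative manual check under an assumed archive URL layout; only the hash itself comes from this diff:

    # Illustrative only: verify a fetched archive against the pinned checksum
    wget -O common-r23.10.zip \
        https://github.com/triton-inference-server/common/archive/refs/heads/r23.10.zip
    echo "8183efa82f41c4964c26e9b839ef2760  common-r23.10.zip" | md5sum -c -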
docker/Dockerfile.base

Lines changed: 46 additions & 0 deletions

@@ -0,0 +1,46 @@
+FROM nvcr.io/nvidia/tritonserver:23.10-py3-min
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \
+    sed -i 's/archive.ubuntu.com/mirrors.ustc.edu.cn/g' /etc/apt/sources.list && \
+    apt-get update && \
+    $APT_INSTALL \
+        build-essential \
+        apt-utils \
+        ca-certificates \
+        wget \
+        git \
+        vim \
+        libssl-dev \
+        curl \
+        unzip \
+        unrar \
+        libgl1-mesa-glx \
+        libglib2.0-0 \
+        libmkl-full-dev \
+        nasm \
+        ccache \
+        lld \
+        binutils-dev \
+        software-properties-common \
+        libb64-0d \
+        libb64-dev \
+        libre2-9 \
+        libre2-dev \
+        rapidjson-dev
+
+RUN wget https://mirrors.bfsu.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+    bash ./Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda3 && \
+    /miniconda3/bin/python -m pip config set global.index-url https://mirrors.bfsu.edu.cn/pypi/web/simple && \
+    /miniconda3/bin/conda create -n py310 python=3.10.12 -y && \
+    PIP_INSTALL="/miniconda3/envs/py310/bin/python -m pip --no-cache-dir install --upgrade" && \
+    $PIP_INSTALL cmake \
+        ninja && \
+    ln -s /miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate py310" >> ~/.bashrc && \
+    find /miniconda3/ -follow -type f -name '*.a' -delete && \
+    find /miniconda3/ -follow -type f -name '*.js.map' -delete && \
+    /miniconda3/bin/conda clean -afy && \
+    rm -f ./Miniconda3-latest-Linux-x86_64.sh
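The later Dockerfiles consume this image under the serving:base tag (see their FROM lines), so it would presumably be built first; the build context being the repository root is an assumption:

    # Tag must match the FROM lines in Dockerfile.build_of and Dockerfile.serving
    docker build -f docker/Dockerfile.base -t serving:base .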

docker/Dockerfile.build_of

Lines changed: 17 additions & 0 deletions

@@ -0,0 +1,17 @@
+FROM serving:base
+
+ENV PATH /miniconda3/envs/py310/bin:$PATH
+
+RUN cd /opt && \
+    git clone https://github.com/Oneflow-Inc/oneflow.git && \
+    cd /opt/oneflow && \
+    mkdir build && \
+    cd /opt/oneflow/build && \
+    cmake -C /opt/oneflow/cmake/caches/ci/serving/cuda-75.cmake \
+        -DCMAKE_EXE_LINKER_FLAGS_INIT="-fuse-ld=lld" \
+        -DCMAKE_MODULE_LINKER_FLAGS_INIT="-fuse-ld=lld" \
+        -DCMAKE_SHARED_LINKER_FLAGS_INIT="-fuse-ld=lld" \
+        -DBUILD_HWLOC=OFF \
+        -DWITH_MLIR_CUDA_CODEGEN=ON \
+        -DCMAKE_CUDA_ARCHITECTURES="75;86" /opt/oneflow && \
+    ninja -j32
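This stage compiles OneFlow with lld and CUDA architectures 75/86 on top of the base image. Dockerfile.serving pulls it in as serving:build_of, so a matching build command would be:

    # Tag must match "FROM serving:build_of as build_of" in Dockerfile.serving
    docker build -f docker/Dockerfile.build_of -t serving:build_of .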

docker/Dockerfile.serving

Lines changed: 86 additions & 0 deletions

@@ -0,0 +1,86 @@
+FROM serving:build_of as build_of
+FROM nvcr.io/nvidia/tritonserver:23.10-py3 as full
+FROM serving:base
+
+ENV PATH /miniconda3/envs/py310/bin:$PATH
+ENV PATH /opt/tritonserver/bin:${PATH}
+
+# Create a user that can be used to run triton as
+# non-root. Make sure that this user is given ID 1000. All server
+# artifacts copied below are assigned to this user.
+ENV TRITON_SERVER_USER=triton-server
+RUN userdel tensorrt-server > /dev/null 2>&1 || true && if ! id -u $TRITON_SERVER_USER > /dev/null 2>&1 ; then useradd $TRITON_SERVER_USER; fi && [ `id -u $TRITON_SERVER_USER` -eq 1000 ] && [ `id -g $TRITON_SERVER_USER` -eq 1000 ]
+
+# Ensure apt-get won't prompt for selecting options
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN distribution=$(. /etc/os-release;echo $ID$VERSION_ID | sed -e 's/\.//g') && \
+    wget https://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64/cuda-keyring_1.0-1_all.deb && \
+    dpkg -i cuda-keyring_1.0-1_all.deb && \
+    apt-get update && \
+    apt-get install -y datacenter-gpu-manager=1:2.4.7 && \
+    rm cuda-keyring_1.0-1_all.deb
+
+WORKDIR /opt/tritonserver
+RUN rm -rf /opt/tritonserver/*
+COPY --chown=1000:1000 --from=full /opt/tritonserver/LICENSE .
+COPY --chown=1000:1000 --from=full /opt/tritonserver/TRITON_VERSION .
+COPY --chown=1000:1000 --from=full /opt/tritonserver/NVIDIA_Deep_Learning_Container_License.pdf .
+COPY --chown=1000:1000 --from=full /opt/tritonserver/bin bin/
+COPY --chown=1000:1000 --from=full /opt/tritonserver/lib lib/
+COPY --chown=1000:1000 --from=full /opt/tritonserver/include include/
+COPY --chown=1000:1000 --from=full /opt/tritonserver/backends/python backends/python/
+
+RUN cd /opt && \
+    git clone https://github.com/Oneflow-Inc/serving.git && \
+    cd /opt/serving && \
+    git checkout multi_backends && \
+    cp /opt/serving/ci/build/oneflow_serving_triton_entrypoint.sh /opt/nvidia/ && \
+    cp /opt/serving/ci/build/oneflow-serving.py /opt/tritonserver/bin/oneflow-serving && \
+    mkdir -p /opt/tritonserver/backends/oneflow_python && \
+    cp /opt/serving/src/triton_python/model.py /opt/tritonserver/backends/oneflow_python/
+
+
+COPY --chown=1000:1000 --from=build_of /opt/oneflow /opt/oneflow
+COPY --chown=1000:1000 --from=build_of /opt/oneflow/build/liboneflow_cpp /opt/liboneflow_cpp
+
+RUN apt-get install -y --no-install-recommends libunwind-dev libarchive-dev && \
+    rm /opt/oneflow/python/oneflow/core && \
+    cp -r /opt/oneflow/build/of_proto_python/oneflow/core /opt/oneflow/python/oneflow/ && \
+    rm -rf /opt/oneflow/build && \
+    /miniconda3/envs/py310/bin/python -m pip --no-cache-dir install -r /opt/oneflow/dev-requirements.txt && \
+    /miniconda3/envs/py310/bin/python -m pip --no-cache-dir install typing_extensions && \
+    echo "export PYTHONPATH=/opt/oneflow/python:$PYTHONPATH" >> ~/.bashrc
+
+RUN cd /opt && \
+    git clone https://github.com/Oneflow-Inc/oneflow-lite.git && \
+    cd /opt/oneflow-lite/runtime && \
+    mkdir build && \
+    cd /opt/oneflow-lite/runtime/build && \
+    cmake -DBUILD_X86=ON \
+        -DBUILD_CUDA=ON .. && \
+    make -j32 && \
+    make install
+
+RUN mkdir -p /opt/serving/build && \
+    cd /opt/serving/build && \
+    cmake -DCMAKE_PREFIX_PATH=/opt/liboneflow_cpp/share \
+        -DTRITON_RELATED_REPO_TAG="r23.10" \
+        -DTRITON_ENABLE_GPU=ON \
+        -DTHIRD_PARTY_MIRROR=aliyun \
+        -DBUILD_ONEFLOW_LITE_BACKEND=ON \
+        -DBUILD_ONEFLOW_BACKEND=ON \
+        -G Ninja .. && \
+    ninja -j32 && \
+    mkdir -p /opt/tritonserver/backends/oneflow && \
+    mkdir -p /opt/tritonserver/backends/oneflow_lite && \
+    mv /opt/serving/build/libtriton_oneflow.so /opt/tritonserver/backends/oneflow/ && \
+    mv /opt/serving/build/libtriton_oneflow_lite.so /opt/tritonserver/backends/oneflow_lite/ && \
+    mkdir -p /usr/local/nvidia/lib && \
+    mv /opt/liboneflow_cpp/lib/* /usr/local/nvidia/lib/ && \
+    cd /opt && \
+    rm -rf /opt/liboneflow_cpp && \
+    rm -rf /opt/oneflow_lite && \
+    rm -rf /opt/serving
+
+ENTRYPOINT ["/opt/nvidia/oneflow_serving_triton_entrypoint.sh"]
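With serving:base and serving:build_of in place, this final stage copies the Triton runtime out of the official 23.10 image, builds the oneflow and oneflow_lite backends, and installs the oneflow_python backend. A sketch of the final build; the serving:latest tag is an assumption:

    docker build -f docker/Dockerfile.serving -t serving:latest .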

examples/cat.jpg

523 KB
Lines changed: 3 additions & 3 deletions

@@ -31,7 +31,7 @@
 triton_client = httpclient.InferenceServerClient(url='127.0.0.1:8000')
 
 image = Image.open(FLAGS.image)
-image = image.resize((224, 224), Image.ANTIALIAS)
+image = image.resize((224, 224), Image.LANCZOS)
 image = np.asarray(image)
 image = image / 255
 image = np.expand_dims(image, axis=0)
@@ -44,7 +44,7 @@
 outputs = []
 outputs.append(httpclient.InferRequestedOutput('OUTPUT_0', binary_data=True, class_count=3))
 now = time.time()
-results = triton_client.infer("resnet50_oneflow", inputs=inputs, outputs=outputs)
-print(time.time() - now)
+results = triton_client.infer("resnet50", inputs=inputs, outputs=outputs)
+print(f"time cost: {time.time() - now}s")
 output_data0 = results.as_numpy('OUTPUT_0')
 print(output_data0)
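The ANTIALIAS to LANCZOS change tracks Pillow 10, which removed the deprecated Image.ANTIALIAS alias; Image.LANCZOS is the same resampling filter under its current name. A hypothetical invocation of this example client against a locally running server; the script name and --image flag are assumptions inferred from FLAGS.image:

    # Hypothetical: query a local tritonserver with the bundled test image
    python3 client.py --image examples/cat.jpg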
