Skip to content

Commit 0dee91b

Browse files
initial push for NIM on OKE
Taking the work from the deployment of Triton on OKE as a basis and adapting the walkthrough to NIM, following the announcements at GTC 2024 and the new release.
1 parent 5058971 commit 0dee91b

File tree

12 files changed

+1243
-0
lines changed

12 files changed

+1243
-0
lines changed

cloud-infrastructure/ai-infra-gpu/GPU/nim-gpu-oke/README.md

Lines changed: 308 additions & 0 deletions
Large diffs are not rendered by default.
155 KB
Loading
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/bin/bash
#
# Cloud-init for an OKE GPU worker node serving NIM model weights:
#   1. runs the standard OKE node bootstrap,
#   2. grows the root filesystem to the full boot volume,
#   3. installs rclone,
#   4. configures an OCI Object Storage remote (instance-principal auth)
#      and mounts it at /opt/mnt/model_bucket_oci via a systemd template unit.
#
# NOTE(review): cloud-init executes this script as root. The original
# `sudo su` line was removed — in a non-interactive script it spawns a
# shell reading stdin and does NOT elevate the commands that follow it.

# Fetch the OKE bootstrap script from the instance metadata service (IMDSv2)
# and run it to join the node to the cluster.
curl --fail -H "Authorization: Bearer Oracle" -L0 http://169.254.169.254/opc/v2/instance/metadata/oke_init_script | base64 --decode >/var/run/oke-init.sh
bash /var/run/oke-init.sh

# Expand the root filesystem to the full size of the boot volume.
sudo /usr/libexec/oci-growfs -y

# Install rclone. -fsSL: fail on HTTP errors (instead of piping an error
# page into bash), stay quiet, follow redirects.
curl -fsSL https://rclone.org/install.sh | sudo bash

# rclone remote backed by OCI Object Storage, authenticated through the
# node's instance principal. ##...## placeholders are substituted at
# provisioning time.
mkdir -p /opt/config/rclone

cat << EOF > /opt/config/rclone/rclone.conf
[model_bucket_oci]
type = oracleobjectstorage
provider = instance_principal_auth
namespace = ##NAMESPACE##
compartment = ##COMPARTMENT_OCID##
region = ##REGION##

EOF

#### ******* rclone service:

cat << EOF > /etc/systemd/system/[email protected]
# User service for Rclone mounting
#
# Place in /etc/systemd/system
# File must include the '@' (ex [email protected])
# As your normal user, run
# systemctl daemon-reload
# You can now start/enable each remote by using rclone@<remote>
# systemctl --now rclone@model_bucket_oci

# Make sure to include correct bucket name i.e:
## %i:<bucket-name> /opt/mnt/%i

[Unit]
Description=rclone: Remote FUSE filesystem for cloud storage config %i
Documentation=man:rclone(1)
After=network-online.target
Wants=network-online.target

[Service]
Type=notify
ExecStartPre=-/usr/bin/mkdir -p /opt/mnt/%i
ExecStart= \
/usr/bin/rclone mount \
--config=/opt/config/rclone/rclone.conf \
--tpslimit 50 \
--vfs-cache-mode writes \
--allow-non-empty \
--transfers 10 \
--allow-other \
%i:NIM/llama2-7b-hf/ /opt/mnt/%i
ExecStop=/bin/fusermount -u /opt/mnt/%i

[Install]
WantedBy=default.target
EOF

# Pick up the new unit file and mount the bucket now and on every boot.
systemctl daemon-reload
systemctl enable --now rclone@model_bucket_oci
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Inference pipeline configuration: serves a LLaMA model from the mounted
# model store through the TensorRT-LLM backend.
# NOTE(review): leading indentation was lost in extraction; the nesting of
# `pipeline:` and `trt_llm:` below is reconstructed — confirm against the
# original file.
model_repo_path: "/model-store/"          # Triton model repository mount point
use_ensemble: false
model_type: "LLAMA"
backend: "trt_llm"
base_model_id: "ensemble"
prompt_timer: 60
gateway_ip: "gateway-api"
server_port_internal: 9009
customization_cache_capacity: 10000
logging_level: "INFO"
enable_chat: true
pipeline:
  model_name: "ensemble"
  num_instances: 1
trt_llm:
  use: true
  ckpt_type: "hf"                         # checkpoint format: Hugging Face
  model_name: "trt_llm"
  backend: "python"
  num_gpus: 1
  model_path: /engine_dir                 # compiled TensorRT engine directory
  max_queue_delay_microseconds: 10000
  model_type: "llama"
  max_batch_size: 1
  max_input_len: 256
  max_output_len: 256
  max_beam_width: 1
  tensor_para_size: 1                     # tensor parallelism degree
  pipeline_para_size: 1                   # pipeline parallelism degree
  data_type: "float16"
  int8_mode: 0
  enable_custom_all_reduce: 0
  per_column_scaling: false
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Helm chart metadata.
# NOTE(review): `name` and `description` still say "Triton Inference
# Server" although this commit adapts the walkthrough to NIM — confirm
# whether the chart should be renamed for the NIM deployment.
apiVersion: v1
appVersion: "1.0"
description: Triton Inference Server
name: triton-inference-server
version: 1.0.0

0 commit comments

Comments
 (0)