Skip to content

Commit 0dee91b

Browse files
initial push for NIM on OKE
Taking the work from the deployment of Triton on OKE as a basis and adapting the walkthrough to NIM, following the announcements at GTC 2024 and the new release.
1 parent 5058971 commit 0dee91b

File tree

12 files changed

+1243
-0
lines changed

12 files changed

+1243
-0
lines changed

cloud-infrastructure/ai-infra-gpu/GPU/nim-gpu-oke/README.md

Lines changed: 308 additions & 0 deletions
Large diffs are not rendered by default.
155 KB
Loading
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/bin/bash
#
# Cloud-init for an OKE GPU worker node serving NIM model weights:
#   1. runs the standard OKE node bootstrap,
#   2. grows the root filesystem to the full boot volume,
#   3. installs rclone,
#   4. configures an OCI Object Storage remote (instance-principal auth)
#      and mounts it at /opt/mnt/model_bucket_oci via a systemd template unit.
#
# NOTE(review): cloud-init executes this script as root. The original
# `sudo su` line was removed — in a non-interactive script it spawns a
# shell reading stdin and does NOT elevate the commands that follow it.

# Fetch the OKE bootstrap script from the instance metadata service (IMDSv2)
# and run it to join the node to the cluster.
curl --fail -H "Authorization: Bearer Oracle" -L0 http://169.254.169.254/opc/v2/instance/metadata/oke_init_script | base64 --decode >/var/run/oke-init.sh
bash /var/run/oke-init.sh

# Expand the root filesystem to the full size of the boot volume.
sudo /usr/libexec/oci-growfs -y

# Install rclone. -fsSL: fail on HTTP errors (instead of piping an error
# page into bash), stay quiet, follow redirects.
curl -fsSL https://rclone.org/install.sh | sudo bash

# rclone remote backed by OCI Object Storage, authenticated through the
# node's instance principal. ##...## placeholders are substituted at
# provisioning time.
mkdir -p /opt/config/rclone

cat << EOF > /opt/config/rclone/rclone.conf
[model_bucket_oci]
type = oracleobjectstorage
provider = instance_principal_auth
namespace = ##NAMESPACE##
compartment = ##COMPARTMENT_OCID##
region = ##REGION##

EOF

#### ******* rclone service:

cat << EOF > /etc/systemd/system/[email protected]
# User service for Rclone mounting
#
# Place in /etc/systemd/system
# File must include the '@' (ex [email protected])
# As your normal user, run
# systemctl daemon-reload
# You can now start/enable each remote by using rclone@<remote>
# systemctl --now rclone@model_bucket_oci

# Make sure to include correct bucket name i.e:
## %i:<bucket-name> /opt/mnt/%i

[Unit]
Description=rclone: Remote FUSE filesystem for cloud storage config %i
Documentation=man:rclone(1)
After=network-online.target
Wants=network-online.target

[Service]
Type=notify
ExecStartPre=-/usr/bin/mkdir -p /opt/mnt/%i
ExecStart= \
/usr/bin/rclone mount \
--config=/opt/config/rclone/rclone.conf \
--tpslimit 50 \
--vfs-cache-mode writes \
--allow-non-empty \
--transfers 10 \
--allow-other \
%i:NIM/llama2-7b-hf/ /opt/mnt/%i
ExecStop=/bin/fusermount -u /opt/mnt/%i

[Install]
WantedBy=default.target
EOF

# Pick up the new unit file and mount the bucket now and on every boot.
systemctl daemon-reload
systemctl enable --now rclone@model_bucket_oci
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Inference pipeline configuration: serves a LLaMA model from the mounted
# model store through the TensorRT-LLM backend.
# NOTE(review): leading indentation was lost in extraction; the nesting of
# `pipeline:` and `trt_llm:` below is reconstructed — confirm against the
# original file.
model_repo_path: "/model-store/"          # Triton model repository mount point
use_ensemble: false
model_type: "LLAMA"
backend: "trt_llm"
base_model_id: "ensemble"
prompt_timer: 60
gateway_ip: "gateway-api"
server_port_internal: 9009
customization_cache_capacity: 10000
logging_level: "INFO"
enable_chat: true
pipeline:
  model_name: "ensemble"
  num_instances: 1
trt_llm:
  use: true
  ckpt_type: "hf"                         # checkpoint format: Hugging Face
  model_name: "trt_llm"
  backend: "python"
  num_gpus: 1
  model_path: /engine_dir                 # compiled TensorRT engine directory
  max_queue_delay_microseconds: 10000
  model_type: "llama"
  max_batch_size: 1
  max_input_len: 256
  max_output_len: 256
  max_beam_width: 1
  tensor_para_size: 1                     # tensor parallelism degree
  pipeline_para_size: 1                   # pipeline parallelism degree
  data_type: "float16"
  int8_mode: 0
  enable_custom_all_reduce: 0
  per_column_scaling: false
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Helm chart metadata.
# NOTE(review): `name` and `description` still say "Triton Inference
# Server" although this commit adapts the walkthrough to NIM — confirm
# whether the chart should be renamed for the NIM deployment.
apiVersion: v1
appVersion: "1.0"
description: Triton Inference Server
name: triton-inference-server
version: 1.0.0

0 commit comments

Comments
 (0)