File tree Expand file tree Collapse file tree 11 files changed +1075
-11
lines changed
inference/a4x/single-host-serving/tensorrt-llm-gcs
a4x/inference-templates-gcs/deployment Expand file tree Collapse file tree 11 files changed +1075
-11
lines changed Load Diff Large diffs are not rendered by default.
Original file line number Diff line number Diff line change 1+ # Copyright 2025 Google LLC
2+ #
3+ # Licensed under the Apache License, Version 2.0 (the "License");
4+ # you may not use this file except in compliance with the License.
5+ # You may obtain a copy of the License at
6+ #
7+ # http://www.apache.org/licenses/LICENSE-2.0
8+ #
9+ # Unless required by applicable law or agreed to in writing, software
10+ # distributed under the License is distributed on an "AS IS" BASIS,
11+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ # See the License for the specific language governing permissions and
13+ # limitations under the License.
14+
# Configuration values: queue, DWS settings, volumes, service, workload and
# network.
# NOTE(review): this block was recovered from a diff view that had dropped
# its indentation; the nesting below is reconstructed -- confirm it against
# the templates that consume these values.
# Keys set to null have no default here; presumably they are supplied at
# install time -- confirm with the chart's usage instructions.

queue: null

dwsSettings:
  maxRunDurationSeconds: null

volumes:
  gcsVolumes: true
  ssdMountPath: "/ssd"
  gcsMounts:
    - bucketName: null
      mountPath: "/gcs-logs"
  pvcMounts:
    - claimName: "gcs-serving-model-pvc"
      mountPath: "/serving-model"

service:
  type: ClusterIP
  ports:
    http: 8000

workload:
  model:
    name: null
  gpus: 4
  image: null
  framework: trtllm
  configFile: serving-args.yaml
  configPath: /workload/configs
  envs:
    - name: LAUNCHER_SCRIPT
      value: "/workload/launcher/launch-workload.sh"
    # Same path as configPath joined with configFile above.
    - name: SERVER_ARGS_FILE
      value: "/workload/configs/serving-args.yaml"
  benchmarks:
    experiments:
      # presumably isl/osl are input/output sequence lengths -- TODO confirm
      - isl: 128
        osl: 128
        num_requests: 1000

network:
  gibVersion: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib-arm64:v1.0.7
  ncclSettings:
    - name: NCCL_DEBUG
      value: "VERSION"
59+
Original file line number Diff line number Diff line change 1+ # Copyright 2025 Google LLC
2+ #
3+ # Licensed under the Apache License, Version 2.0 (the "License");
4+ # you may not use this file except in compliance with the License.
5+ # You may obtain a copy of the License at
6+ #
7+ # http://www.apache.org/licenses/LICENSE-2.0
8+ #
9+ # Unless required by applicable law or agreed to in writing, software
10+ # distributed under the License is distributed on an "AS IS" BASIS,
11+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ # See the License for the specific language governing permissions and
13+ # limitations under the License.
14+
# Helm chart metadata for the single-host serving deployment template.
apiVersion: v2
name: single-host-serving-deployment-template
description: single-host-serving-deployment-template
type: application
# Version of the chart itself.
version: 0.1.0
# Quoted so the value is always read as a string.
appVersion: "1.16.0"
Original file line number Diff line number Diff line change 1+ # Copyright 2025 Google LLC
2+ #
3+ # Licensed under the Apache License, Version 2.0 (the "License");
4+ # you may not use this file except in compliance with the License.
5+ # You may obtain a copy of the License at
6+ #
7+ # http://www.apache.org/licenses/LICENSE-2.0
8+ #
9+ # Unless required by applicable law or agreed to in writing, software
10+ # distributed under the License is distributed on an "AS IS" BASIS,
11+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ # See the License for the specific language governing permissions and
13+ # limitations under the License.
14+
apiVersion: v1
kind: ConfigMap
metadata:
  name: "{{ .Release.Name }}-config"
data:
  # Holds .Values.serving_config when it is set, otherwise the literal text
  # "config: null". nindent already emits the leading newline and the
  # four-space indent, so the template lines below are left-trimmed to keep
  # the block scalar from starting with an empty line.
  serving-configuration: |-
{{- if .Values.serving_config }}
{{- .Values.serving_config | nindent 4 }}
{{- else }}
{{- "config: null" | nindent 4 }}
{{- end }}
Original file line number Diff line number Diff line change 1+ # Copyright 2025 Google LLC
2+ #
3+ # Licensed under the Apache License, Version 2.0 (the "License");
4+ # you may not use this file except in compliance with the License.
5+ # You may obtain a copy of the License at
6+ #
7+ # http://www.apache.org/licenses/LICENSE-2.0
8+ #
9+ # Unless required by applicable law or agreed to in writing, software
10+ # distributed under the License is distributed on an "AS IS" BASIS,
11+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ # See the License for the specific language governing permissions and
13+ # limitations under the License.
14+
apiVersion: v1
kind: ConfigMap
metadata:
  name: "{{ .Release.Name }}-launcher"
data:
  # Holds .Values.workload_launcher when it is set; otherwise a fallback
  # script that prints an error and exits with status 1. nindent already
  # emits the leading newline and the four-space indent, so the template line
  # is left-trimmed: without that the script would start with an empty line
  # and its shebang would not be on line 1.
  launch-workload.sh: |-
{{- if .Values.workload_launcher }}
{{- .Values.workload_launcher | nindent 4 }}
{{- else }}
    #!/bin/bash
    echo "No workload launcher specified"
    exit 1
{{- end }}
You can’t perform that action at this time.
0 commit comments