diff --git a/charts/azimuth-chat/azimuth-ui.schema.yaml b/charts/azimuth-chat/azimuth-ui.schema.yaml
index 74bd573..a1f1488 100644
--- a/charts/azimuth-chat/azimuth-ui.schema.yaml
+++ b/charts/azimuth-chat/azimuth-ui.schema.yaml
@@ -24,6 +24,7 @@ sortOrder:
   - /azimuth-llm/ui/appSettings/model_instruction
   - /azimuth-llm/ui/appSettings/page_title
   - /azimuth-llm/api/image/version
+  - /azimuth-llm/api/gpus
   - /azimuth-llm/ui/appSettings/llm_params/temperature
   - /azimuth-llm/ui/appSettings/llm_params/max_tokens
   - /azimuth-llm/ui/appSettings/llm_params/frequency_penalty
diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml
index 0e6206d..00bc9af 100644
--- a/charts/azimuth-llm/templates/api/deployment.yml
+++ b/charts/azimuth-llm/templates/api/deployment.yml
@@ -47,6 +47,11 @@ spec:
           {{- if .Values.api.extraArgs -}}
           {{- .Values.api.extraArgs | toYaml | nindent 10 }}
           {{- end -}}
+          {{- /* Coerce api.gpus with int: values-file numbers arrive as float64 and gt rejects float-vs-int. Quote the value: container args must be strings. */ -}}
+          {{- if gt (int .Values.api.gpus) 1 }}
+          - --tensor-parallel-size
+          - {{ int .Values.api.gpus | quote }}
+          {{- end }}
           {{- if .Values.huggingface.secretName -}}
           envFrom:
           - secretRef:
diff --git a/charts/azimuth-llm/values.schema.json b/charts/azimuth-llm/values.schema.json
index 59e0e1b..c13caea 100644
--- a/charts/azimuth-llm/values.schema.json
+++ b/charts/azimuth-llm/values.schema.json
@@ -40,6 +40,14 @@
               "default": "v0.10.2"
             }
           }
+        },
+        "gpus": {
+          "type": "integer",
+          "title": "GPU Count",
+          "description": "The number of GPUs to allocate to the model.",
+          "default": 1,
+          "minimum": 1,
+          "maximum": 8
         }
       }
     },