ollama-helm/values.yaml at main · otwld/ollama-helm · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
# Default values for ollama-helm.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# NOTE(review): the `autoscaling` section further down defines its own
# min/max replica bounds; presumably this value is not used when
# `autoscaling.enabled` is true — confirm against the chart templates.
# -- Number of replicas
replicaCount: 1

# Knative configuration
# NOTE(review): the remaining keys presumably only take effect when
# `knative.enabled` is true — confirm against the chart templates.
knative:
  # -- Enable Knative integration
  enabled: false
  # NOTE(review): 0 presumably means "no concurrency limit" in Knative — confirm
  # against the Knative version in use.
  # -- Knative service container concurrency
  containerConcurrency: 0
  # -- Knative service timeout, in seconds
  timeoutSeconds: 300
  # -- Knative service response start timeout, in seconds
  responseStartTimeoutSeconds: 300
  # -- Knative service idle timeout, in seconds
  idleTimeoutSeconds: 300
  # -- Knative service annotations
  annotations: {}

# Docker image
image:
  # -- Docker image repository
  repository: ollama/ollama

  # -- Docker pull policy
  pullPolicy: IfNotPresent

  # -- Docker image tag, overrides the image tag whose default is the chart appVersion.
  # When left empty and 'ollama.gpu.type' is 'amd', a 'rocm' suffix is added to the tag
  # (see 'ollama.gpu.type').
  tag: ""

# -- Docker registry secret names as an array
imagePullSecrets: []

# -- String to partially override the template name (will maintain the release name)
nameOverride: ""

# -- String to fully override the template name
fullnameOverride: ""

# -- String to fully override the namespace
namespaceOverride: ""

# Ollama parameters
ollama:
  # -- Port Ollama is listening on
  port: 11434

  gpu:
    # -- Enable GPU integration
    enabled: false

    # -- Enable DRA GPU integration
    # If enabled, it will use DRA instead of the device plugin and create a ResourceClaim and GpuClaimParameters
    draEnabled: false

    # -- DRA GPU DriverClass
    draDriverClass: "gpu.nvidia.com"

    # -- Existing DRA GPU ResourceClaim Template
    draExistingClaimTemplate: ""

    # -- GPU type: 'nvidia' or 'amd'
    # If 'ollama.gpu.enabled' is set, the default value is 'nvidia'
    # If set to 'amd', this will add the 'rocm' suffix to the image tag if 'image.tag' is not overridden
    # This is because AMD and CPU/CUDA use different images
    type: 'nvidia'

    # -- Specify the number of GPUs
    # If you use the MIG section below, this parameter is ignored
    number: 1

    # -- Only for NVIDIA cards; change to (for example) 'nvidia.com/mig-1g.10gb' to use a MIG slice
    nvidiaResource: "nvidia.com/gpu"
    # nvidiaResource: "nvidia.com/mig-1g.10gb" # example
    # If you want to use more than one NVIDIA MIG device, use the 'mig' section
    # below (nvidiaResource is then ignored and only the MIG configuration is used)

    mig:
      # -- Enable multiple MIG devices
      # If enabled, you have to specify the MIG devices
      # If enabled is set to false, this section is ignored
      enabled: false

      # -- Specify the MIG devices and the corresponding number
      devices: {}
      # Example (replace `{}` with the uncommented keys):
      # devices:
      #   1g.10gb: 1
      #   3g.40gb: 1

  models:
    # -- List of models to pull at container startup
    # The more you add, the longer the container will take to start if models are not present
    # pull:
    #  - llama2
    #  - mistral
    pull: []

    # -- List of models to load in memory at container startup
    # run:
    #  - llama2
    #  - mistral
    run: []

    # -- List of models to create at container startup, there are two options
    # 1. Create a raw model
    # 2. Load a model from configMaps; the configMaps must be created beforehand and are loaded as a volume in the "/models" directory.
    # create:
    #  - name: llama3.1-ctx32768
    #    configMapRef: my-configmap
    #    configMapKeyRef: configmap-key
    #  - name: llama3.1-ctx32768
    #    template: |
    #      FROM llama3.1
    #      PARAMETER num_ctx 32768
    create: []

    # -- Automatically remove models present on the disk but not specified in the values file
    clean: false

  # -- Add insecure flag for pulling at container startup
  insecure: false

  # -- Override ollama-data volume mount path, default: "/root/.ollama"
  mountPath: ""

# Service account
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
serviceAccount:
  # -- Specifies whether a service account should be created
  create: true

  # -- Whether to automatically mount the ServiceAccount's API credentials
  automount: true

  # -- Annotations to add to the service account
  annotations: {}

  # -- The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# -- Map of annotations to add to the pods
podAnnotations: {}

# -- Map of labels to add to the pods
podLabels: {}

# -- Pod Security Context
podSecurityContext: {}
  # To set values, replace `{}` with the uncommented keys, e.g.:
  # fsGroup: 2000

# -- Priority Class Name
priorityClassName: ""

# -- Container Security Context
securityContext: {}
  # To set values, replace `{}` with the uncommented keys, e.g.:
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

# -- Specify runtime class
runtimeClassName: ""

# Configure Service
service:

  # -- Service type
  type: ClusterIP

  # -- Service port
  port: 11434

  # -- Service node port when service type is 'NodePort'
  nodePort: 31434

  # Explicit empty string rather than a bare `loadBalancerIP:` (which parses as
  # null), consistent with the other unset string values in this file.
  # -- Load Balancer IP address
  loadBalancerIP: ""

  # -- Annotations to add to the service
  annotations: {}

  # -- Labels to add to the service
  labels: {}

  # -- IP Families for the service
  ipFamilies: []
  # - IPv4
  # - IPv6

  # -- IP Family Policy for the service
  ipFamilyPolicy: ""
  # Possible values:
  # SingleStack
  # PreferDualStack
  # RequireDualStack

# Configure the Deployment resource
deployment:

  # -- Labels to add to the Deployment
  labels: {}

# Configure the ingress resource that allows you to access the Ollama service
ingress:
  # -- Enable ingress controller resource
  enabled: false

  # -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+)
  className: ""

  # -- Additional annotations for the Ingress resource.
  annotations: {}
    # To set values, replace `{}` with the uncommented keys, e.g.:
    # kubernetes.io/ingress.class: traefik
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"

  # -- The list of hostnames to be covered with this ingress record.
  hosts:
    - host: ollama.local
      paths:
        - path: /
          pathType: Prefix

  # -- The TLS configuration for hostnames to be covered with this ingress record.
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

# Configure resource requests and limits
# ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
resources:
  # -- Pod requests
  requests: {}
    # To set values, replace `{}` with the uncommented keys below.
    # Memory request
    # memory: 4096Mi

    # CPU request
    # cpu: 2000m

  # -- Pod limits
  limits: {}
    # To set values, replace `{}` with the uncommented keys below.
    # Memory limit
    # memory: 8192Mi

    # CPU limit
    # cpu: 4000m

# Configure extra options for liveness probe
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
# NOTE(review): the `path` key suggests an HTTP GET probe, presumably against
# `ollama.port` — confirm in the deployment template.
livenessProbe:
  # -- Enable livenessProbe
  enabled: true

  # -- Request path for livenessProbe
  path: /

  # -- Initial delay for livenessProbe, in seconds
  initialDelaySeconds: 60

  # -- Period between livenessProbe checks, in seconds
  periodSeconds: 10

  # -- Timeout for livenessProbe, in seconds
  timeoutSeconds: 5

  # -- Failure threshold for livenessProbe
  failureThreshold: 6

  # -- Success threshold for livenessProbe
  successThreshold: 1

# Configure extra options for readiness probe
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
# NOTE(review): the `path` key suggests an HTTP GET probe, presumably against
# `ollama.port` — confirm in the deployment template.
readinessProbe:
  # -- Enable readinessProbe
  enabled: true

  # -- Request path for readinessProbe
  path: /

  # -- Initial delay for readinessProbe, in seconds
  initialDelaySeconds: 30

  # -- Period between readinessProbe checks, in seconds
  periodSeconds: 5

  # -- Timeout for readinessProbe, in seconds
  timeoutSeconds: 3

  # -- Failure threshold for readinessProbe
  failureThreshold: 6

  # -- Success threshold for readinessProbe
  successThreshold: 1

# Configure autoscaling
autoscaling:
  # -- Enable autoscaling
  enabled: false

  # -- Number of minimum replicas
  minReplicas: 1

  # -- Number of maximum replicas
  maxReplicas: 100

  # -- Target CPU utilization percentage used for scaling
  targetCPUUtilizationPercentage: 80

  # Uncomment to also set a memory utilization target:
  # targetMemoryUtilizationPercentage: 80

# -- Additional volumes on the output Deployment definition.
volumes: []
# Example:
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# -- Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# Example:
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true

# -- Additional arguments on the output Deployment definition.
extraArgs: []

# -- Additional environment variables on the output Deployment definition.
# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go
extraEnv: []
#  - name: OLLAMA_DEBUG
#    value: "1"

# -- Additional environment variables from external sources (like ConfigMap)
extraEnvFrom: []
#  - configMapRef:
#      name: my-env-configmap

# Enable persistence using Persistent Volume Claims
# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
persistentVolume:
  # -- Enable persistence using PVC
  enabled: false

  # -- Ollama server data Persistent Volume access modes
  # Must match those of existing PV or dynamic provisioner
  # Ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
  accessModes:
    - ReadWriteOnce

  # -- Ollama server data Persistent Volume annotations
  annotations: {}

  # -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the
  # created + ready PVC here. If set, this Chart will not create the default PVC.
  # Requires persistentVolume.enabled: true
  existingClaim: ""

  # -- Ollama server data Persistent Volume size
  size: 30Gi

  # NOTE(review): the default here is "" rather than undefined; presumably the
  # template treats both the same way — confirm in the PVC template.
  # -- Ollama server data Persistent Volume Storage Class
  # If defined, storageClassName: <storageClass>
  # If set to "-", storageClassName: "", which disables dynamic provisioning
  # If undefined (the default) or set to null, no storageClassName spec is
  # set, choosing the default provisioner.  (gp2 on AWS, standard on
  # GKE, AWS & OpenStack)
  storageClass: ""

  # -- Ollama server data Persistent Volume mode (Kubernetes accepts Filesystem or Block)
  # If defined, volumeMode: <volumeMode>
  # If empty (the default) or set to null, no volumeMode spec is
  # set, choosing the default mode.
  volumeMode: ""

  # -- Subdirectory of Ollama server data Persistent Volume to mount
  # Useful if the volume's root directory is not empty
  subPath: ""

  # -- Pre-existing PV to attach this claim to
  # Useful if a CSI auto-provisions a PV for you and you want to always
  # reference the PV moving forward
  volumeName: ""

# -- Node labels for pod assignment.
nodeSelector: {}

# -- Tolerations for pod assignment
tolerations: []

# -- Affinity for pod assignment
affinity: {}

# -- Container lifecycle hooks (overrides the ollama.models startup pull/run)
lifecycle: {}

# How to replace existing pods
updateStrategy:
  # -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate
  type: "Recreate"

# Kubernetes defines this pod field as a list of constraints, so the empty
# default is a list. A map default would also make Helm warn about overwriting
# a table with a non-table value when a list is supplied.
# -- Topology Spread Constraints for pod assignment
topologySpreadConstraints: []
# - maxSkew: 1
#   topologyKey: topology.kubernetes.io/zone
#   whenUnsatisfiable: ScheduleAnyway

# -- Termination grace period for the pod, in seconds
terminationGracePeriodSeconds: 120

# -- Init containers to add to the pod
initContainers: []
# - name: startup-tool
#   image: alpine:3
#   command: [sh, -c]
#   args:
#     - echo init

# -- Use the host's IPC namespace.
hostIPC: false

# -- Use the host's PID namespace.
hostPID: false

# -- Use the host's network namespace.
hostNetwork: false

# -- Extra K8s manifests to deploy
extraObjects: []
#  - apiVersion: v1
#    kind: ConfigMap
#    metadata:
#      name: extra-config
#    data:
#      key: "value"
#  - apiVersion: scheduling.k8s.io/v1
#    kind: PriorityClass
#    metadata:
#      name: high-priority
#    value: 1000000
#    globalDefault: false
#    description: "This priority class should be used for XYZ service pods only."

# Test connection pods
tests:
  # -- Enable the test connection pods
  enabled: true
  # -- Labels to add to the tests
  labels: {}
  # -- Annotations to add to the tests
  annotations: {}