-
Notifications
You must be signed in to change notification settings - Fork 786
Open
Labels
Description
100% CPU
nvidia-smi:
Deployment manifest (DaemonSet):
# DaemonSet for the NVIDIA device plugin container, in kube-system.
# Pods are scheduled only onto nodes carrying the label GPU="true".
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
spec:
  revisionHistoryLimit: 10
  selector:
    # Must match the pod template labels below.
    matchLabels:
      name: nvidia-device-plugin-ds
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
      # NOTE(review): the original nesting of this key was lost; it is
      # placed under the pod template metadata here (it could also have
      # been a label) - confirm against the live object.
      namespace: kube-system
    spec:
      containers:
        - env:
            # Env values must be strings, so the boolean-looking value
            # stays quoted.
            - name: FAIL_ON_INIT_ERROR
              value: "false"
            - name: MIG_STRATEGY
              value: mixed
          # both v0.18.0 and v0.16.2 have the same problems.
          image: my-internal-registry/k8s-device-plugin:v0.16.2
          imagePullPolicy: IfNotPresent
          name: nvidia-device-plugin-ctr
          # No CPU/memory requests or limits are set for this container.
          resources: {}
          securityContext:
            privileged: true
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            # Mounted from the hostPath volume "device-plugin" below.
            - mountPath: /var/lib/kubelet/device-plugins
              name: device-plugin
      dnsPolicy: ClusterFirst
      nodeSelector:
        # Label values are strings; quoted so it is not read as a boolean.
        GPU: "true"
      priorityClassName: system-node-critical
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 30
      tolerations:
        # Tolerates any NoSchedule taint whose key is nvidia.com/gpu.
        - effect: NoSchedule
          key: nvidia.com/gpu
          operator: Exists
      volumes:
        - hostPath:
            path: /var/lib/kubelet/device-plugins
            type: ""
          name: device-plugin
  updateStrategy:
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
    type: RollingUpdate