---
# ServingRuntime that runs the vLLM OpenAI API server module
# (python3 -m vllm.entrypoints.openai.api_server) on port 8080.
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: custom-vllm-runtime
  namespace: test-namespace  # replace this with your namespace name
spec:
  # Annotation values must be strings, so booleans and the port are quoted.
  # No padding inside the quotes: whitespace is part of a quoted scalar.
  annotations:
    sidecar.istio.io/inject: 'true'
    sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    serving.knative.openshift.io/enablePassthrough: 'true'
    opendatahub.io/dashboard: 'true'
    openshift.io/display-name: 'vLLM Openai entry point'
    # Same port as --port and containerPort below.
    prometheus.io/port: '8080'
    prometheus.io/path: '/metrics/'
  builtInAdapter:
    modelLoadingTimeoutMillis: 90000
  containers:
    - name: kserve-container
      image: quay.io/opendatahub/vllm:stable
      command:
        - python3
        - '-m'
        - 'vllm.entrypoints.openai.api_server'
      # Each item is passed verbatim as one argv entry, so it must start
      # with "--" and carry no leading whitespace.
      args:
        - '--port=8080'
        - '--distributed-executor-backend=mp'
        # presumably the path where KServe mounts the downloaded model;
        # TODO confirm against the storage initializer configuration
        - '--model=/mnt/models'
      ports:
        - containerPort: 8080
          name: http1
          protocol: TCP
  multiModel: false
  supportedModelFormats:
    - autoSelect: true
      name: vLLM
---
# Opaque Secret holding the S3 connection settings as plain-text stringData.
# The <...> values are placeholders; never commit real credentials.
apiVersion: v1
kind: Secret
metadata:
  name: test-secret
  namespace: test-namespace  # replace this with your namespace name
type: Opaque
stringData:
  # replace this with your Storage Bucket endpoint URL
  AWS_S3_ENDPOINT: 'https://s3.us-east-1.amazonaws.com'
  # add your storage bucket name
  AWS_S3_BUCKET: '<storage-bucket-name>'
  # add your storage bucket Access-Key ID
  AWS_ACCESS_KEY_ID: '<access-key-id>'
  # add your storage bucket Secret-Access-Key
  AWS_SECRET_ACCESS_KEY: '<secret-access-key>'
  # replace this with your Storage Bucket region
  AWS_DEFAULT_REGION: 'us-east-1'
---
# Opaque Secret whose single entry is a JSON document describing an S3
# storage location (type "s3") together with its credentials.
apiVersion: v1
kind: Secret
metadata:
  name: storage-config
  namespace: test-namespace  # replace this with your namespace name
type: Opaque
stringData:
  # Replace the Storage-Config Secret data below with the actual S3 bucket
  # credentials. Folded scalar (>-): the indented lines are joined into one
  # line of JSON with no trailing newline; the line breaks sit only between
  # JSON tokens, so the parsed JSON is unchanged.
  rhelai-s3-data: >-
    {"access_key_id":"<access-key-id>",
    "bucket":"<storage-bucket-name>",
    "default_bucket":"<storage-bucket-name>",
    "endpoint_url":"https://s3.us-east-1.amazonaws.com",
    "region":"us-east-1",
    "secret_access_key":"<secret-access-key>",
    "type":"s3"}
---
# ServiceAccount referenced by the RoleBinding subject and by the
# InferenceService predictor (serviceAccountName) in this file.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: test-service-account
  namespace: test-namespace  # replace this with your namespace name
---
# RoleBinding that binds the "view" ClusterRole to test-service-account.
# Being a RoleBinding (not a ClusterRoleBinding), it lives in the namespace
# given in its metadata.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: test-cluster-role-binding
  namespace: test-namespace  # replace this with your namespace name
subjects:
  - kind: ServiceAccount
    name: test-service-account
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: view
---
# InferenceService "mistral": a single-replica predictor (min = max = 1)
# served through custom-vllm-runtime, requesting one nvidia.com/gpu.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: mistral
  namespace: test-namespace  # replace this with your namespace name
  # Annotation values are strings; no padding inside the quotes, because
  # whitespace is part of a quoted scalar.
  annotations:
    serving.kserve.io/deploymentMode: 'Serverless'
    security.opendatahub.io/enable-auth: 'true'
    opendatahub.io/dashboard: 'true'
spec:
  predictor:
    maxReplicas: 1
    minReplicas: 1
    # replace this with Service account created above
    serviceAccountName: test-service-account
    model:
      modelFormat:
        name: vLLM
      # Intentionally an empty string, not a blank-padded one.
      name: ''
      # replace this with custom serving runtime created above
      runtime: custom-vllm-runtime
      resources:
        limits:
          cpu: '8'
          memory: 10Gi
          nvidia.com/gpu: '1'
        requests:
          cpu: '4'
          memory: 8Gi
          nvidia.com/gpu: '1'
      storage:
        # NOTE(review): the storage-config Secret in this file only defines
        # the key "rhelai-s3-data"; if this key is looked up in that Secret,
        # "test-secret" will not be found. Confirm which one is intended.
        key: test-secret
        path: mistral/model/
    # Tolerates the NoSchedule taint keyed nvidia.com/gpu.
    tolerations:
      - effect: NoSchedule
        key: nvidia.com/gpu
        operator: Exists
0 commit comments