diff --git a/README.md b/README.md
index 29d4d405..8e63b793 100644
--- a/README.md
+++ b/README.md
@@ -225,4 +225,24 @@ Update the `deployment.yaml` file to use the dev tag.
 To verify the deployment is available, run:
 ```bash
 kubectl get deployment vllm-llama3-8b-instruct
+kubectl get service vllm-llama3-8b-instruct-svc
+```
+
+Use `kubectl port-forward` to expose the service on your local machine:
+
+```bash
+kubectl port-forward svc/vllm-llama3-8b-instruct-svc 8000:8000
+```
+
+Test the API with curl:
+
+```bash
+curl -X POST http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "messages": [
+      {"role": "user", "content": "Hello!"}
+    ]
+  }'
 ```
diff --git a/manifests/deployment.yaml b/manifests/deployment.yaml
index aa23f3d5..01a885da 100644
--- a/manifests/deployment.yaml
+++ b/manifests/deployment.yaml
@@ -40,3 +40,19 @@ spec:
         - containerPort: 8000
           name: http
           protocol: TCP
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vllm-llama3-8b-instruct-svc
+  labels:
+    app: vllm-llama3-8b-instruct
+spec:
+  selector:
+    app: vllm-llama3-8b-instruct
+  ports:
+  - protocol: TCP
+    port: 8000
+    targetPort: 8000
+    name: http
+  type: ClusterIP
\ No newline at end of file