add Service to expose vLLM deployment and update doc (#201)

googs1025 · web-flow · commit f5d40a28d450 · 2025-09-15T09:09:42.000Z
Signed-off-by: googs1025 <googs1025@gmail.com>
diff --git a/README.md b/README.md
@@ -225,4 +225,24 @@ Update the `deployment.yaml` file to use the dev tag.
 To verify the deployment is available, run:
 ```bash
 kubectl get deployment vllm-llama3-8b-instruct
+kubectl get service vllm-llama3-8b-instruct-svc
+```
+
+Use `kubectl port-forward` to expose the service on your local machine:
+
+```bash
+kubectl port-forward svc/vllm-llama3-8b-instruct-svc 8000:8000
+```
+
+Test the API with curl:
+
+```bash
+curl -X POST http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "messages": [
+      {"role": "user", "content": "Hello!"}
+    ]
+  }'
 ```
diff --git a/manifests/deployment.yaml b/manifests/deployment.yaml
@@ -40,3 +40,19 @@ spec:
         - containerPort: 8000
           name: http
           protocol: TCP
+---
+apiVersion: v1
+kind: Service  # exposes the vLLM deployment's pods behind one stable endpoint
+metadata:
+  name: vllm-llama3-8b-instruct-svc  # referenced by the README kubectl commands
+  labels:
+    app: vllm-llama3-8b-instruct
+spec:
+  selector:  # NOTE(review): must match the Deployment's pod labels; confirm
+    app: vllm-llama3-8b-instruct
+  ports:
+    - protocol: TCP
+      port: 8000  # port the Service itself listens on
+      targetPort: 8000  # forwards to the container's port 8000 (named http)
+      name: http
+  type: ClusterIP  # cluster-internal only; use kubectl port-forward locally