diff --git a/README.md b/README.md
index 29d4d405..8e63b793 100644
--- a/README.md
+++ b/README.md
@@ -225,4 +225,24 @@ Update the `deployment.yaml` file to use the dev tag.
 To verify the deployment is available, run:
 ```bash
 kubectl get deployment vllm-llama3-8b-instruct
+kubectl get service vllm-llama3-8b-instruct-svc
+```
+
+Use `kubectl port-forward` to expose the service on your local machine:
+
+```bash
+kubectl port-forward svc/vllm-llama3-8b-instruct-svc 8000:8000
+```
+
+Test the API with curl:
+
+```bash
+curl -X POST http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "messages": [
+      {"role": "user", "content": "Hello!"}
+    ]
+  }'
 ```
diff --git a/manifests/deployment.yaml b/manifests/deployment.yaml
index aa23f3d5..01a885da 100644
--- a/manifests/deployment.yaml
+++ b/manifests/deployment.yaml
@@ -40,3 +40,19 @@ spec:
         - containerPort: 8000
           name: http
           protocol: TCP
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vllm-llama3-8b-instruct-svc
+  labels:
+    app: vllm-llama3-8b-instruct
+spec:
+  selector:
+    app: vllm-llama3-8b-instruct
+  ports:
+  - protocol: TCP
+    port: 8000
+    targetPort: 8000
+    name: http
+  type: ClusterIP
\ No newline at end of file