Fix coderabbit suggestions and rename secret

jgarciao · jgarciao · commit 64ea827275ba · 2025-07-07T11:59:05.000+02:00
Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>
diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md
@@ -44,19 +44,23 @@ oc patch secret hf-token-secret --type='merge' -p='{"data":{"HF_TOKEN":"'$(echo
 
 #### Option D: setup using an inference model deployed remotely
 
-Note: do not use VLLM_TLS_VERIFY=false in production environments
+
 ```bash
-# Create secret llama-stack-remote-inference-model-secret providing remote model info
-export INFERENCE_MODEL=llama-3-2-3b
-export VLLM_URL=https://llama-3-2-3b.apps.remote-cluster.com:443/v1
-export VLLM_TLS_VERIFY=false
-export VLLM_API_TOKEN=XXXXXXXXXXXXXXXXXXXXXXX
-
-oc create secret generic llama-stack-remote-inference-model-secret \
-  --from-literal INFERENCE_MODEL=$INFERENCE_MODEL   \
-  --from-literal VLLM_URL=$VLLM_URL                 \
-  --from-literal VLLM_TLS_VERIFY=$VLLM_TLS_VERIFY   \
-  --from-literal VLLM_API_TOKEN=$VLLM_API_TOKEN     
+# Create secret llama-stack-inference-model-secret providing model info
+# Important: 
+#  - Make sure that the value for INFERENCE_MODEL is correct (it contains no dots)
+#  - VLLM_URL can be an internal or external endpoint for the model. It must end with /v1
+#  - NEVER set VLLM_TLS_VERIFY=false in production
+export INFERENCE_MODEL="llama-3-2-3b"
+export VLLM_URL="https://llama-3-2-3b.apps.remote-cluster.com:443/v1"
+export VLLM_TLS_VERIFY="false"
+export VLLM_API_TOKEN="XXXXXXXXXXXXXXXXXXXXXXX"
+
+oc create secret generic llama-stack-inference-model-secret \
+  --from-literal INFERENCE_MODEL="$INFERENCE_MODEL"   \
+  --from-literal VLLM_URL="$VLLM_URL"                 \
+  --from-literal VLLM_TLS_VERIFY="$VLLM_TLS_VERIFY"   \
+  --from-literal VLLM_API_TOKEN="$VLLM_API_TOKEN"     
   
 # Deploy the LlamaStackDistribution
 oc apply -k stack/overlays/vllm-remote-inference-model
diff --git a/stack/overlays/vllm-remote-inference-model/llama-stack-distribution.yaml b/stack/overlays/vllm-remote-inference-model/llama-stack-distribution.yaml
@@ -19,25 +19,25 @@ spec:
           valueFrom:
             secretKeyRef:
               key: INFERENCE_MODEL
-              name: llama-stack-remote-inference-model-secret
+              name: llama-stack-inference-model-secret
               optional: true
         - name: VLLM_URL
           valueFrom:
             secretKeyRef:
               key: VLLM_URL
-              name: llama-stack-remote-inference-model-secret
+              name: llama-stack-inference-model-secret
               optional: true
         - name: VLLM_TLS_VERIFY
           valueFrom:
             secretKeyRef:
               key: VLLM_TLS_VERIFY
-              name: llama-stack-remote-inference-model-secret
+              name: llama-stack-inference-model-secret
               optional: true
         - name: VLLM_API_TOKEN
           valueFrom:
             secretKeyRef:
               key: VLLM_API_TOKEN
-              name: llama-stack-remote-inference-model-secret
+              name: llama-stack-inference-model-secret
               optional: true
         - name: MILVUS_DB_PATH
           value: ~/.llama/milvus.db
diff --git a/stack/overlays/vllm-remote-inference-model/llama-stack-inference-model-secret.yaml b/stack/overlays/vllm-remote-inference-model/llama-stack-inference-model-secret.yaml
@@ -0,0 +1,16 @@
+# Secret added as example. It should be manually created with the right values via
+# oc create secret generic ... before creating the llama-stack-distribution
+# Important: 
+#  - Make sure that the value for INFERENCE_MODEL is correct (it contains no dots)
+#  - VLLM_URL can be an internal or external endpoint for the model. It must end with /v1
+#  - NEVER set VLLM_TLS_VERIFY=false in production
+apiVersion: v1
+kind: Secret
+metadata:
+  name: llama-stack-inference-model-secret
+type: Opaque  
+stringData:
+  INFERENCE_MODEL: "<your-model-id>"
+  VLLM_API_TOKEN: "<paste-api-token>"
+  VLLM_TLS_VERIFY: "true"                # or "false" (never in production)
+  VLLM_URL: "https://your-model-id.example.com/v1"
diff --git a/stack/overlays/vllm-remote-inference-model/llama-stack-remote-inference-model-secret.yaml b/stack/overlays/vllm-remote-inference-model/llama-stack-remote-inference-model-secret.yaml