Skip to content

Commit ee79654

Browse files
committed
readme
1 parent 22269fb commit ee79654

File tree

5 files changed

+144
-63
lines changed

5 files changed

+144
-63
lines changed

Makefile

Lines changed: 81 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -208,23 +208,49 @@ setup-benchmark:
208208

209209
# Clean up all deployments including persistent volumes
210210
clean:
211-
@echo "🧹 Cleaning up all deployments and persistent resources..."
211+
@echo "🧹 Cleaning up deployments and resources created by this Makefile..."
212212
@echo ""
213-
@echo "⚠️ WARNING: This will delete ALL resources including persistent data!"
213+
@echo "⚠️ WARNING: This will delete resources installed by this Makefile!"
214214
@echo "📋 Resources that will be deleted:"
215-
@echo " - All application deployments and services"
216-
@echo " - All persistent volume claims and volumes"
217-
@echo " - All custom storage classes"
218-
@echo " - All secrets and configmaps (except system ones)"
219-
@echo " - All custom resource definitions"
220-
@echo " - All operators and system components"
221-
@echo " - All custom namespaces"
222-
@echo " - Base infrastructure components (KubeRay, NVIDIA operators, Karpenter nodepools)"
215+
@echo " - Application deployments and services in default, kuberay, gpu-operator, milvus, and langfuse namespaces"
216+
@echo " - Persistent volume claims and volumes created by this Makefile"
217+
@echo " - Custom storage classes"
218+
@echo " - Secrets and configmaps in managed namespaces (except system ones)"
219+
@echo " - Custom resource definitions related to Ray and Milvus"
220+
@echo " - Operators and components installed by this Makefile"
221+
@echo " - Custom namespaces created by this Makefile"
222+
@echo ""
223+
@echo "📋 Resources that will NOT be deleted:"
224+
@echo " - ArgoCD components"
225+
@echo " - AWS Load Balancer Controller"
226+
@echo " - CoreDNS"
227+
@echo " - EBS CSI Controller"
228+
@echo " - Karpenter"
229+
@echo " - Other system components"
223230
@echo ""
224231
@echo "Press Ctrl+C within 15 seconds to cancel..."
225232
@sleep 15
226233
@echo ""
227-
@echo "🗑️ Removing agentic applications..."
234+
@echo "🗑️ Step 1: Removing workloads installed by this Makefile..."
235+
@echo " Removing deployments (excluding system components)..."
236+
-kubectl delete deployment --all -n default 2>/dev/null || true
237+
-kubectl delete deployment -n kuberay --all 2>/dev/null || true
238+
-kubectl delete deployment -n gpu-operator --all 2>/dev/null || true
239+
-kubectl delete deployment -n milvus --all 2>/dev/null || true
240+
-kubectl delete deployment -n langfuse --all 2>/dev/null || true
241+
@echo " Removing statefulsets (excluding system components)..."
242+
-kubectl delete statefulset --all -n default 2>/dev/null || true
243+
-kubectl delete statefulset -n kuberay --all 2>/dev/null || true
244+
-kubectl delete statefulset -n milvus --all 2>/dev/null || true
245+
-kubectl delete statefulset -n langfuse --all 2>/dev/null || true
246+
@echo " Removing daemonsets (excluding system ones)..."
247+
-kubectl delete daemonset --all -n default 2>/dev/null || true
248+
-kubectl delete daemonset -n gpu-operator --all 2>/dev/null || true
249+
@echo " Removing jobs and cronjobs (excluding system ones)..."
250+
-kubectl delete job --all -n default 2>/dev/null || true
251+
-kubectl delete cronjob --all -n default 2>/dev/null || true
252+
@echo ""
253+
@echo "🗑️ Step 2: Removing agentic applications..."
228254
-kubectl delete -f agent/kubernetes/combined.yaml 2>/dev/null || true
229255
@echo "🗑️ Removing Strands SDK RAG applications..."
230256
-kubectl delete -f agentic-apps/strandsdk_agentic_rag_opensearch/k8s/ 2>/dev/null || true
@@ -233,97 +259,106 @@ clean:
233259
-kubectl delete configmap app-config 2>/dev/null || true
234260
-kubectl delete serviceaccount strandsdk-rag-service-account 2>/dev/null || true
235261
@echo ""
236-
@echo "🗑️ Removing Milvus and related resources..."
262+
@echo "🗑️ Step 3: Removing Milvus and related resources..."
237263
-kubectl delete -f milvus/milvus-nlb-service.yaml 2>/dev/null || true
238264
-kubectl delete -f milvus/milvus-standalone.yaml 2>/dev/null || true
239265
-kubectl delete -f milvus/ebs-storage-class.yaml 2>/dev/null || true
240-
@echo "Waiting for Milvus pods to terminate..."
241-
-kubectl wait --for=delete pod -l app=milvus --timeout=120s 2>/dev/null || true
242266
@echo ""
243-
@echo "🗑️ Removing observability components..."
267+
@echo "🗑️ Step 4: Removing observability components..."
244268
@echo " Uninstalling Langfuse Helm release..."
245269
-helm uninstall langfuse 2>/dev/null || true
246270
-cd model-observability && kubectl delete -f . 2>/dev/null || true
247271
@echo ""
248-
@echo "🗑️ Removing model gateway..."
272+
@echo "🗑️ Step 5: Removing model gateway..."
249273
-cd model-gateway && kubectl delete -f . 2>/dev/null || true
250274
@echo ""
251-
@echo "🗑️ Removing model hosting services..."
275+
@echo "🗑️ Step 6: Removing model hosting services..."
252276
-cd model-hosting && kubectl delete -f . 2>/dev/null || true
253277
@echo ""
254-
@echo "🗑️ Removing base infrastructure components..."
255-
@echo " Removing Karpenter nodepools..."
256-
-kubectl delete -f base_eks_setup/karpenter_nodepool/ 2>/dev/null || true
257-
@echo " Removing GP3 storage class..."
258-
-kubectl delete -f base_eks_setup/gp3.yaml 2>/dev/null || true
259-
@echo " Removing Prometheus monitoring..."
260-
-kubectl delete -f base_eks_setup/prometheus-monitoring.yaml 2>/dev/null || true
278+
@echo "🗑️ Step 7: Waiting for pods to terminate..."
279+
@echo " This may take a few minutes..."
280+
-kubectl wait --for=delete pod --all --all-namespaces --timeout=300s 2>/dev/null || true
281+
@echo ""
282+
@echo "🗑️ Step 8: Force deleting any stuck pods..."
283+
-kubectl delete pods --all --all-namespaces --grace-period=0 --force 2>/dev/null || true
284+
@echo ""
285+
@echo "🗑️ Step 9: Removing persistent volume claims..."
286+
-kubectl delete pvc --all --all-namespaces --timeout=60s 2>/dev/null || true
287+
@echo ""
288+
@echo "🗑️ Step 10: Removing persistent volumes..."
289+
-kubectl delete pv --all --timeout=60s 2>/dev/null || true
261290
@echo ""
262-
@echo "🗑️ Removing NVIDIA GPU Operator..."
291+
@echo "🗑️ Step 11: Removing NVIDIA GPU Operator..."
263292
@echo " Uninstalling NVIDIA GPU Operator Helm releases..."
264293
-helm list -n gpu-operator --short | xargs -r -I {} helm uninstall {} -n gpu-operator 2>/dev/null || true
265294
@echo " Removing NVIDIA Device Plugin..."
266295
-kubectl delete -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.2/deployments/static/nvidia-device-plugin.yml 2>/dev/null || true
267296
@echo " Removing GPU operator namespace..."
268297
-kubectl delete namespace gpu-operator 2>/dev/null || true
269298
@echo ""
270-
@echo "🗑️ Removing KubeRay Operator..."
299+
@echo "🗑️ Step 12: Removing KubeRay Operator..."
271300
@echo " Uninstalling KubeRay Operator Helm release..."
272301
-helm uninstall kuberay-operator -n kuberay 2>/dev/null || true
273302
@echo " Removing KubeRay namespace..."
274303
-kubectl delete namespace kuberay 2>/dev/null || true
275304
@echo ""
276-
@echo "🗑️ Removing persistent volume claims..."
277-
-kubectl delete pvc --all --all-namespaces --timeout=60s 2>/dev/null || true
278-
@echo ""
279-
@echo "🗑️ Removing persistent volumes..."
280-
-kubectl delete pv --all --timeout=60s 2>/dev/null || true
305+
@echo "🗑️ Step 13: NOW removing base infrastructure components..."
306+
@echo " Removing Karpenter nodepools..."
307+
-kubectl delete -f base_eks_setup/karpenter_nodepool/ 2>/dev/null || true
308+
@echo " Removing GP3 storage class..."
309+
-kubectl delete -f base_eks_setup/gp3.yaml 2>/dev/null || true
310+
@echo " Removing Prometheus monitoring..."
311+
-kubectl delete -f base_eks_setup/prometheus-monitoring.yaml 2>/dev/null || true
281312
@echo ""
282-
@echo "🗑️ Removing storage classes (custom ones)..."
313+
@echo "🗑️ Step 14: Removing storage classes (custom ones)..."
283314
-kubectl delete storageclass gp3 2>/dev/null || true
284315
-kubectl delete storageclass gp3-csi 2>/dev/null || true
285316
-kubectl delete storageclass ebs-sc 2>/dev/null || true
286317
@echo ""
287-
@echo "🗑️ Removing secrets and configmaps..."
288-
-kubectl delete secret --all --all-namespaces --field-selector type!=kubernetes.io/service-account-token 2>/dev/null || true
289-
-kubectl delete configmap --all --all-namespaces --field-selector metadata.name!=kube-root-ca.crt 2>/dev/null || true
290-
@echo ""
291-
@echo "🗑️ Removing service accounts in default namespace..."
318+
@echo "🗑️ Step 15: Removing secrets and configmaps created by this Makefile..."
319+
-kubectl delete secret --all -n default --field-selector type!=kubernetes.io/service-account-token 2>/dev/null || true
320+
-kubectl delete secret --all -n kuberay --field-selector type!=kubernetes.io/service-account-token 2>/dev/null || true
321+
-kubectl delete secret --all -n langfuse --field-selector type!=kubernetes.io/service-account-token 2>/dev/null || true
322+
-kubectl delete secret --all -n milvus --field-selector type!=kubernetes.io/service-account-token 2>/dev/null || true
323+
-kubectl delete configmap --all -n default --field-selector metadata.name!=kube-root-ca.crt 2>/dev/null || true
324+
-kubectl delete configmap --all -n kuberay --field-selector metadata.name!=kube-root-ca.crt 2>/dev/null || true
325+
-kubectl delete configmap --all -n langfuse --field-selector metadata.name!=kube-root-ca.crt 2>/dev/null || true
326+
-kubectl delete configmap --all -n milvus --field-selector metadata.name!=kube-root-ca.crt 2>/dev/null || true
327+
@echo ""
328+
@echo "🗑️ Step 16: Removing service accounts in default namespace..."
292329
-kubectl delete serviceaccount --all -n default --field-selector metadata.name!=default 2>/dev/null || true
293330
@echo ""
294-
@echo "🗑️ Removing custom resource definitions..."
331+
@echo "🗑️ Step 17: Removing custom resource definitions..."
295332
-kubectl delete crd rayclusters.ray.io 2>/dev/null || true
296333
-kubectl delete crd rayservices.ray.io 2>/dev/null || true
297334
-kubectl delete crd rayjobs.ray.io 2>/dev/null || true
298335
-kubectl delete crd milvuses.milvus.io 2>/dev/null || true
299336
@echo ""
300-
@echo "🗑️ Removing operators and system components..."
337+
@echo "🗑️ Step 18: Removing operators and system components..."
301338
-kubectl delete -f https://raw.githubusercontent.com/zilliztech/milvus-operator/main/deploy/manifests/deployment.yaml 2>/dev/null || true
302339
-kubectl delete -f https://github.com/jetstack/cert-manager/releases/download/v1.5.3/cert-manager.yaml 2>/dev/null || true
303340
@echo ""
304-
@echo "🗑️ Removing namespaces (non-system)..."
341+
@echo "🗑️ Step 19: Removing namespaces (non-system)..."
305342
-kubectl delete namespace kuberay 2>/dev/null || true
306343
-kubectl delete namespace milvus-operator 2>/dev/null || true
307344
-kubectl delete namespace cert-manager 2>/dev/null || true
308345
-kubectl delete namespace gpu-operator 2>/dev/null || true
309346
@echo ""
310-
@echo "🗑️ Force cleanup any remaining resources..."
347+
@echo "🗑️ Step 20: Final check for any remaining resources..."
311348
@echo "Checking for stuck resources..."
312349
-kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded 2>/dev/null || true
313350
@echo ""
314-
@echo "Force deleting any stuck pods..."
315-
-kubectl delete pods --all --all-namespaces --grace-period=0 --force 2>/dev/null || true
316-
@echo ""
317-
@echo "🗑️ Removing Helm repositories..."
351+
@echo "🗑️ Step 21: Removing Helm repositories..."
318352
-helm repo remove kuberay 2>/dev/null || true
319353
-helm repo remove nvidia 2>/dev/null || true
320354
-helm repo remove langfuse 2>/dev/null || true
321355
@echo ""
322-
@echo "Comprehensive cleanup complete!"
356+
@echo "Cleanup of Makefile-installed components complete!"
323357
@echo ""
324358
@echo "ℹ️ Note: Some AWS Load Balancers and EBS volumes may take additional time to be cleaned up by AWS."
325359
@echo "ℹ️ Check your AWS console to verify all resources have been properly removed."
326360
@echo "ℹ️ Karpenter-managed nodes will be automatically terminated when workloads are removed."
361+
@echo "ℹ️ System components like ArgoCD, AWS Load Balancer Controller, CoreDNS, EBS CSI Controller, and Karpenter were preserved."
327362

328363
# Safe cleanup - removes applications but preserves persistent data
329364
clean-safe:

README.md

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -482,17 +482,15 @@ ALB_ENDPOINT=$(kubectl get ingress strandsdk-rag-ingress-alb -o jsonpath='{.stat
482482
# Test the health endpoint
483483
curl -X GET "http://${ALB_ENDPOINT}/health"
484484

485-
# Test a simple query
486-
curl -X POST "http://${ALB_ENDPOINT}/query" \
485+
# Test knowledge embedding
486+
curl -X POST "http://${ALB_ENDPOINT}/embed" \
487487
-H "Content-Type: application/json" \
488-
-d '{
489-
"query": "What is Bell'\''s palsy?",
490-
"include_web_search": true
491-
}'
488+
-d '{"force_refresh": false}'
492489

493-
# Test knowledge embedding
494-
curl -X POST "http://${ALB_ENDPOINT}/embed-knowledge" \
495-
-H "Content-Type: application/json"
490+
# Force refresh all embeddings
491+
curl -X POST "http://${ALB_ENDPOINT}/embed" \
492+
-H "Content-Type: application/json" \
493+
-d '{"force_refresh": true}'
496494

497495
# Test with a more complex medical query
498496
curl -X POST "http://${ALB_ENDPOINT}/query" \

agentic-apps/strandsdk_agentic_rag_opensearch/setup.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -605,8 +605,8 @@ main() {
605605
echo
606606
echo -e "${BLUE}Test endpoints:${NC}"
607607
echo "Health check: curl -X GET \"http://$ALB_ENDPOINT/health\""
608-
echo "Simple query: curl -X POST \"http://$ALB_ENDPOINT/query\" -H \"Content-Type: application/json\" -d '{\"query\": \"What is Bell's palsy?\", \"include_web_search\": true}'"
609-
echo "Embed knowledge: curl -X POST \"http://$ALB_ENDPOINT/embed-knowledge\" -H \"Content-Type: application/json\""
608+
echo "Embed knowledge: curl -X POST \"http://$ALB_ENDPOINT/embed\" -H \"Content-Type: application/json\" -d '{\"force_refresh\": false}'"
609+
echo "Complex query: curl -i -X POST \"http://$ALB_ENDPOINT/query\" -H \"Content-Type: application/json\" -d '{\"question\": \"Find information about \\\"What was the purpose of the study on encainide and flecainide in patients with supraventricular arrhythmias\\\". Summarize this information and create a comprehensive story.Save the story and important information to a file named \\\"test1.md\\\" in the output directory as a beautiful markdown file.\", \"top_k\": 3}' --max-time 600"
610610
echo
611611
echo -e "${YELLOW}Note: It may take a few minutes for the ALB to become fully available.${NC}"
612612
echo -e "${YELLOW}If services were auto-detected, verify the endpoints are correct for your setup.${NC}"
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: langfuse-web
5+
namespace: default
6+
spec:
7+
template:
8+
spec:
9+
containers:
10+
- name: langfuse-web
11+
env:
12+
- name: REDIS_PORT
13+
value: "6379"
14+
---
15+
apiVersion: apps/v1
16+
kind: Deployment
17+
metadata:
18+
name: langfuse-worker
19+
namespace: default
20+
spec:
21+
template:
22+
spec:
23+
containers:
24+
- name: langfuse-worker
25+
env:
26+
- name: REDIS_PORT
27+
value: "6379"

model-observability/setup.sh

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,33 @@ EOF
101101
kubectl apply -f langfuse-redis-port-patch.yaml
102102
success "Redis port configuration patch applied!"
103103

104-
log "Waiting for Langfuse pods to be ready (timeout: 10 minutes)..."
105-
if kubectl wait --for=condition=ready pods --selector=app.kubernetes.io/instance=langfuse --timeout=600s; then
106-
success "Langfuse deployment completed successfully!"
107-
else
108-
error "Langfuse deployment failed - pods did not become ready within 10 minutes"
109-
fi
104+
log "Waiting for Langfuse pods to be running (timeout: 15 minutes)..."
105+
# Wait for pods to be in Running state instead of Ready condition
106+
# Poll until at least one Langfuse pod reports "Running", or give up after
# the timeout below. The wait is best-effort: on timeout we only warn and let
# the rest of the deployment continue.
start_time=$(date +%s)
timeout=900 # 15 minutes in seconds

while true; do
    current_time=$(date +%s)
    elapsed=$((current_time - start_time))

    if [ "$elapsed" -gt "$timeout" ]; then
        warn "Timeout reached after 15 minutes, but continuing with deployment"
        break
    fi

    # Take a single snapshot of the pod list so that both counts below
    # describe the same instant. A failing kubectl call yields an empty list
    # (counted as zero pods) instead of aborting the loop.
    pod_list=$(kubectl get pods --selector=app.kubernetes.io/instance=langfuse --no-headers || true)

    # grep -c always prints the count but exits 1 when that count is 0.
    # Use "|| true" (not "|| echo 0") so the captured value stays a single
    # integer; otherwise it becomes "0<newline>0" and the numeric test fails
    # with "integer expression expected".
    total_pods=$(printf '%s\n' "$pod_list" | grep -c . || true)
    running_pods=$(printf '%s\n' "$pod_list" | grep -c "Running" || true)

    if [ "$total_pods" -gt 0 ] && [ "$running_pods" -gt 0 ]; then
        log "$running_pods out of $total_pods Langfuse pods are running"
        success "Langfuse has running pods - continuing with deployment"
        break
    fi

    log "Waiting for Langfuse pods to start running ($elapsed seconds elapsed)..."
    sleep 10
done
110131
fi
111132

112133
log "Installing Langfuse web ingress..."

0 commit comments

Comments
 (0)