# Makefile for Cost Effective and Scalable Model Inference on AWS Graviton with EKS
# This Makefile automates the deployment of the complete ML inference solution
33
# Targets declared phony so that a file of the same name in the working
# directory can never make them look "up to date".
# `status` is listed because the help text advertises it as a runnable target.
.PHONY: help install install-platform setup-base setup-models setup-gateway setup-observability setup-idp setup-rag setup-rag-strands setup-milvus clean clean-pvcs clean-safe verify-cluster status
55
66# Default target
77help :
88 @echo " Available targets:"
9- @echo " install - Complete installation of all components"
9+ @echo " install - Complete installation of all components including RAG Strands application"
10+ @echo " install-platform - Install platform only (base, models, observability, gateway)"
1011 @echo " verify-cluster - Verify EKS cluster access"
1112 @echo " setup-base - Install base infrastructure components (includes GP3 with Immediate binding)"
1213 @echo " setup-models - Deploy model hosting services"
2122 @echo " clean-pvcs - Remove only persistent volume claims and volumes"
2223 @echo " status - Check deployment status"
2324 @echo " "
25+ @echo " 🚀 Quick Start:"
26+ @echo " Run 'make install' for complete setup including the multi-agent RAG system"
27+ @echo " "
2428 @echo " Storage Configuration:"
2529 @echo " - GP3 storage class uses Immediate binding mode to prevent timeout issues"
2630 @echo " - This ensures StatefulSets and complex workloads provision volumes correctly"
@@ -29,21 +33,56 @@ help:
2933 @echo " - EKS cluster must be set up following AWS Solutions Guidance"
3034 @echo " - kubectl configured to access the cluster"
3135 @echo " - Required environment variables configured"
36+ @echo " - TAVILY_API_KEY for web search functionality"
3237
# Complete installation including RAG Strands application.
# All work happens in the prerequisites; the recipe only prints a summary.
# The summary is emitted by a single printf call, one argument per output line.
install: verify-cluster setup-base setup-models setup-observability setup-gateway setup-rag-strands
	@printf '%s\n' \
		" ✅ Complete installation finished!" \
		" " \
		" 🎉 Your complete Agentic AI platform is now deployed with:" \
		" ✓ Base infrastructure (KubeRay, GPU operators, storage)" \
		" ✓ Model hosting services (Ray Serve, vLLM)" \
		" ✓ Observability tools (Langfuse)" \
		" ✓ Model gateway (LiteLLM proxy)" \
		" ✓ Multi-agent RAG system with Strands SDK" \
		" " \
		" 🔧 Configuration completed during installation:" \
		" - LiteLLM proxy with unified API gateway" \
		" - Langfuse for LLM observability and tracing" \
		" - OpenSearch cluster for vector storage" \
		" - Multi-agent system with web search capabilities" \
		" " \
		" 🚀 Your system is ready to use!" \
		" - Access the RAG application via the deployed ALB endpoint" \
		" - All agents include built-in OpenTelemetry tracing" \
		" - Web search integration with Tavily API" \
		" - Comprehensive observability through Langfuse" \
		" " \
		" 📖 For detailed usage instructions, refer to the README documentation."
62+
# Platform-only installation (without RAG application).
# Runs the cluster check plus the base, models, observability and gateway
# targets, then prints the manual LiteLLM follow-up steps.
#
# Escaping note: in the echo lines that show shell syntax, `\$$` is Make's `$$`
# (a literal `$`) preceded by a shell backslash. The command and the variable
# name are therefore printed verbatim for the user to copy, rather than being
# expanded by Make or executed by the shell.
install-platform: verify-cluster setup-base setup-models setup-observability setup-gateway
	@echo " ✅ Platform installation finished!"
	@echo " "
	@echo " 🎉 Your Agentic AI platform is now deployed with:"
	@echo " ✓ Base infrastructure (KubeRay, GPU operators, storage)"
	@echo " ✓ Model hosting services (Ray Serve, vLLM)"
	@echo " ✓ Observability tools (Langfuse)"
	@echo " ✓ Model gateway (LiteLLM proxy)"
	@echo " "
	@echo " Next steps:"
	@echo " 1. Configure LiteLLM:"
	@echo " - Export the LiteLLM ingress ALB address:"
	@echo " export LITELLM_ALB_URL=\$$(kubectl get ingress litellm-ingress -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')"
	@echo " - Access LiteLLM web interface at http://\$$LITELLM_ALB_URL"
	@echo " - Login with username 'admin' and password 'sk-123456'"
	@echo " - Create a virtual key in 'Virtual Keys' section"
	@echo " - Mark 'All Team Models' for the models field"
	@echo " - Note down the key value for use in agentic applications "
	@echo " "
	@echo " 2. Deploy agentic applications:"
	@echo " - Run 'make setup-rag-strands' for the multi-agent RAG system"
	@echo " - Or refer to the README for other agentic application options"
4786
4887# Verify cluster access
4988verify-cluster :
@@ -88,11 +127,13 @@ setup-gateway: setup-observability
88127 @echo " ✅ Model gateway deployed"
89128 @echo " "
90129 @echo " ⚠️ IMPORTANT: Configure LiteLLM after deployment:"
91- @echo " 1. Access LiteLLM web interface"
92- @echo " 2. Login with username 'admin' and password 'sk-123456'"
93- @echo " 3. Go to 'Virtual Keys' and create a new key"
94- @echo " 4. Mark 'All Team Models' for the models field"
95- @echo " 5. Store the generated secret key for agentic applications"
130+ @echo " 1. Export the LiteLLM ingress ALB address:"
131+ @echo " export LITELLM_ALB_URL=\$ $( kubectl get ingress litellm-ingress -o jsonpath=' {.status.loadBalancer.ingress[0].hostname}' ) "
132+ @echo " 2. Access LiteLLM web interface"
133+ @echo " 3. Login with username 'admin' and password 'sk-123456'"
134+ @echo " 4. Go to 'Virtual Keys' and create a new key"
135+ @echo " 5. Mark 'All Team Models' for the models field"
136+ @echo " 6. Store the generated secret key for agentic applications"
96137
97138# Setup Intelligent Document Processing
98139setup-idp :
0 commit comments