File tree Expand file tree Collapse file tree 7 files changed +78
-8
lines changed
litellm-fake-llm-load-testing-server-terraform
litellm-terraform-stack/modules/base Expand file tree Collapse file tree 7 files changed +78
-8
lines changed Original file line number Diff line number Diff line change 1- # LITELLM_VERSION eg: v1.56.5
2- # Get it from https://github.com/BerriAI /litellm/releases
3- LITELLM_VERSION = " v1.62.1-nightly "
1+ # LITELLM_VERSION eg: main-v1.56.5
2+ # Get it from https://github.com/berriai/litellm/pkgs/container/litellm/versions?filters%5Bversion_type%5D=tagged
3+ LITELLM_VERSION = " litellm_stable_release_branch- v1.63.2-stable "
44TERRAFORM_S3_BUCKET_NAME = " " # Must be globally unique
55BUILD_FROM_SOURCE = " false"
66HOSTED_ZONE_NAME = " "
@@ -50,7 +50,7 @@ EKS_ARM_AMI_TYPE="AL2_ARM_64"
5050EKS_X86_AMI_TYPE = " AL2_x86_64"
5151CPU_ARCHITECTURE = " " # If empty, defaults to the architecture of your deployment machine "x86" or "arm"
5252PUBLIC_LOAD_BALANCER = " true"
53- RDS_INSTANCE_CLASS = " db.t3.micro "
53+ RDS_INSTANCE_CLASS = " db.t3.small "
5454RDS_ALLOCATED_STORAGE_GB = " 20"
5555REDIS_NODE_TYPE = " cache.t3.micro"
5656REDIS_NUM_CACHE_CLUSTERS = " 2" # Number of cache clusters (primary and replicas) the replication group will have
Original file line number Diff line number Diff line change 11ARG LITELLM_VERSION=latest
2- FROM ghcr.io/berriai/litellm:main- ${LITELLM_VERSION}
2+ FROM ghcr.io/berriai/litellm:${LITELLM_VERSION}
Original file line number Diff line number Diff line change @@ -81,7 +81,7 @@ if [ $? -eq 0 ]; then
8181 --cluster $LITELLM_ECS_CLUSTER \
8282 --service $LITELLM_ECS_TASK \
8383 --force-new-deployment \
84- --desired-count 1 \
84+ --desired-count 3 \
8585 --no-cli-pager
8686else
8787 echo " Deployment failed"
Original file line number Diff line number Diff line change 1+
#!/bin/bash
#
# Destroys the fake LLM load-testing server Terraform stack.
#
# Run from the directory that contains `.env`, `litellm-terraform-stack/` and
# `litellm-fake-llm-load-testing-server-terraform/`.
#
# Variables read from .env:
#   CPU_ARCHITECTURE  "x86", "arm", or empty (empty = detect from this machine)
#   TERRAFORM_S3_BUCKET_NAME
#   FAKE_LLM_LOAD_TESTING_ENDPOINT_CERTIFICATE_ARN
#   FAKE_LLM_LOAD_TESTING_ENDPOINT_HOSTED_ZONE_NAME
#   FAKE_LLM_LOAD_TESTING_ENDPOINT_RECORD_NAME
#
# -a auto-exports every variable assigned below (including those from .env);
# -e/-u/pipefail abort on command failure, unset variables and pipeline errors.
set -aeuo pipefail

aws_region=$(aws ec2 describe-availability-zones \
  --output text \
  --query 'AvailabilityZones[0].[RegionName]')
echo "$aws_region"

APP_NAME=fakeserver

source .env

# Look up the VPC created by the main stack. The subshell keeps the working
# directory of this script unchanged.
VPC_ID=$(cd litellm-terraform-stack && terraform output -raw vpc_id)
if [[ -z "$VPC_ID" ]]; then
  echo "Error: could not read vpc_id from litellm-terraform-stack outputs" >&2
  exit 1
fi

cd litellm-fake-llm-load-testing-server-terraform

if [[ -n "${CPU_ARCHITECTURE:-}" ]]; then
  # Check if CPU_ARCHITECTURE is either "x86" or "arm"
  case "$CPU_ARCHITECTURE" in
    "x86" | "arm")
      ARCH="$CPU_ARCHITECTURE"
      ;;
    *)
      echo "Error: CPU_ARCHITECTURE must be either 'x86' or 'arm'"
      exit 1
      ;;
  esac
else
  # Determine architecture from system
  ARCH=$(uname -m)
  case "$ARCH" in
    x86_64 | amd64)
      ARCH="x86"
      ;;
    # macOS reports arm64; Linux ARM hosts report aarch64.
    arm64 | aarch64)
      ARCH="arm"
      ;;
    *)
      echo "Unsupported architecture: $ARCH"
      exit 1
      ;;
  esac
fi

echo "$ARCH"

echo "about to destroy"

# Use the VPC id looked up above rather than an account-specific literal.
export TF_VAR_vpc_id="$VPC_ID"
export TF_VAR_ecr_fake_server_repository="$APP_NAME"
export TF_VAR_architecture="$ARCH"
# NOTE: "certifiacte" is kept as-is; presumably it matches the variable name
# declared in the Terraform module -- confirm before renaming.
export TF_VAR_fake_llm_load_testing_endpoint_certifiacte_arn="$FAKE_LLM_LOAD_TESTING_ENDPOINT_CERTIFICATE_ARN"
export TF_VAR_fake_llm_load_testing_endpoint_hosted_zone_name="$FAKE_LLM_LOAD_TESTING_ENDPOINT_HOSTED_ZONE_NAME"
export TF_VAR_fake_llm_load_testing_endpoint_record_name="$FAKE_LLM_LOAD_TESTING_ENDPOINT_RECORD_NAME"

# An empty bucket name would produce an unusable backend configuration.
: "${TERRAFORM_S3_BUCKET_NAME:?TERRAFORM_S3_BUCKET_NAME must be set in .env}"

cat > backend.hcl << EOF
bucket = "${TERRAFORM_S3_BUCKET_NAME}"
key    = "terraform-fake-llm-server.tfstate"
region = "${aws_region}"
encrypt = true
EOF
echo "Generated backend.hcl configuration"

terraform init -backend-config=backend.hcl -reconfigure
terraform destroy -auto-approve

echo "destroyed"
Original file line number Diff line number Diff line change @@ -139,7 +139,7 @@ echo "EKS_X86_INSTANCE_TYPE: $EKS_X86_INSTANCE_TYPE"
139139echo " EKS_ARM_AMI_TYPE: $EKS_ARM_AMI_TYPE "
140140echo " EKS_X86_AMI_TYPE: $EKS_X86_AMI_TYPE "
141141echo " PUBLIC_LOAD_BALANCER: $PUBLIC_LOAD_BALANCER "
142- echo " RDS_INSTANCE_CLASS: $PUBLIC_LOAD_BALANCER "
142+ echo " RDS_INSTANCE_CLASS: $RDS_INSTANCE_CLASS "
143143echo " RDS_ALLOCATED_STORAGE_GB: $RDS_ALLOCATED_STORAGE_GB "
144144echo " REDIS_NODE_TYPE: $REDIS_NODE_TYPE "
145145echo " REDIS_NUM_CACHE_CLUSTERS: $REDIS_NUM_CACHE_CLUSTERS "
Original file line number Diff line number Diff line change @@ -215,7 +215,7 @@ resource "aws_ecs_service" "fake_server_service" {
215215 name = " FakeServer"
216216 cluster = aws_ecs_cluster. fake_llm_cluster . id
217217 task_definition = aws_ecs_task_definition. fake_server_task_def . arn
218- desired_count = 1
218+ desired_count = 3
219219 launch_type = " FARGATE"
220220 health_check_grace_period_seconds = 300
221221
Original file line number Diff line number Diff line change @@ -90,4 +90,5 @@ resource "aws_db_instance" "database" {
9090 monitoring_role_arn = aws_iam_role. rds_enhanced_monitoring . arn
9191 parameter_group_name = aws_db_parameter_group. example_pg . name
9292 copy_tags_to_snapshot = true
93+ apply_immediately = true
9394}
You can’t perform that action at this time.
0 commit comments