Skip to content

Commit 5ab85f5

Browse files
mirodrr2 and mirodrr authored
switch from using versions to using tags, allowing us to lock to a stable version. Upgrade litellm to latest stable. Add delete fake llm load testing server script. (#108)
Co-authored-by: michael rodriguez <[email protected]>
1 parent a4dda40 commit 5ab85f5

File tree

7 files changed

+78
-8
lines changed

7 files changed

+78
-8
lines changed

.env.template

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# LITELLM_VERSION eg: v1.56.5
2-
# Get it from https://github.com/BerriAI/litellm/releases
3-
LITELLM_VERSION="v1.62.1-nightly"
1+
# LITELLM_VERSION eg: main-v1.56.5
2+
# Get it from https://github.com/berriai/litellm/pkgs/container/litellm/versions?filters%5Bversion_type%5D=tagged
3+
LITELLM_VERSION="litellm_stable_release_branch-v1.63.2-stable"
44
TERRAFORM_S3_BUCKET_NAME="" #Must be globally unique
55
BUILD_FROM_SOURCE="false"
66
HOSTED_ZONE_NAME=""
@@ -50,7 +50,7 @@ EKS_ARM_AMI_TYPE="AL2_ARM_64"
5050
EKS_X86_AMI_TYPE="AL2_x86_64"
5151
CPU_ARCHITECTURE="" #If empty, defaults to the architecture of your deployment machine "x86" or "arm"
5252
PUBLIC_LOAD_BALANCER="true"
53-
RDS_INSTANCE_CLASS="db.t3.micro"
53+
RDS_INSTANCE_CLASS="db.t3.small"
5454
RDS_ALLOCATED_STORAGE_GB="20"
5555
REDIS_NODE_TYPE="cache.t3.micro"
5656
REDIS_NUM_CACHE_CLUSTERS="2" #Number of cache clusters (primary and replicas) the replication group will have

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
ARG LITELLM_VERSION=latest
2-
FROM ghcr.io/berriai/litellm:main-${LITELLM_VERSION}
2+
FROM ghcr.io/berriai/litellm:${LITELLM_VERSION}

create-fake-llm-load-testing-server.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ if [ $? -eq 0 ]; then
8181
--cluster $LITELLM_ECS_CLUSTER \
8282
--service $LITELLM_ECS_TASK \
8383
--force-new-deployment \
84-
--desired-count 1 \
84+
--desired-count 3 \
8585
--no-cli-pager
8686
else
8787
echo "Deployment failed"
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
#!/bin/bash
#
# Destroys the fake LLM load-testing server stack with `terraform destroy`.
#
# Run from the directory that contains `.env`, `litellm-terraform-stack/` and
# `litellm-fake-llm-load-testing-server-terraform/`; all paths are relative.
#
# Variables read from .env:
#   CPU_ARCHITECTURE  optional; "x86" or "arm". When empty or unset the
#                     architecture is derived from `uname -m`.
#   TERRAFORM_S3_BUCKET_NAME
#   FAKE_LLM_LOAD_TESTING_ENDPOINT_CERTIFICATE_ARN
#   FAKE_LLM_LOAD_TESTING_ENDPOINT_HOSTED_ZONE_NAME
#   FAKE_LLM_LOAD_TESTING_ENDPOINT_RECORD_NAME
#
# -a (allexport) exports every variable assigned below, including the ones
# sourced from .env, to child processes such as terraform.
set -aeuo pipefail

aws_region=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]')
echo "$aws_region"

APP_NAME=fakeserver

source .env

# Read the VPC id from the main stack's outputs. The subshell keeps the
# working directory of this script unchanged; a failure of either command
# aborts the script because of `set -e`.
VPC_ID=$(cd litellm-terraform-stack && terraform output -raw vpc_id)

cd litellm-fake-llm-load-testing-server-terraform

# ${CPU_ARCHITECTURE:-} tolerates a .env that omits the variable (set -u).
if [[ -n "${CPU_ARCHITECTURE:-}" ]]; then
  # An explicit setting must be one of the two supported values.
  case "$CPU_ARCHITECTURE" in
    "x86" | "arm")
      ARCH="$CPU_ARCHITECTURE"
      ;;
    *)
      echo "Error: CPU_ARCHITECTURE must be either 'x86' or 'arm'" >&2
      exit 1
      ;;
  esac
else
  # Determine architecture from the machine running this script.
  ARCH=$(uname -m)
  case "$ARCH" in
    x86_64)
      ARCH="x86"
      ;;
    # macOS reports arm64; Linux on ARM reports aarch64.
    arm64 | aarch64)
      ARCH="arm"
      ;;
    *)
      echo "Unsupported architecture: $ARCH" >&2
      exit 1
      ;;
  esac
fi

echo "$ARCH"

echo "about to destroy"

# Use the VPC id read from the main stack instead of a hard-coded id, so the
# destroy targets whatever environment .env / the terraform state describe.
export TF_VAR_vpc_id="$VPC_ID"
export TF_VAR_ecr_fake_server_repository="$APP_NAME"
export TF_VAR_architecture="$ARCH"
# NOTE(review): "certifiacte" is kept as-is; it presumably matches the variable
# name declared in the terraform module -- confirm before renaming.
export TF_VAR_fake_llm_load_testing_endpoint_certifiacte_arn="$FAKE_LLM_LOAD_TESTING_ENDPOINT_CERTIFICATE_ARN"
export TF_VAR_fake_llm_load_testing_endpoint_hosted_zone_name="$FAKE_LLM_LOAD_TESTING_ENDPOINT_HOSTED_ZONE_NAME"
export TF_VAR_fake_llm_load_testing_endpoint_record_name="$FAKE_LLM_LOAD_TESTING_ENDPOINT_RECORD_NAME"

# Backend settings passed to `terraform init -backend-config` below.
cat > backend.hcl << EOF
bucket = "${TERRAFORM_S3_BUCKET_NAME}"
key = "terraform-fake-llm-server.tfstate"
region = "${aws_region}"
encrypt = true
EOF
echo "Generated backend.hcl configuration"

terraform init -backend-config=backend.hcl -reconfigure
terraform destroy -auto-approve

echo "destroyed"

deploy.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ echo "EKS_X86_INSTANCE_TYPE: $EKS_X86_INSTANCE_TYPE"
139139
echo "EKS_ARM_AMI_TYPE: $EKS_ARM_AMI_TYPE"
140140
echo "EKS_X86_AMI_TYPE: $EKS_X86_AMI_TYPE"
141141
echo "PUBLIC_LOAD_BALANCER: $PUBLIC_LOAD_BALANCER"
142-
echo "RDS_INSTANCE_CLASS: $PUBLIC_LOAD_BALANCER"
142+
echo "RDS_INSTANCE_CLASS: $RDS_INSTANCE_CLASS"
143143
echo "RDS_ALLOCATED_STORAGE_GB: $RDS_ALLOCATED_STORAGE_GB"
144144
echo "REDIS_NODE_TYPE: $REDIS_NODE_TYPE"
145145
echo "REDIS_NUM_CACHE_CLUSTERS: $REDIS_NUM_CACHE_CLUSTERS"

litellm-fake-llm-load-testing-server-terraform/main.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ resource "aws_ecs_service" "fake_server_service" {
215215
name = "FakeServer"
216216
cluster = aws_ecs_cluster.fake_llm_cluster.id
217217
task_definition = aws_ecs_task_definition.fake_server_task_def.arn
218-
desired_count = 1
218+
desired_count = 3
219219
launch_type = "FARGATE"
220220
health_check_grace_period_seconds = 300
221221

litellm-terraform-stack/modules/base/rds.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,4 +90,5 @@ resource "aws_db_instance" "database" {
9090
monitoring_role_arn = aws_iam_role.rds_enhanced_monitoring.arn
9191
parameter_group_name = aws_db_parameter_group.example_pg.name
9292
copy_tags_to_snapshot = true
93+
apply_immediately = true
9394
}

0 commit comments

Comments
 (0)