Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 49 additions & 24 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,23 @@ SHELL := /usr/bin/env bash
# Defaults
TARGETOS ?= $(shell go env GOOS)
TARGETARCH ?= $(shell go env GOARCH)
NAMESPACE ?= hc4ai-operator
# Image default
PROJECT_NAME ?= llm-d-inference-scheduler
IMAGE_REGISTRY ?= ghcr.io/llm-d
IMAGE_TAG_BASE ?= $(IMAGE_REGISTRY)/$(PROJECT_NAME)
EPP_IMG_TAG_BASE ?= $(IMAGE_REGISTRY)/$(PROJECT_NAME)
EPP_TAG ?= dev
IMG = $(IMAGE_TAG_BASE):$(EPP_TAG)
NAMESPACE ?= hc4ai-operator
EPP_IMAGE = $(EPP_IMG_TAG_BASE):$(EPP_TAG)
export EPP_IMAGE
# Image dependency defaults
VLLM_SIM_IMG_TAG_BASE ?= $(IMAGE_REGISTRY)/llm-d-inference-sim
VLLM_SIMULATOR_TAG ?= latest
VLLM_SIMULATOR_IMAGE = $(VLLM_SIM_IMG_TAG_BASE):${VLLM_SIMULATOR_TAG}
export VLLM_SIMULATOR_IMAGE
ROUTING_SIDECAR_IMG_TAG_BASE ?= $(IMAGE_REGISTRY)/llm-d-routing-sidecar
ROUTING_SIDECAR_TAG ?= v0.2.0
ROUTING_SIDECAR_IMAGE = ${ROUTING_SIDECAR_IMG_TAG_BASE}:${ROUTING_SIDECAR_TAG}
export ROUTING_SIDECAR_IMAGE

# Map go arch to typos arch
ifeq ($(TARGETARCH),amd64)
Expand All @@ -37,6 +48,7 @@ TYPOS_ARCH = $(TYPOS_TARGET_ARCH)-unknown-linux-musl
endif

CONTAINER_TOOL := $(shell { command -v docker >/dev/null 2>&1 && echo docker; } || { command -v podman >/dev/null 2>&1 && echo podman; } || echo "")
export CONTAINER_TOOL
BUILDER := $(shell command -v buildah >/dev/null 2>&1 && echo buildah || echo $(CONTAINER_TOOL))
PLATFORMS ?= linux/amd64 # linux/arm64 # linux/s390x,linux/ppc64le

Expand Down Expand Up @@ -94,7 +106,7 @@ test-integration: download-tokenizer install-dependencies ## Run integration tes
go test -ldflags="$(LDFLAGS)" -v -tags=integration_tests ./test/integration/

.PHONY: test-e2e
test-e2e: image-build ## Run end-to-end tests against a new kind cluster
test-e2e: image-build image-pull ## Run end-to-end tests against a new kind cluster
@printf "\033[33;1m==== Running End to End Tests ====\033[0m\n"
./test/scripts/run_e2e.sh

Expand All @@ -119,20 +131,26 @@ build: check-go install-dependencies download-tokenizer ## Build the project
##@ Container Build/Push

.PHONY: image-build
image-build: check-container-tool ## Build Docker image ## Build Docker image using $(CONTAINER_TOOL)
@printf "\033[33;1m==== Building Docker image $(IMG) ====\033[0m\n"
image-build: check-container-tool ## Build Docker image using $(CONTAINER_TOOL)
@printf "\033[33;1m==== Building Docker image $(EPP_IMAGE) ====\033[0m\n"
$(CONTAINER_TOOL) build \
--platform linux/$(TARGETARCH) \
--build-arg TARGETOS=linux \
--build-arg TARGETARCH=$(TARGETARCH) \
--build-arg COMMIT_SHA=${GIT_COMMIT_SHA} \
--build-arg BUILD_REF=${BUILD_REF} \
-t $(IMG) .
-t $(EPP_IMAGE) .

.PHONY: image-push
image-push: check-container-tool ## Push Docker image $(IMG) to registry
@printf "\033[33;1m==== Pushing Docker image $(IMG) ====\033[0m\n"
$(CONTAINER_TOOL) push $(IMG)
image-push: check-container-tool ## Push Docker image $(EPP_IMAGE) to registry
@printf "\033[33;1m==== Pushing Docker image $(EPP_IMAGE) ====\033[0m\n"
$(CONTAINER_TOOL) push $(EPP_IMAGE)

.PHONY: image-pull
image-pull: check-container-tool ## Pull all related images using $(CONTAINER_TOOL)
@printf "\033[33;1m==== Pulling Docker images ====\033[0m\n"
./scripts/pull_images.sh


##@ Install/Uninstall Targets

Expand All @@ -148,7 +166,7 @@ uninstall: uninstall-docker ## Default uninstall using Docker
.PHONY: install-docker
install-docker: check-container-tool ## Install app using $(CONTAINER_TOOL)
@echo "Starting container with $(CONTAINER_TOOL)..."
$(CONTAINER_TOOL) run -d --name $(PROJECT_NAME)-container $(IMG)
$(CONTAINER_TOOL) run -d --name $(PROJECT_NAME)-container $(EPP_IMAGE)
@echo "$(CONTAINER_TOOL) installation complete."
@echo "To use $(PROJECT_NAME), run:"
@echo "alias $(PROJECT_NAME)='$(CONTAINER_TOOL) exec -it $(PROJECT_NAME)-container /app/$(PROJECT_NAME)'"
Expand Down Expand Up @@ -193,12 +211,12 @@ uninstall-k8s: check-kubectl check-kustomize check-envsubst ## Uninstall from Ku

.PHONY: install-openshift
install-openshift: check-kubectl check-kustomize check-envsubst ## Install on OpenShift
@echo $$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION
@echo $$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE
@echo "Creating namespace $(NAMESPACE)..."
kubectl create namespace $(NAMESPACE) 2>/dev/null || true
@echo "Deploying common resources from deploy/ ..."
# Build and substitute the base manifests from deploy, then apply them
kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl apply -n $(NAMESPACE) -f -
kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE' | kubectl apply -n $(NAMESPACE) -f -
@echo "Waiting for pod to become ready..."
sleep 5
@POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -n $(NAMESPACE) -o jsonpath='{.items[0].metadata.name}'); \
Expand All @@ -209,9 +227,9 @@ install-openshift: check-kubectl check-kustomize check-envsubst ## Install on Op
.PHONY: uninstall-openshift
uninstall-openshift: check-kubectl check-kustomize check-envsubst ## Uninstall from OpenShift
@echo "Removing resources from OpenShift..."
kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl delete --force -f - || true
kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE' | kubectl delete --force -f - || true
# @if kubectl api-resources --api-group=route.openshift.io | grep -q Route; then \
# envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' < deploy/openshift/route.yaml | kubectl delete --force -f - || true; \
# envsubst '$$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE' < deploy/openshift/route.yaml | kubectl delete --force -f - || true; \
# fi
@POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -n $(NAMESPACE) -o jsonpath='{.items[0].metadata.name}'); \
echo "Deleting pod: $$POD"; \
Expand All @@ -223,19 +241,20 @@ uninstall-openshift: check-kubectl check-kustomize check-envsubst ## Uninstall f
.PHONY: install-rbac
install-rbac: check-kubectl check-kustomize check-envsubst ## Install RBAC
@echo "Applying RBAC configuration from deploy/rbac..."
kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl apply -f -
kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE' | kubectl apply -f -

.PHONY: uninstall-rbac
uninstall-rbac: check-kubectl check-kustomize check-envsubst ## Uninstall RBAC
@echo "Removing RBAC configuration from deploy/rbac..."
kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl delete -f - || true
kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE' | kubectl delete -f - || true

##@ Environment
.PHONY: env
env: ## Print environment variables
@echo "IMAGE_TAG_BASE=$(IMAGE_TAG_BASE)"
@echo "IMG=$(IMG)"
@echo "EPP_IMAGE=$(EPP_IMAGE)"
@echo "CONTAINER_TOOL=$(CONTAINER_TOOL)"
@echo "NAMESPACE=${NAMESPACE}"
@echo "GIT_COMMIT_SHA=${GIT_COMMIT_SHA}"

.PHONY: check-typos
check-typos: $(TYPOS) ## Check for spelling errors using typos (exits with error if found)
Expand Down Expand Up @@ -293,9 +312,16 @@ check-envsubst:

.PHONY: check-container-tool
check-container-tool:
@command -v $(CONTAINER_TOOL) >/dev/null 2>&1 || { \
echo "❌ $(CONTAINER_TOOL) is not installed."; \
echo "🔧 Try: sudo apt install $(CONTAINER_TOOL) OR brew install $(CONTAINER_TOOL)"; exit 1; }
@if [ -z "$(CONTAINER_TOOL)" ]; then \
echo "❌ Error: No container tool detected. Please install docker or podman."; \
exit 1; \
elif ! command -v $(CONTAINER_TOOL) >/dev/null 2>&1; then \
echo "❌ Error: '$(CONTAINER_TOOL)' is not installed or not in your PATH."; \
echo "🔧 Try: sudo apt install $(CONTAINER_TOOL) OR brew install $(CONTAINER_TOOL)"; \
exit 1; \
else \
echo "✅ Container tool '$(CONTAINER_TOOL)' found."; \
fi

.PHONY: check-kubectl
check-kubectl:
Expand Down Expand Up @@ -348,8 +374,7 @@ env-dev-kind: ## Run under kind ($(KIND_CLUSTER_NAME))
$(MAKE) image-build && \
CLUSTER_NAME=$(KIND_CLUSTER_NAME) \
GATEWAY_HOST_PORT=$(KIND_GATEWAY_HOST_PORT) \
IMAGE_REGISTRY=$(IMAGE_REGISTRY) \
EPP_TAG=$(EPP_TAG) \
EPP_IMAGE=$(EPP_IMAGE) \
./scripts/kind-dev-env.sh; \
fi

Expand Down
2 changes: 1 addition & 1 deletion deploy/components/inference-gateway/deployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
terminationGracePeriodSeconds: 130
containers:
- name: epp
image: ghcr.io/llm-d/llm-d-inference-scheduler:latest
image: ${EPP_IMAGE}
imagePullPolicy: IfNotPresent
args:
- --pool-name
Expand Down
4 changes: 0 additions & 4 deletions deploy/components/inference-gateway/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,3 @@ resources:
- deployments.yaml
- gateways.yaml
- httproutes.yaml

images:
- name: ghcr.io/llm-d/llm-d-inference-scheduler
newTag: ${EPP_TAG}
6 changes: 3 additions & 3 deletions deploy/components/vllm-sim-pd/deployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spec:
spec:
containers:
- name: vllm
image: ghcr.io/llm-d/llm-d-inference-sim:latest
image: ${VLLM_SIMULATOR_IMAGE}
imagePullPolicy: IfNotPresent
args:
- "--port=8000"
Expand Down Expand Up @@ -49,7 +49,7 @@ spec:
spec:
initContainers:
- name: routing-sidecar
image: ghcr.io/llm-d/llm-d-routing-sidecar:latest
image: ${ROUTING_SIDECAR_IMAGE}
imagePullPolicy: IfNotPresent
args:
- "--port=8000"
Expand All @@ -61,7 +61,7 @@ spec:
restartPolicy: Always
containers:
- name: vllm
image: ghcr.io/llm-d/llm-d-inference-sim:latest
image: ${VLLM_SIMULATOR_IMAGE}
imagePullPolicy: IfNotPresent
args:
- "--port=8200"
Expand Down
6 changes: 0 additions & 6 deletions deploy/components/vllm-sim-pd/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,3 @@ kind: Kustomization

resources:
- deployments.yaml

images:
- name: ghcr.io/llm-d/llm-d-inference-sim
newTag: ${VLLM_SIMULATOR_TAG}
- name: ghcr.io/llm-d/llm-d-routing-sidecar
newTag: ${ROUTING_SIDECAR_TAG}
2 changes: 1 addition & 1 deletion deploy/components/vllm-sim/deployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
spec:
containers:
- name: vllm
image: ghcr.io/llm-d/llm-d-inference-sim:latest
image: ${VLLM_SIMULATOR_IMAGE}
imagePullPolicy: IfNotPresent
args:
- "--port=8000"
Expand Down
4 changes: 0 additions & 4 deletions deploy/components/vllm-sim/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,3 @@ kind: Kustomization

resources:
- deployments.yaml

images:
- name: ghcr.io/llm-d/llm-d-inference-sim
newTag: ${VLLM_SIMULATOR_TAG}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ spec:
spec:
containers:
- name: epp
image: ${EPP_IMAGE}:${EPP_TAG}
image: ${EPP_IMAGE}
imagePullPolicy: Always
args:
- --pool-name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ spec:
serviceAccountName: operator-controller-manager
containers:
- name: cmd
image: ${IMAGE_TAG_BASE}:${VERSION}
image: ${EPP_IMAGE}
imagePullPolicy: Always
6 changes: 0 additions & 6 deletions deploy/environments/openshift-base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,6 @@ configMapGenerator:
disableNameSuffixHash: true

# Include patches to update the Service, StatefulSet, Route, and RBAC resources.

# Define the image to be updated.
# images:
# - name: ghcr.io/llm-d/placeholder
# newName: ghcr.io/llm-d/${IMAGE_TAG_BASE}
# newTag: ${VERSION}
patches:
- path: common/patch-service.yaml
- path: common/patch-statefulset.yaml
Expand Down
28 changes: 11 additions & 17 deletions scripts/kind-dev-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,10 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
: "${IMAGE_REGISTRY:=ghcr.io/llm-d}"

# Set a default VLLM_SIMULATOR_IMAGE if not provided
: "${VLLM_SIMULATOR_IMAGE:=llm-d-inference-sim}"

# Set a default VLLM_SIMULATOR_TAG if not provided
export VLLM_SIMULATOR_TAG="${VLLM_SIMULATOR_TAG:-latest}"
export VLLM_SIMULATOR_IMAGE="${VLLM_SIMULATOR_IMAGE:-ghcr.io/llm-d/llm-d-inference-sim:latest}"

# Set a default EPP_IMAGE if not provided
: "${EPP_IMAGE:=llm-d-inference-scheduler}"

# Set a default EPP_TAG if not provided
export EPP_TAG="${EPP_TAG:-dev}"
export EPP_IMAGE="${EPP_IMAGE:-ghcr.io/llm-d/llm-d-inference-scheduler:dev}"

# Set the model name to deploy
export MODEL_NAME="${MODEL_NAME:-food-review}"
Expand All @@ -46,8 +40,8 @@ export MODEL_NAME_SAFE=$(echo "${MODEL_ID}" | tr '[:upper:]' '[:lower:]' | tr '
# Set the endpoint-picker to deploy
export EPP_NAME="${EPP_NAME:-${MODEL_NAME_SAFE}-endpoint-picker}"

# Set the default routing side car image tag
export ROUTING_SIDECAR_TAG="${ROUTING_SIDECAR_TAG:-0.0.6}"
# Set the default routing side car image
export ROUTING_SIDECAR_IMAGE="${ROUTING_SIDECAR_IMAGE:-ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0}"

# Set the inference pool name for the deployment
export POOL_NAME="${POOL_NAME:-${MODEL_NAME_SAFE}-inference-pool}"
Expand Down Expand Up @@ -152,19 +146,19 @@ kubectl --context ${KUBE_CONTEXT} -n local-path-storage wait --for=condition=Rea

# Load the vllm simulator image into the cluster
if [ "${CONTAINER_RUNTIME}" == "podman" ]; then
podman save ${IMAGE_REGISTRY}/${VLLM_SIMULATOR_IMAGE}:${VLLM_SIMULATOR_TAG} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin
podman save ${VLLM_SIMULATOR_IMAGE} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin
else
if docker image inspect "${IMAGE_REGISTRY}/${VLLM_SIMULATOR_IMAGE}:${VLLM_SIMULATOR_TAG}" > /dev/null 2>&1; then
if docker image inspect ${VLLM_SIMULATOR_IMAGE} > /dev/null 2>&1; then
echo "INFO: Loading image into KIND cluster..."
kind --name ${CLUSTER_NAME} load docker-image ${IMAGE_REGISTRY}/${VLLM_SIMULATOR_IMAGE}:${VLLM_SIMULATOR_TAG}
kind --name ${CLUSTER_NAME} load docker-image ${VLLM_SIMULATOR_IMAGE}
fi
fi

# Load the ext_proc endpoint-picker image into the cluster
if [ "${CONTAINER_RUNTIME}" == "podman" ]; then
podman save ${IMAGE_REGISTRY}/${EPP_IMAGE}:${EPP_TAG} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin
podman save ${EPP_IMAGE} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin
else
kind --name ${CLUSTER_NAME} load docker-image ${IMAGE_REGISTRY}/${EPP_IMAGE}:${EPP_TAG}
kind --name ${CLUSTER_NAME} load docker-image ${EPP_IMAGE}
fi
# ------------------------------------------------------------------------------
# CRD Deployment (Gateway API + GIE)
Expand Down Expand Up @@ -194,8 +188,8 @@ kubectl --context ${KUBE_CONTEXT} delete configmap epp-config --ignore-not-found
kubectl --context ${KUBE_CONTEXT} create configmap epp-config --from-file=epp-config.yaml=${EPP_CONFIG}

kustomize build --enable-helm ${KUSTOMIZE_DIR} \
| envsubst '${POOL_NAME} ${MODEL_NAME} ${MODEL_NAME_SAFE} ${EPP_NAME} ${EPP_TAG} ${VLLM_SIMULATOR_TAG} \
${PD_ENABLED} ${KV_CACHE_ENABLED} ${ROUTING_SIDECAR_TAG} \
| envsubst '${POOL_NAME} ${MODEL_NAME} ${MODEL_NAME_SAFE} ${EPP_NAME} ${EPP_IMAGE} ${VLLM_SIMULATOR_IMAGE} \
${PD_ENABLED} ${KV_CACHE_ENABLED} ${ROUTING_SIDECAR_IMAGE} \
${VLLM_REPLICA_COUNT} ${VLLM_REPLICA_COUNT_P} ${VLLM_REPLICA_COUNT_D}' \
| kubectl --context ${KUBE_CONTEXT} apply -f -

Expand Down
8 changes: 1 addition & 7 deletions scripts/kubernetes-dev-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ if [[ ! -f "$VLLM_CHART_DIR/Chart.yaml" ]]; then
exit 1
fi

# Default image registry for pulling deployment images
export IMAGE_REGISTRY="${IMAGE_REGISTRY:-ghcr.io/llm-d}"

# -----------------------------------------------------------------------------
# Model Configuration
# -----------------------------------------------------------------------------
Expand Down Expand Up @@ -72,10 +69,7 @@ export POOL_NAME="${POOL_NAME:-${MODEL_NAME_SAFE}-inference-pool}"
export EPP_NAME="${EPP_NAME:-${MODEL_NAME_SAFE}-endpoint-picker}"

# EPP container image name
export EPP_IMAGE="${EPP_IMAGE:-${IMAGE_REGISTRY}/llm-d-inference-scheduler}"

# EPP image tag
export EPP_TAG="${EPP_TAG:-v0.1.0}"
export EPP_IMAGE="${EPP_IMAGE:-ghcr.io/llm-d/llm-d-inference-scheduler:dev}"

# Whether P/D mode is enabled for this deployment
export PD_ENABLED="\"${PD_ENABLED:-false}\""
Expand Down
Loading