Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 0 additions & 64 deletions .github/workflows/docker-build-and-push-go.yml

This file was deleted.

57 changes: 0 additions & 57 deletions .github/workflows/docker-build-and-push-python.yml

This file was deleted.

108 changes: 108 additions & 0 deletions .github/workflows/docker-build-and-push.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Builds the kthena container images and, on non-pull-request events, pushes
# them to the GitHub Container Registry (ghcr.io).
#
# - Pull requests: images are built only (no registry login, no push), so a
#   broken Dockerfile is caught before merge.
# - Pushes to main / version tags: images are built, tagged by
#   docker/metadata-action and pushed.
name: Build and Push Docker Images

on:
  pull_request:
    branches:
      - main
    paths:
      - 'docker/**'
      - 'python/**'
  push:
    branches:
      - main
    tags:
      - 'v*.*.*'
    paths:
      - '**.go'
      - 'pkg/**'
      - 'cmd/**'
      - 'go.mod'
      - 'go.sum'
      - 'python/**'

jobs:
  # Go services: one image per matrix entry, built from
  # docker/Dockerfile.<service>.
  build-go:
    # Run only in the upstream repository, not in forks.
    #
    # The earlier condition additionally tested
    # contains(github.event.pull_request.changed_files, '<path>'), but
    # `changed_files` is the integer *count* of changed files, not a list of
    # paths, so those checks were always false and the job was skipped on
    # every pull request. Path selection is handled by `on.<event>.paths`.
    if: github.repository == 'volcano-sh/kthena'
    runs-on: ubuntu-latest
    # Least-privilege token: read the repo for checkout, write packages so
    # the push to ghcr.io works regardless of the repository default.
    permissions:
      contents: read
      packages: write
    strategy:
      matrix:
        service:
          - kthena-router
          - kthena-controller-manager
    steps:
      - name: Checkout
        uses: actions/checkout@v5

      # Derives image tags: branch name, semver from v*.*.* tags, and
      # `latest` on the default branch.
      - name: Extract Docker image metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository_owner }}/${{ matrix.service }}
          tags: |
            type=ref,event=branch
            type=semver,pattern={{version}}
            type=raw,value=latest,enable={{is_default_branch}}

      # Pull requests never push, so they do not need registry credentials.
      - name: Log in to Github Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push Docker image
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./docker/Dockerfile.${{ matrix.service }}
          # Build-only on pull requests; push on every other event.
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          annotations: ${{ steps.meta.outputs.annotations }}
          provenance: true
          sbom: true

  # Python services: same pipeline as build-go, for the downloader and
  # runtime images.
  build-python:
    # Run only in the upstream repository, not in forks. See build-go for why
    # the contains(...changed_files...) terms were removed.
    if: github.repository == 'volcano-sh/kthena'
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    strategy:
      matrix:
        service:
          - downloader
          - runtime
    steps:
      - name: Checkout
        uses: actions/checkout@v5

      - name: Extract Docker image metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository_owner }}/${{ matrix.service }}
          tags: |
            type=ref,event=branch
            type=semver,pattern={{version}}
            type=raw,value=latest,enable={{is_default_branch}}

      # Pull requests never push, so they do not need registry credentials.
      - name: Log in to Github Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push Docker image
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./docker/Dockerfile.${{ matrix.service }}
          # Build-only on pull requests; push on every other event.
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          annotations: ${{ steps.meta.outputs.annotations }}
          provenance: true
          sbom: true
2 changes: 1 addition & 1 deletion .github/workflows/python-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ jobs:
coverage run -m pytest
- name: Check coverage threshold
run: |
coverage report --fail-under=80 # Set coverage threshold
coverage report --fail-under=60 # Set coverage threshold
25 changes: 24 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ build: generate fmt vet

IMG_CONTROLLER ?= ${HUB}/kthena-controller-manager:${TAG}
IMG_ROUTER ?= ${HUB}/kthena-router:${TAG}
IMG_DOWNLOADER ?= ${HUB}/downloader:${TAG}
IMG_RUNTIME ?= ${HUB}/runtime:${TAG}

.PHONY: docker-build-router
docker-build-router: generate
Expand All @@ -129,10 +131,21 @@ docker-build-router: generate
docker-build-controller: generate
$(CONTAINER_TOOL) build -t ${IMG_CONTROLLER} -f docker/Dockerfile.kthena-controller-manager .

# Build the downloader image from docker/Dockerfile.downloader and tag it as
# ${IMG_DOWNLOADER}. Runs the `generate` target first.
.PHONY: docker-build-downloader
docker-build-downloader: generate
$(CONTAINER_TOOL) build -t ${IMG_DOWNLOADER} -f docker/Dockerfile.downloader .

# Build the runtime image from docker/Dockerfile.runtime and tag it as
# ${IMG_RUNTIME}. Runs the `generate` target first.
.PHONY: docker-build-runtime
docker-build-runtime: generate
$(CONTAINER_TOOL) build -t ${IMG_RUNTIME} -f docker/Dockerfile.runtime .


.PHONY: docker-push
docker-push: docker-build-router docker-build-controller ## Push all images to the registry.
docker-push: docker-build-router docker-build-controller docker-build-downloader docker-build-runtime ## Push all images to the registry.
$(CONTAINER_TOOL) push ${IMG_ROUTER}
$(CONTAINER_TOOL) push ${IMG_CONTROLLER}
$(CONTAINER_TOOL) push ${IMG_DOWNLOADER}
$(CONTAINER_TOOL) push ${IMG_RUNTIME}

# PLATFORMS defines the target platforms for the images be built to provide support to multiple
# architectures.
Expand All @@ -154,6 +167,16 @@ docker-buildx: ## Build and push docker image for cross-platform support
-t ${IMG_CONTROLLER} \
-f docker/Dockerfile.kthena-controller-manager \
--push .
$(CONTAINER_TOOL) buildx build \
--platform ${PLATFORMS} \
-t ${IMG_DOWNLOADER} \
-f docker/Dockerfile.downloader \
--push .
$(CONTAINER_TOOL) buildx build \
--platform ${PLATFORMS} \
-t ${IMG_RUNTIME} \
-f docker/Dockerfile.runtime \
--push .

##@ Deployment

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ spec:
properties:
optimizerConfiguration:
description: |-
It dynamically schedules replicas across different ModelBooster Infer groups based on overall computing power requirements - referred to as "optimize" behavior in the code.
It dynamically schedules replicas across different Model Serving groups based on overall computing power requirements - referred to as "optimize" behavior in the code.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This duplicates @git-malu's PR.

Copy link
Contributor Author

@zhiweideren zhiweideren Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it was a CI error. I ran "make generate" on the latest main branch, and this change was auto-generated.

For example:
When dealing with two types of ModelBooster Infer instances corresponding to heterogeneous hardware resources with different computing capabilities (e.g., H100/A100), the "optimize" behavior aims to:
When dealing with two types of Model Serving instances corresponding to heterogeneous hardware resources with different computing capabilities (e.g., H100/A100), the "optimize" behavior aims to:
Dynamically adjust the deployment ratio of H100/A100 instances based on real-time computing power demands
Use integer programming and similar methods to precisely meet computing requirements
Maximize hardware utilization efficiency
Expand All @@ -58,8 +58,8 @@ spec:
minimum: 0
type: integer
params:
description: Parameters of multiple ModelBooster Infer Groups
to be optimized.
description: Parameters of multiple Model Serving Groups to be
optimized.
items:
properties:
cost:
Expand Down
27 changes: 27 additions & 0 deletions docker/Dockerfile.downloader
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Image for the kthena downloader: a Python 3.12 base with the AWS CLI v2
# installed and the code from python/kthena/downloader.
FROM python:3.12-slim

# curl and unzip are needed below to fetch and unpack the AWS CLI.
# NOTE(review): rsync is not used in this Dockerfile; presumably the
# application needs it at runtime - confirm.
RUN apt-get update && \
    apt-get install -y curl unzip rsync && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install AWS CLI v2 for the build architecture (x86_64 or aarch64); any other
# architecture aborts the build. curl runs with -f so an HTTP error fails the
# step right away instead of saving an error page as awscliv2.zip, and with
# -L to follow redirects.
RUN set -ex && \
    ARCH=$(uname -m) && \
    if [ "$ARCH" = "x86_64" ]; then \
        curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"; \
    elif [ "$ARCH" = "aarch64" ]; then \
        curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" -o "awscliv2.zip"; \
    else \
        echo "Unsupported architecture: $ARCH" && exit 1; \
    fi && \
    unzip awscliv2.zip && \
    ./aws/install && \
    rm -rf awscliv2.zip aws

WORKDIR /app
# Copy requirements.txt on its own first so the dependency layer is reused
# when only the source code changes.
COPY python/kthena/downloader/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY python/kthena/downloader/ ./kthena/downloader/
# Makes the `kthena` directory under /app importable as a package root.
ENV PYTHONPATH="/app"
ENTRYPOINT ["python", "kthena/downloader/app.py"]
26 changes: 26 additions & 0 deletions docker/Dockerfile.runtime
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Image for the kthena runtime: a Python 3.12 base with the AWS CLI v2
# installed and the code from python/kthena/runtime and
# python/kthena/downloader.
FROM python:3.12-slim

# curl and unzip are needed below to fetch and unpack the AWS CLI.
# NOTE(review): rsync is not used in this Dockerfile; presumably the
# application needs it at runtime - confirm.
RUN apt-get update && \
    apt-get install -y curl unzip rsync && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install AWS CLI v2 for the build architecture (x86_64 or aarch64); any other
# architecture aborts the build. curl runs with -f so an HTTP error fails the
# step right away instead of saving an error page as awscliv2.zip, and with
# -L to follow redirects.
RUN set -ex && \
    ARCH=$(uname -m) && \
    if [ "$ARCH" = "x86_64" ]; then \
        curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"; \
    elif [ "$ARCH" = "aarch64" ]; then \
        curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" -o "awscliv2.zip"; \
    else \
        echo "Unsupported architecture: $ARCH" && exit 1; \
    fi && \
    unzip awscliv2.zip && \
    ./aws/install && \
    rm -rf awscliv2.zip aws

WORKDIR /app
# The downloader sources are copied alongside the runtime sources.
# NOTE(review): presumably the runtime imports kthena.downloader - confirm.
COPY python/kthena/downloader/ ./kthena/downloader/
COPY python/kthena/runtime/ ./kthena/runtime/
# Only the runtime requirements are installed here.
RUN pip install --no-cache-dir -r kthena/runtime/requirements.txt
# Makes the `kthena` directory under /app importable as a package root.
ENV PYTHONPATH="/app"
ENTRYPOINT ["python","kthena/runtime/app.py"]
4 changes: 2 additions & 2 deletions docs/kthena/docs/reference/crd/workload.serving.volcano.sh.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `policyRef` _[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.33/#localobjectreference-v1-core)_ | PolicyRef references the autoscaling policy to be optimized scaling base on multiple targets. | | |
| `optimizerConfiguration` _[OptimizerConfiguration](#optimizerconfiguration)_ | It dynamically schedules replicas across different ModelBooster Infer groups based on overall computing power requirements - referred to as "optimize" behavior in the code.<br />For example:<br />When dealing with two types of ModelBooster Infer instances corresponding to heterogeneous hardware resources with different computing capabilities (e.g., H100/A100), the "optimize" behavior aims to:<br />Dynamically adjust the deployment ratio of H100/A100 instances based on real-time computing power demands<br />Use integer programming and similar methods to precisely meet computing requirements<br />Maximize hardware utilization efficiency | | |
| `optimizerConfiguration` _[OptimizerConfiguration](#optimizerconfiguration)_ | It dynamically schedules replicas across different Model Serving groups based on overall computing power requirements - referred to as "optimize" behavior in the code.<br />For example:<br />When dealing with two types of Model Serving instances corresponding to heterogeneous hardware resources with different computing capabilities (e.g., H100/A100), the "optimize" behavior aims to:<br />Dynamically adjust the deployment ratio of H100/A100 instances based on real-time computing power demands<br />Use integer programming and similar methods to precisely meet computing requirements<br />Maximize hardware utilization efficiency | | |
| `scalingConfiguration` _[ScalingConfiguration](#scalingconfiguration)_ | Adjust the number of related instances based on specified monitoring metrics and their target values. | | |


Expand Down Expand Up @@ -620,7 +620,7 @@ _Appears in:_

| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `params` _[OptimizerParam](#optimizerparam) array_ | Parameters of multiple ModelBooster Infer Groups to be optimized. | | MinItems: 1 <br /> |
| `params` _[OptimizerParam](#optimizerparam) array_ | Parameters of multiple Model Serving Groups to be optimized. | | MinItems: 1 <br /> |
| `costExpansionRatePercent` _integer_ | CostExpansionRatePercent is the percentage rate at which the cost expands. | 200 | Minimum: 0 <br /> |


Expand Down
Loading
Loading