Skip to content

Commit a431268

Browse files
authored
Merge pull request #307 from leo-pony/ascend_cann_support
Add Ascend NPU CANN backend support
2 parents: 4e0c9e8 + b10f108 — commit a431268

20 files changed

+127
-23
lines changed

.github/workflows/release.yml

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,3 +179,18 @@ jobs:
179179
sbom: true
180180
provenance: mode=max
181181
tags: ${{ steps.tags.outputs.musa }}
182+
183+
- name: Build CANN image
184+
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25
185+
with:
186+
file: Dockerfile
187+
target: final-llamacpp
188+
platforms: linux/arm64, linux/amd64
189+
build-args: |
190+
"LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
191+
"LLAMA_SERVER_VARIANT=cann"
192+
"BASE_IMAGE=ascendai/cann:8.2.rc2-910b-ubuntu22.04-py3.11"
193+
push: true
194+
sbom: true
195+
provenance: mode=max
196+
tags: ${{ steps.tags.outputs.cann }}

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ ENV MODEL_RUNNER_PORT=12434
6969
ENV LLAMA_SERVER_PATH=/app/bin
7070
ENV HOME=/home/modelrunner
7171
ENV MODELS_PATH=/models
72-
ENV LD_LIBRARY_PATH=/app/lib
72+
ENV LD_LIBRARY_PATH=/app/lib:$LD_LIBRARY_PATH
7373

7474
# Label the image so that it's hidden on cloud engines.
7575
LABEL com.docker.desktop.service="model-runner"

README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -225,6 +225,7 @@ Available variants:
225225
- `cuda`: CUDA-accelerated version for NVIDIA GPUs
226226
- `rocm`: ROCm-accelerated version for AMD GPUs
227227
- `musa`: MUSA-accelerated version for MTHREADS GPUs
228+
- `cann`: CANN-accelerated version for Ascend NPUs
228229

229230
The binary path in the image follows this pattern: `/com.docker.llama-server.native.linux.${LLAMA_SERVER_VARIANT}.${TARGETARCH}`
230231

cmd/cli/commands/install-runner.go

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -253,6 +253,8 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions) error {
253253
gpu = gpupkg.GPUSupportROCm
254254
} else if opts.gpuMode == "musa" {
255255
gpu = gpupkg.GPUSupportMUSA
256+
} else if opts.gpuMode == "cann" {
257+
gpu = gpupkg.GPUSupportCANN
256258
} else if opts.gpuMode != "none" {
257259
return fmt.Errorf("unknown GPU specification: %q", opts.gpuMode)
258260
}
@@ -313,7 +315,7 @@ func newInstallRunner() *cobra.Command {
313315
c.Flags().Uint16Var(&port, "port", 0,
314316
"Docker container port for Docker Model Runner (default: 12434 for Docker Engine, 12435 for Cloud mode)")
315317
c.Flags().StringVar(&host, "host", "127.0.0.1", "Host address to bind Docker Model Runner")
316-
c.Flags().StringVar(&gpuMode, "gpu", "auto", "Specify GPU support (none|auto|cuda|rocm|musa)")
318+
c.Flags().StringVar(&gpuMode, "gpu", "auto", "Specify GPU support (none|auto|cuda|rocm|musa|cann)")
317319
c.Flags().StringVar(&backend, "backend", "", backendUsage)
318320
c.Flags().BoolVar(&doNotTrack, "do-not-track", false, "Do not track models usage in Docker Model Runner")
319321
return c

cmd/cli/commands/reinstall-runner.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ func newReinstallRunner() *cobra.Command {
3030
c.Flags().Uint16Var(&port, "port", 0,
3131
"Docker container port for Docker Model Runner (default: 12434 for Docker Engine, 12435 for Cloud mode)")
3232
c.Flags().StringVar(&host, "host", "127.0.0.1", "Host address to bind Docker Model Runner")
33-
c.Flags().StringVar(&gpuMode, "gpu", "auto", "Specify GPU support (none|auto|cuda|musa)")
33+
c.Flags().StringVar(&gpuMode, "gpu", "auto", "Specify GPU support (none|auto|cuda|musa|rocm|cann)")
3434
c.Flags().StringVar(&backend, "backend", "", backendUsage)
3535
c.Flags().BoolVar(&doNotTrack, "do-not-track", false, "Do not track models usage in Docker Model Runner")
3636
return c

cmd/cli/commands/restart-runner.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ func newRestartRunner() *cobra.Command {
3636
c.Flags().Uint16Var(&port, "port", 0,
3737
"Docker container port for Docker Model Runner (default: 12434 for Docker Engine, 12435 for Cloud mode)")
3838
c.Flags().StringVar(&host, "host", "127.0.0.1", "Host address to bind Docker Model Runner")
39-
c.Flags().StringVar(&gpuMode, "gpu", "auto", "Specify GPU support (none|auto|cuda|musa)")
39+
c.Flags().StringVar(&gpuMode, "gpu", "auto", "Specify GPU support (none|auto|cuda|musa|rocm|cann)")
4040
c.Flags().BoolVar(&doNotTrack, "do-not-track", false, "Do not track models usage in Docker Model Runner")
4141
return c
4242
}

cmd/cli/commands/start-runner.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ func newStartRunner() *cobra.Command {
2626
}
2727
c.Flags().Uint16Var(&port, "port", 0,
2828
"Docker container port for Docker Model Runner (default: 12434 for Docker Engine, 12435 for Cloud mode)")
29-
c.Flags().StringVar(&gpuMode, "gpu", "auto", "Specify GPU support (none|auto|cuda|musa)")
29+
c.Flags().StringVar(&gpuMode, "gpu", "auto", "Specify GPU support (none|auto|cuda|musa|rocm|cann)")
3030
c.Flags().StringVar(&backend, "backend", "", backendUsage)
3131
c.Flags().BoolVar(&doNotTrack, "do-not-track", false, "Do not track models usage in Docker Model Runner")
3232
return c

cmd/cli/docs/reference/docker_model_install-runner.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ options:
2828
- option: gpu
2929
value_type: string
3030
default_value: auto
31-
description: Specify GPU support (none|auto|cuda|rocm|musa)
31+
description: Specify GPU support (none|auto|cuda|rocm|musa|cann)
3232
deprecated: false
3333
hidden: false
3434
experimental: false

cmd/cli/docs/reference/docker_model_reinstall-runner.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ options:
2828
- option: gpu
2929
value_type: string
3030
default_value: auto
31-
description: Specify GPU support (none|auto|cuda|musa)
31+
description: Specify GPU support (none|auto|cuda|musa|rocm|cann)
3232
deprecated: false
3333
hidden: false
3434
experimental: false

cmd/cli/docs/reference/docker_model_restart-runner.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ options:
2121
- option: gpu
2222
value_type: string
2323
default_value: auto
24-
description: Specify GPU support (none|auto|cuda|musa)
24+
description: Specify GPU support (none|auto|cuda|musa|rocm|cann)
2525
deprecated: false
2626
hidden: false
2727
experimental: false

0 commit comments

Comments
 (0)