Skip to content

Commit 22c7f85

Browse files
committed
Merge branch 'master' into HEAD
2 parents 07b809b + 254098a commit 22c7f85

File tree

107 files changed

+4374
-2793
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

107 files changed

+4374
-2793
lines changed

.devops/cann.Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
# Define the CANN base image for easier version updates later
66
ARG CHIP_TYPE=910b
7-
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc1.alpha001-${CHIP_TYPE}-openeuler22.03-py3.11
7+
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
88

99
# ==============================================================================
1010
# BUILD STAGE
@@ -111,7 +111,7 @@ ENTRYPOINT ["/app/tools.sh"]
111111
# ==============================================================================
112112
FROM base AS light
113113

114-
COPY --from=build /app/full/llama-cli /app
114+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
115115

116116
ENTRYPOINT [ "/app/llama-cli" ]
117117

.devops/cpu.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ ENTRYPOINT ["/app/tools.sh"]
6868
### Light, CLI only
6969
FROM base AS light
7070

71-
COPY --from=build /app/full/llama-cli /app
71+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7272

7373
WORKDIR /app
7474

.devops/cuda.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ ENTRYPOINT ["/app/tools.sh"]
7474
### Light, CLI only
7575
FROM base AS light
7676

77-
COPY --from=build /app/full/llama-cli /app
77+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7878

7979
WORKDIR /app
8080

.devops/intel.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ ENTRYPOINT ["/app/tools.sh"]
7373
FROM base AS light
7474

7575
COPY --from=build /app/lib/ /app
76-
COPY --from=build /app/full/llama-cli /app
76+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7777

7878
WORKDIR /app
7979

.devops/musa.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ ENTRYPOINT ["/app/tools.sh"]
8181
### Light, CLI only
8282
FROM base AS light
8383

84-
COPY --from=build /app/full/llama-cli /app
84+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
8585

8686
WORKDIR /app
8787

.devops/rocm.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,7 @@ ENTRYPOINT ["/app/tools.sh"]
9494
### Light, CLI only
9595
FROM base AS light
9696

97-
COPY --from=build /app/full/llama-cli /app
97+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
9898

9999
WORKDIR /app
100100

.devops/s390x.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ WORKDIR /llama.cpp/bin
105105

106106
# Copy llama.cpp binaries and libraries
107107
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
108-
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin
108+
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin/llama-completion /llama.cpp/bin
109109

110110
ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
111111

.devops/tools.sh

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,8 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
1313
exec ./llama-quantize "$@"
1414
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
1515
exec ./llama-cli "$@"
16+
elif [[ "$arg1" == '--run-legacy' || "$arg1" == '-l' ]]; then
17+
exec ./llama-completion "$@"
1618
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
1719
exec ./llama-bench "$@"
1820
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
@@ -32,8 +34,10 @@ elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
3234
else
3335
echo "Unknown command: $arg1"
3436
echo "Available commands: "
35-
echo " --run (-r): Run a model previously converted into ggml"
36-
echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
37+
echo " --run (-r): Run a model (chat) previously converted into ggml"
38+
echo " ex: -m /models/7B/ggml-model-q4_0.bin"
39+
echo " --run-legacy (-l): Run a model (legacy completion) previously converted into ggml"
40+
echo " ex: -m /models/7B/ggml-model-q4_0.bin -no-cnv -p \"Building a website can be done in 10 simple steps:\" -n 512"
3741
echo " --bench (-b): Benchmark the performance of the inference for various parameters."
3842
echo " ex: -m model.gguf"
3943
echo " --perplexity (-p): Measure the perplexity of a model over a given text."

.devops/vulkan.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ ENTRYPOINT ["/app/tools.sh"]
6868
### Light, CLI only
6969
FROM base AS light
7070

71-
COPY --from=build /app/full/llama-cli /app
71+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7272

7373
WORKDIR /app
7474

.github/workflows/build.yml

Lines changed: 45 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,8 @@ on:
2020
'**/*.swift',
2121
'**/*.m',
2222
'**/*.metal',
23-
'**/*.comp'
23+
'**/*.comp',
24+
'**/*.glsl'
2425
]
2526

2627
pull_request:
@@ -40,7 +41,8 @@ on:
4041
'**/*.swift',
4142
'**/*.m',
4243
'**/*.metal',
43-
'**/*.comp'
44+
'**/*.comp',
45+
'**/*.glsl'
4446
]
4547

4648
concurrency:
@@ -1400,25 +1402,54 @@ jobs:
14001402
chip_type: ['910b', '310p']
14011403
build: ['Release']
14021404
runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
1403-
container: ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc1.alpha001-910b-openeuler22.03-py3.11' || '8.2.rc1-310p-openeuler22.03-py3.11' }}
14041405
steps:
14051406
- name: Checkout
14061407
uses: actions/checkout@v4
1408+
with:
1409+
fetch-depth: 0
14071410

1408-
- name: Dependencies
1409-
run: |
1410-
yum update -y
1411-
yum install -y git gcc gcc-c++ make cmake libcurl-devel
1411+
- name: Free up disk space
1412+
uses: ggml-org/free-disk-space@v1.3.1
1413+
with:
1414+
tool-cache: true
14121415

1413-
- name: Build
1416+
- name: Set container image
1417+
id: cann-image
14141418
run: |
1415-
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
1419+
image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}"
1420+
echo "image=${image}" >> "${GITHUB_OUTPUT}"
14161421
1417-
cmake -S . -B build \
1418-
-DCMAKE_BUILD_TYPE=${{ matrix.build }} \
1419-
-DGGML_CANN=on \
1420-
-DSOC_TYPE=ascend${{ matrix.chip_type }}
1421-
cmake --build build -j $(nproc)
1422+
- name: Pull container image
1423+
run: docker pull "${{ steps.cann-image.outputs.image }}"
1424+
1425+
- name: Build
1426+
env:
1427+
BUILD_TYPE: ${{ matrix.build }}
1428+
SOC_TYPE: ascend${{ matrix.chip_type }}
1429+
run: |
1430+
HOST_UID=$(id -u)
1431+
HOST_GID=$(id -g)
1432+
1433+
docker run --rm \
1434+
-v "${PWD}:/workspace" \
1435+
-w /workspace \
1436+
-e SOC_TYPE=${SOC_TYPE} \
1437+
-e BUILD_TYPE=${BUILD_TYPE} \
1438+
"${{ steps.cann-image.outputs.image }}" \
1439+
bash -lc '
1440+
set -e
1441+
yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake libcurl-devel
1442+
yum clean all && rm -rf /var/cache/yum
1443+
git config --global --add safe.directory "/workspace"
1444+
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
1445+
cmake -S . -B build \
1446+
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
1447+
-DGGML_CANN=on \
1448+
-DSOC_TYPE=${SOC_TYPE}
1449+
cmake --build build -j $(nproc)
1450+
1451+
chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build
1452+
'
14221453
14231454
# TODO: simplify the following workflows using a matrix
14241455
# TODO: run lighter CI on PRs and the full CI only on master (if needed)

0 commit comments

Comments
 (0)