Commit c25ac30

ochafik committed
Merge remote-tracking branch 'origin/master' into date
2 parents dc788b4 + 8d66005

148 files changed: +12616 additions, −7734 deletions


.devops/cuda.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@ COPY . .
 RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
     export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
     fi && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release -j$(nproc)
 
 RUN mkdir -p /app/lib && \
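
The CUDA_DOCKER_ARCH value tested above can be set at image-build time; a minimal sketch, assuming Docker and a checkout of the repository (the image tag and architecture value are illustrative):

    # build the CUDA image for one target, e.g. compute capability 8.6
    docker build -f .devops/cuda.Dockerfile \
        --build-arg CUDA_DOCKER_ARCH=86 \
        -t local/llama.cpp:cuda .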

.devops/intel.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
     && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
     echo "Building with dynamic libs" && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${OPT_SYCL_F16} && \
     cmake --build build --config Release -j$(nproc)
 
 RUN mkdir -p /app/lib && \
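
GGML_SYCL_F16, checked in the RUN step above, can likewise be supplied when building the image (assuming it is declared as a build argument in the full file, which this hunk does not show); a hypothetical invocation enabling FP16 SYCL kernels:

    docker build -f .devops/intel.Dockerfile \
        --build-arg GGML_SYCL_F16=ON \
        -t local/llama.cpp:intel .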

.devops/llama-cli-cann.Dockerfile
Lines changed: 2 additions & 2 deletions

@@ -1,12 +1,12 @@
-ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
+ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10
 
 FROM ascendai/cann:$ASCEND_VERSION AS build
 
 WORKDIR /app
 
 COPY . .
 
-RUN yum install -y gcc g++ cmake make
+RUN yum install -y gcc g++ cmake make libcurl-devel
 ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
 ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
 ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
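
Because ASCEND_VERSION is an ARG, a different CANN base image can be selected without editing the Dockerfile; a sketch using the new default tag from this commit (any override must exist under ascendai/cann on Docker Hub; the image tag is illustrative):

    docker build -f .devops/llama-cli-cann.Dockerfile \
        --build-arg ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10 \
        -t local/llama.cpp:cann .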

.devops/musa.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -35,7 +35,7 @@ COPY . .
 RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
     export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
     fi && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release -j$(nproc)
 
 RUN mkdir -p /app/lib && \

.devops/rocm.Dockerfile
Lines changed: 3 additions & 3 deletions

@@ -17,8 +17,8 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 # gfx906 is deprecated
 #check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
 
-#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
-ARG ROCM_DOCKER_ARCH=gfx1100
+ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
+#ARG ROCM_DOCKER_ARCH=gfx1100
 
 # Set nvcc architectured
 ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}

@@ -40,7 +40,7 @@ WORKDIR /app
 COPY . .
 
 RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
-    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release \
+    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
     && cmake --build build --config Release -j$(nproc)
 
 RUN mkdir -p /app/lib \
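
With the full gfx target list now the default, ROCm image builds compile kernels for every listed architecture and take correspondingly longer; a minimal sketch of narrowing the build back to a single target for local iteration (the image tag is illustrative):

    docker build -f .devops/rocm.Dockerfile \
        --build-arg ROCM_DOCKER_ARCH=gfx1100 \
        -t local/llama.cpp:rocm .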

.devops/vulkan.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@ WORKDIR /app
 
 COPY . .
 
-RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
+RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
     cmake --build build --config Release -j$(nproc)
 
 RUN mkdir -p /app/lib && \
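
The pair -DGGML_BACKEND_DL=ON and -DGGML_CPU_ALL_VARIANTS=ON recurs across every Dockerfile in this commit: backends are built as runtime-loadable libraries, with several CPU instruction-set variants compiled side by side so the best one can be picked on the target machine. A sketch of the equivalent configure-and-build step outside Docker, assuming a checkout with CMake and libcurl development headers installed:

    cmake -B build -DGGML_NATIVE=OFF -DLLAMA_CURL=ON \
        -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON
    cmake --build build --config Release -j$(nproc)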

.github/workflows/build.yml
Lines changed: 5 additions & 4 deletions

@@ -1766,16 +1766,17 @@ jobs:
     if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
     defaults:
       run:
-        shell: bash -el {0}
-    runs-on: ubuntu-24.04-arm
+        shell: bash -el {0}
     strategy:
       matrix:
+        arch: [x86, aarch64]
         cann:
-          - '8.0.rc3.beta1-910b-openeuler22.03-py3.10'
+          - '8.1.RC1.alpha001-910b-openeuler22.03-py3.10'
         device:
           - 'ascend910b3'
         build:
           - 'Release'
+    runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
     container: ascendai/cann:${{ matrix.cann }}
     steps:
       - name: Checkout

@@ -1784,7 +1785,7 @@ jobs:
       - name: Dependencies
         run: |
           yum update -y
-          yum install -y git gcc gcc-c++ make cmake
+          yum install -y git gcc gcc-c++ make cmake libcurl-devel
 
       - name: Build
         run: |
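
To reproduce this job's environment locally, the same container image can be started by hand and the Dependencies step replayed; a sketch, assuming Docker is available:

    docker run --rm -it ascendai/cann:8.1.RC1.alpha001-910b-openeuler22.03-py3.10 bash
    # then, inside the container:
    yum update -y
    yum install -y git gcc gcc-c++ make cmake libcurl-devel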

.github/workflows/docker.yml
Lines changed: 6 additions & 6 deletions

@@ -36,13 +36,13 @@ jobs:
       matrix:
         config:
           # Multi-stage build
-          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
-          - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
-          - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
-          - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
-          - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
+          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
+          - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
+          - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
+          - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
+          - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
           # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
-          #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
+          #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }
     steps:
       - name: Check out the repo
         uses: actions/checkout@v4

Makefile
Lines changed: 0 additions & 4 deletions

@@ -780,10 +780,6 @@ ifdef GGML_HIP
 
     MK_CPPFLAGS += -DGGML_USE_HIP -DGGML_USE_CUDA
 
-    ifdef GGML_HIP_UMA
-        MK_CPPFLAGS += -DGGML_HIP_UMA
-    endif # GGML_HIP_UMA
-
     MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
     MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
     MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas

README.md
Lines changed: 5 additions & 8 deletions

@@ -9,13 +9,6 @@
 
 Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
 
-> [!IMPORTANT]
-> New `llama.cpp` package location: [ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp/pkgs/container/llama.cpp)
->
-> Update your container URLs to: `ghcr.io/ggml-org/llama.cpp`
->
-> More info: https://github.com/ggml-org/llama.cpp/discussions/11801
-
 ## Recent API changes
 
 - [Changelog for `libllama` API](https://github.com/ggml-org/llama.cpp/issues/9289)

@@ -104,6 +97,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [x] [Flan T5](https://huggingface.co/models?search=flan-t5)
 - [x] [Open Elm models](https://huggingface.co/collections/apple/openelm-instruct-models-6619ad295d7ae9f868b759ca)
 - [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b) + [GLMEdge-1.5b](https://huggingface.co/THUDM/glm-edge-1.5b-chat) + [GLMEdge-4b](https://huggingface.co/THUDM/glm-edge-4b-chat)
+- [x] [GLM-4-0414](https://huggingface.co/collections/THUDM/glm-4-0414-67f3cbcb34dd9d252707cb2e)
 - [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966)
 - [x] [EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct)
 - [x] [FalconMamba Models](https://huggingface.co/collections/tiiuae/falconmamba-7b-66b9a580324dd1598b0f6d4a)

@@ -247,6 +241,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 | [Vulkan](docs/build.md#vulkan) | GPU |
 | [CANN](docs/build.md#cann) | Ascend NPU |
 | [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
+| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/examples/rpc) | All |
 
 ## Building the project
 
@@ -265,7 +260,9 @@ The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](htt
 - [Trending](https://huggingface.co/models?library=gguf&sort=trending)
 - [LLaMA](https://huggingface.co/models?sort=trending&search=llama+gguf)
 
-You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`
+You can either manually download the GGUF file or directly use any `llama.cpp`-compatible model from [Hugging Face](https://huggingface.co/) or another model hosting site, such as [ModelScope](https://modelscope.cn/), by using this CLI argument: `-hf <user>/<model>[:quant]`.
+
+By default, the CLI downloads from Hugging Face; you can switch to another endpoint with the `MODEL_ENDPOINT` environment variable, e.g. `MODEL_ENDPOINT=https://www.modelscope.cn/` to pull model checkpoints from ModelScope or another model sharing community.
 
 After downloading a model, use the CLI tools to run it locally - see below.
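
A hypothetical invocation combining the two mechanisms described in the added README lines (the user, model, and quant fields are placeholders, and the binary must have been built with download support, e.g. -DLLAMA_CURL=ON):

    # pull a GGUF model from ModelScope instead of the default Hugging Face endpoint
    MODEL_ENDPOINT=https://www.modelscope.cn/ llama-cli -hf <user>/<model>:<quant>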
