Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
2b8525d
Handle missing model in CLI parameters for llama-run (#11399)
engelmi Jan 28, 2025
6e84b0a
SYCL : SOFTMAX F16 mask support and other fixes (#11261)
qnixsynapse Jan 28, 2025
f643120
docker: add perplexity and bench commands to full image (#11438)
rare-magma Jan 28, 2025
4bf3119
cmake : don't fail on `GGML_CPU=OFF` (#11457)
someone13574 Jan 28, 2025
d7d1ecc
docker: allow installing pip packages system-wide (#11437)
rare-magma Jan 28, 2025
7fee288
Add github protocol pulling and http:// (#11465)
ericcurtin Jan 28, 2025
cae9fb4
HIP: Only call rocblas_initialize on rocblas versions with the multip…
sARY77 Jan 28, 2025
be5ef79
HIP: Supress transformation warning in softmax.cu
IMbackK Jan 28, 2025
d0c0804
ci : fix build CPU arm64 (#11472)
ngxson Jan 28, 2025
cf8cc85
server : Fixed wrong function name in llamacpp server unit test (#11473)
peidaqi Jan 28, 2025
794fe23
cmake: add hints for locating ggml on Windows using Llama find-packag…
Emreerdog Jan 28, 2025
325afb3
llama: fix missing k_cache store for rwkv6qwen2 (#11445)
MollySophia Jan 29, 2025
b636228
embedding : enable --no-warmup option (#11475)
danbev Jan 29, 2025
d2e518e
ggml-cpu : fix ggml_graph_compute_thread did not terminate on abort. …
issixx Jan 17, 2025
1a0e87d
ggml : add option to not print stack on abort (ggml/1081)
WilliamTambellini Jan 23, 2025
8158577
sync : ggml
ggerganov Jan 29, 2025
f0d4b29
Parse https://ollama.com/library/ syntax (#11480)
ericcurtin Jan 29, 2025
2711d02
vulkan: Catch pipeline creation failure and print an error message (#…
jeffbolznv Jan 29, 2025
e51c47b
server : update auto gen files comments [no ci] (#11484)
danbev Jan 29, 2025
66ee4f2
vulkan: implement initial support for IQ2 and IQ3 quantizations (#11360)
remyoudompheng Jan 29, 2025
eb7cf15
server : add /apply-template endpoint for additional use cases of Min…
pnb Jan 29, 2025
e044976
server : update json snippets in README.md [no ci] (#11492)
danbev Jan 30, 2025
7919256
readme : reference examples relative links (#11505)
guspan-tanadi Jan 30, 2025
496e5bf
server : (docs) added response format for /apply-template [no ci] (#1…
isaac-mcfadyen Jan 30, 2025
4314e56
server : use lambda instead of std::bind (#11507)
danbev Jan 30, 2025
ffd0821
vocab : correctly identify LF token for GPT-2 style BPE tokenizer (#1…
mgroeber9110 Jan 30, 2025
3d804de
sync: minja (#11499)
ochafik Jan 30, 2025
c300e68
CUDA/HIP: add warp_size to cuda_device_info
IMbackK Jan 29, 2025
6af1ca4
HIP: Prepare reduction operators for wave 64
IMbackK Jan 29, 2025
27d135c
HIP: require at least HIP 5.5
IMbackK Jan 29, 2025
8b576b6
Tool call support (generic + native for Llama, Functionary, Hermes, M…
ochafik Jan 30, 2025
553f1e4
`ci`: ccache for all github worfklows (#11516)
ochafik Jan 30, 2025
a039ef4
docs: add OpenCL
lhez Feb 5, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .devops/tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
exec ./llama-quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
exec ./llama-cli "$@"
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
exec ./llama-bench "$@"
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
exec ./llama-perplexity "$@"
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
echo "Converting PTH to GGML..."
for i in `ls $1/$2/ggml-model-f16.bin*`; do
for i in $(ls $1/$2/ggml-model-f16.bin*); do
if [ -f "${i/f16/q4_0}" ]; then
echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
else
Expand All @@ -30,6 +34,10 @@ else
echo "Available commands: "
echo " --run (-r): Run a model previously converted into ggml"
echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
echo " --bench (-b): Benchmark the performance of the inference for various parameters."
echo " ex: -m model.gguf"
echo " --perplexity (-p): Measure the perplexity of a model over a given text."
echo " ex: -m model.gguf -f file.txt"
echo " --convert (-c): Convert a llama model into ggml"
echo " ex: --outtype f16 \"/models/7B/\" "
echo " --quantize (-q): Optimize with quantization process ggml"
Expand Down
9 changes: 5 additions & 4 deletions .devops/vulkan.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG UBUNTU_VERSION=22.04
ARG UBUNTU_VERSION=24.04

FROM ubuntu:$UBUNTU_VERSION AS build

Expand All @@ -7,7 +7,7 @@ RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
apt update -y && \
apt-get install -y vulkan-sdk libcurl4-openssl-dev curl

Expand Down Expand Up @@ -55,8 +55,9 @@ RUN apt-get update \
git \
python3 \
python3-pip \
&& pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt \
python3-wheel \
&& pip install --break-system-packages --upgrade setuptools \
&& pip install --break-system-packages -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
Expand Down
8 changes: 8 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,11 @@ indent_style = tab
[examples/cvector-generator/*.txt]
trim_trailing_whitespace = unset
insert_final_newline = unset

[models/templates/*.jinja]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
139 changes: 132 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ jobs:
with:
fetch-depth: 0

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: macOS-latest-cmake-arm64
evict-old-files: 1d

- name: Dependencies
id: depends
continue-on-error: true
Expand Down Expand Up @@ -108,6 +114,12 @@ jobs:
with:
fetch-depth: 0

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: macOS-latest-cmake-x64
evict-old-files: 1d

- name: Dependencies
id: depends
continue-on-error: true
Expand Down Expand Up @@ -172,6 +184,12 @@ jobs:
with:
fetch-depth: 0

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: ubuntu-cpu-cmake
evict-old-files: 1d

- name: Dependencies
id: depends
run: |
Expand Down Expand Up @@ -249,6 +267,12 @@ jobs:
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }}
evict-old-files: 1d

- name: Dependencies
id: depends
run: |
Expand Down Expand Up @@ -296,6 +320,12 @@ jobs:
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: ubuntu-latest-cmake-rpc
evict-old-files: 1d

- name: Dependencies
id: depends
run: |
Expand Down Expand Up @@ -325,6 +355,12 @@ jobs:
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: ubuntu-22-cmake-vulkan
evict-old-files: 1d

- name: Dependencies
id: depends
run: |
Expand All @@ -346,7 +382,8 @@ jobs:
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900
# This is using llvmpipe and runs slower than other backends
ctest -L main --verbose --timeout 1800

ubuntu-22-cmake-hip:
runs-on: ubuntu-22.04
Expand All @@ -363,6 +400,12 @@ jobs:
sudo apt-get update
sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: ubuntu-22-cmake-hip
evict-old-files: 1d

- name: Build with native CMake HIP support
id: cmake_build
run: |
Expand Down Expand Up @@ -395,6 +438,12 @@ jobs:
apt-get update
apt-get install -y build-essential git cmake libcurl4-openssl-dev

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: ubuntu-22-cmake-musa
evict-old-files: 1d

- name: Build with native CMake MUSA support
id: cmake_build
run: |
Expand Down Expand Up @@ -434,6 +483,12 @@ jobs:
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: ubuntu-22-cmake-sycl
evict-old-files: 1d

- name: Build
id: cmake_build
run: |
Expand Down Expand Up @@ -478,6 +533,12 @@ jobs:
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: ubuntu-22-cmake-sycl-fp16
evict-old-files: 1d

- name: Build
id: cmake_build
run: |
Expand All @@ -499,6 +560,12 @@ jobs:
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: macOS-latest-cmake-ios
evict-old-files: 1d

- name: Dependencies
id: depends
continue-on-error: true
Expand Down Expand Up @@ -530,6 +597,12 @@ jobs:
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: macOS-latest-cmake-tvos
evict-old-files: 1d

- name: Dependencies
id: depends
continue-on-error: true
Expand Down Expand Up @@ -565,6 +638,12 @@ jobs:
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: macOS-latest-swift
evict-old-files: 1d

- name: Dependencies
id: depends
continue-on-error: true
Expand Down Expand Up @@ -606,6 +685,12 @@ jobs:
- name: Clone
uses: actions/checkout@v4

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: windows-msys2
evict-old-files: 1d

- name: Setup ${{ matrix.sys }}
uses: msys2/setup-msys2@v2
with:
Expand Down Expand Up @@ -674,6 +759,12 @@ jobs:
with:
fetch-depth: 0

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: windows-latest-cmake-${{ matrix.build }}
evict-old-files: 1d

- name: Clone Kompute submodule
id: clone_kompute
if: ${{ matrix.build == 'kompute-x64' }}
Expand Down Expand Up @@ -812,6 +903,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Install dependencies
env:
Expand All @@ -820,6 +913,12 @@ jobs:
apt update
apt install -y cmake build-essential ninja-build libgomp1 git

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: ubuntu-latest-cmake-cuda
evict-old-files: 1d

- name: Build with CMake
run: |
cmake -S . -B build -G Ninja \
Expand All @@ -846,6 +945,12 @@ jobs:
with:
fetch-depth: 0

- name: Install ccache
uses: hendrikmuhs/[email protected]
with:
key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
evict-old-files: 1d

- name: Install Cuda Toolkit 11.7
if: ${{ matrix.cuda == '11.7' }}
run: |
Expand Down Expand Up @@ -902,11 +1007,6 @@ jobs:
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8

- name: Install ccache
uses: hendrikmuhs/[email protected]
with:
key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}

- name: Install Ninja
id: install_ninja
run: |
Expand Down Expand Up @@ -986,6 +1086,12 @@ jobs:
with:
fetch-depth: 0

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: windows-latest-cmake-sycl
evict-old-files: 1d

- name: Install
run: |
scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
Expand Down Expand Up @@ -1065,9 +1171,10 @@ jobs:
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version

- name: Install ccache
uses: hendrikmuhs/[email protected]
uses: hendrikmuhs/[email protected].16
with:
key: ${{ github.job }}
evict-old-files: 1d

- name: Build
id: cmake_build
Expand Down Expand Up @@ -1097,6 +1204,12 @@ jobs:
with:
fetch-depth: 0

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: windows-latest-cmake-hip-release
evict-old-files: 1d

- name: Install
id: depends
run: |
Expand Down Expand Up @@ -1194,6 +1307,12 @@ jobs:
- name: Clone
uses: actions/checkout@v4

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: android-build
evict-old-files: 1d

- name: Set up JDK
uses: actions/setup-java@v3
with:
Expand Down Expand Up @@ -1231,6 +1350,12 @@ jobs:
with:
fetch-depth: 0

- name: ccache
uses: hendrikmuhs/[email protected]
with:
key: release
evict-old-files: 1d

- name: Determine tag name
id: tag
shell: bash
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,15 @@ jobs:
push_to_registry:
name: Push Docker image to Docker Hub

runs-on: ubuntu-latest
runs-on: ubuntu-22.04
env:
COMMIT_SHA: ${{ github.sha }}
strategy:
fail-fast: false
matrix:
config:
# Multi-stage build
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/arm64", full: true, light: true, server: true, freediskspace: false}
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
Expand Down
Loading
Loading