Skip to content

Commit 395aec0

Browse files
authored
Merge branch 'ggerganov:master' into master
2 parents d217eb7 + 642330a commit 395aec0

38 files changed

+1379
-1332
lines changed

.devops/llama-server.Dockerfile

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,34 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION AS build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential git libcurl4-openssl-dev
6+
apt-get install -y build-essential git cmake libcurl4-openssl-dev
77

88
WORKDIR /app
99

1010
COPY . .
1111

12-
ENV LLAMA_CURL=1
1312

14-
RUN make -j$(nproc) llama-server
13+
RUN \
14+
# Build multiple versions of the CPU backend
15+
scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
16+
scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
17+
scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
18+
scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
19+
# Build llama-server
20+
cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
21+
cmake --build build --target llama-server -j $(nproc) && \
22+
# Copy the built libraries to /app/lib
23+
mkdir -p /app/lib && \
24+
mv libggml-cpu* /app/lib/ && \
25+
find build -name "*.so" -exec cp {} /app/lib/ \;
1526

1627
FROM ubuntu:$UBUNTU_VERSION AS runtime
1728

1829
RUN apt-get update && \
1930
apt-get install -y libcurl4-openssl-dev libgomp1 curl
2031

21-
COPY --from=build /app/llama-server /llama-server
32+
COPY --from=build /app/build/bin/llama-server /llama-server
33+
COPY --from=build /app/lib/ /
2234

2335
ENV LC_ALL=C.utf8
2436
# Must be set to 0.0.0.0 so it can listen to requests from the host machine

.github/pull_request_template.md

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1 @@
1-
2-
3-
- [x] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md)
4-
- Self-reported review complexity:
5-
- [ ] Low
6-
- [ ] Medium
7-
- [ ] High
1+
*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*

.github/workflows/build.yml

Lines changed: 29 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -153,66 +153,6 @@ jobs:
153153
path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
154154
name: llama-bin-macos-x64.zip
155155

156-
ubuntu-focal-make:
157-
runs-on: ubuntu-20.04
158-
env:
159-
LLAMA_NODE_AVAILABLE: true
160-
LLAMA_PYTHON_AVAILABLE: true
161-
162-
steps:
163-
- name: Clone
164-
id: checkout
165-
uses: actions/checkout@v4
166-
167-
- name: Dependencies
168-
id: depends
169-
run: |
170-
sudo apt-get update
171-
sudo apt-get install build-essential gcc-8
172-
173-
- uses: actions/setup-node@v4
174-
with:
175-
node-version: "20"
176-
177-
- uses: actions/setup-python@v5
178-
with:
179-
python-version: "3.11"
180-
181-
- name: Build
182-
id: make_build
183-
env:
184-
LLAMA_FATAL_WARNINGS: 1
185-
run: |
186-
CC=gcc-8 make -j $(nproc)
187-
188-
- name: Test
189-
id: make_test
190-
run: |
191-
CC=gcc-8 make tests -j $(nproc)
192-
make test -j $(nproc)
193-
194-
ubuntu-focal-make-curl:
195-
runs-on: ubuntu-20.04
196-
197-
steps:
198-
- name: Clone
199-
id: checkout
200-
uses: actions/checkout@v4
201-
202-
- name: Dependencies
203-
id: depends
204-
run: |
205-
sudo apt-get update
206-
sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev
207-
208-
- name: Build
209-
id: make_build
210-
env:
211-
LLAMA_FATAL_WARNINGS: 1
212-
LLAMA_CURL: 1
213-
run: |
214-
CC=gcc-8 make -j $(nproc)
215-
216156
ubuntu-latest-cmake:
217157
runs-on: ubuntu-latest
218158

@@ -510,36 +450,6 @@ jobs:
510450
cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
511451
cmake --build . --config Release -j $(nproc)
512452
513-
# TODO: build with GGML_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
514-
# how to debug it.
515-
# ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
516-
macOS-latest-make:
517-
runs-on: macos-latest
518-
519-
steps:
520-
- name: Clone
521-
id: checkout
522-
uses: actions/checkout@v4
523-
524-
- name: Dependencies
525-
id: depends
526-
continue-on-error: true
527-
run: |
528-
brew update
529-
530-
- name: Build
531-
id: make_build
532-
env:
533-
LLAMA_FATAL_WARNINGS: 1
534-
run: |
535-
GGML_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu)
536-
537-
- name: Test
538-
id: make_test
539-
run: |
540-
GGML_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu)
541-
GGML_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu)
542-
543453
# TODO: build with GGML_METAL=OFF because test-backend-ops fails on "Apple Paravirtual device" and I don't know
544454
# how to debug it.
545455
# ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
@@ -635,33 +545,35 @@ jobs:
635545
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
636546
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
637547
638-
macOS-latest-swift:
639-
runs-on: macos-latest
640-
641-
strategy:
642-
matrix:
643-
destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
644-
645-
steps:
646-
- name: Clone
647-
id: checkout
648-
uses: actions/checkout@v4
649-
650-
- name: Dependencies
651-
id: depends
652-
continue-on-error: true
653-
run: |
654-
brew update
655-
656-
- name: xcodebuild for swift package
657-
id: xcodebuild
658-
run: |
659-
xcodebuild -scheme llama -destination "${{ matrix.destination }}"
660-
661-
- name: Build Swift Example
662-
id: make_build_swift_example
663-
run: |
664-
make swift
548+
# TODO: temporarily disabled; see the following PR for possible re-enabling:
549+
# https://github.com/ggerganov/llama.cpp/pull/10525
550+
# macOS-latest-swift:
551+
# runs-on: macos-latest
552+
#
553+
# strategy:
554+
# matrix:
555+
# destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
556+
#
557+
# steps:
558+
# - name: Clone
559+
# id: checkout
560+
# uses: actions/checkout@v4
561+
#
562+
# - name: Dependencies
563+
# id: depends
564+
# continue-on-error: true
565+
# run: |
566+
# brew update
567+
#
568+
# - name: xcodebuild for swift package
569+
# id: xcodebuild
570+
# run: |
571+
# xcodebuild -scheme llama -destination "${{ matrix.destination }}"
572+
#
573+
# - name: Build Swift Example
574+
# id: make_build_swift_example
575+
# run: |
576+
# make swift
665577

666578
windows-msys2:
667579
runs-on: windows-latest
@@ -688,21 +600,6 @@ jobs:
688600
mingw-w64-${{matrix.env}}-cmake
689601
mingw-w64-${{matrix.env}}-openblas
690602
691-
- name: Build using make
692-
shell: msys2 {0}
693-
run: |
694-
make -j $(nproc)
695-
696-
- name: Clean after building using make
697-
shell: msys2 {0}
698-
run: |
699-
make clean
700-
701-
- name: Build using make w/ OpenBLAS
702-
shell: msys2 {0}
703-
run: |
704-
make GGML_OPENBLAS=1 -j $(nproc)
705-
706603
- name: Build using CMake
707604
shell: msys2 {0}
708605
run: |
@@ -1250,9 +1147,7 @@ jobs:
12501147
runs-on: ubuntu-latest
12511148

12521149
needs:
1253-
- ubuntu-focal-make
12541150
- ubuntu-latest-cmake
1255-
- macOS-latest-make
12561151
- macOS-latest-cmake
12571152
- windows-latest-cmake
12581153
- windows-2019-cmake-cuda

CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,6 @@ if (NOT DEFINED GGML_LLAMAFILE)
111111
set(GGML_LLAMAFILE_DEFAULT ON)
112112
endif()
113113

114-
if (NOT DEFINED GGML_AMX)
115-
set(GGML_AMX ON)
116-
endif()
117-
118114
if (NOT DEFINED GGML_CUDA_GRAPHS)
119115
set(GGML_CUDA_GRAPHS_DEFAULT ON)
120116
endif()

CODEOWNERS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# collaborators can optionally add themselves here to indicate their availability for reviewing related PRs
2+
3+
ci/ @ggerganov

CONTRIBUTING.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
# Pull requests (for contributors)
22

33
- Test your changes:
4-
- Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the `ggml` library
54
- Execute [the full CI locally on your machine](ci/README.md) before publishing
6-
- Optionally rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs
5+
- Verify that the perplexity and the performance are not affected negatively by your changes (use `llama-perplexity` and `llama-bench`)
6+
- If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends)
7+
- If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops`
78
- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
89
- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
910

@@ -12,6 +13,7 @@
1213
- Squash-merge PRs
1314
- Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
1415
- Optionally pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
16+
- Consider adding yourself to [CODEOWNERS](CODEOWNERS)
1517

1618
# Coding guidelines
1719

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
ifndef LLAMA_MAKEFILE
2+
$(error The Makefile build is deprecated. Use the CMake build instead. For more details, see https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md)
3+
endif
4+
15
# Define the default target now so that it is always the first target
26
BUILD_TARGETS = \
37
libllava.a \

Package.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,5 +88,5 @@ let package = Package(
8888
linkerSettings: linkerSettings
8989
)
9090
],
91-
cxxLanguageStandard: .cxx11
91+
cxxLanguageStandard: .cxx17
9292
)

0 commit comments

Comments
 (0)