Skip to content

Commit 26a8801

Browse files
committed
Merge branch 'master' into log_switch
2 parents b5f23c9 + 917786f commit 26a8801

File tree

88 files changed

+1790
-1350
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

88 files changed

+1790
-1350
lines changed

.clang-tidy

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,10 @@ Checks: >
1717
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
1818
performance-*,
1919
portability-*,
20+
-portability-simd-intrinsics,
2021
misc-*,
2122
-misc-const-correctness,
2223
-misc-non-private-member-variables-in-classes,
2324
-misc-no-recursion,
25+
-misc-use-anonymous-namespace,
2426
FormatStyle: none

.devops/llama-server.Dockerfile

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,22 +3,34 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION AS build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential git libcurl4-openssl-dev
6+
apt-get install -y build-essential git cmake libcurl4-openssl-dev
77

88
WORKDIR /app
99

1010
COPY . .
1111

12-
ENV LLAMA_CURL=1
1312

14-
RUN make -j$(nproc) llama-server
13+
RUN \
14+
# Build multiple versions of the CPU backend
15+
scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
16+
scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
17+
scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
18+
scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
19+
# Build llama-server
20+
cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
21+
cmake --build build --target llama-server -j $(nproc) && \
22+
# Copy the built libraries to /app/lib
23+
mkdir -p /app/lib && \
24+
mv libggml-cpu* /app/lib/ && \
25+
find build -name "*.so" -exec cp {} /app/lib/ \;
1526

1627
FROM ubuntu:$UBUNTU_VERSION AS runtime
1728

1829
RUN apt-get update && \
1930
apt-get install -y libcurl4-openssl-dev libgomp1 curl
2031

21-
COPY --from=build /app/llama-server /llama-server
32+
COPY --from=build /app/build/bin/llama-server /llama-server
33+
COPY --from=build /app/lib/ /
2234

2335
ENV LC_ALL=C.utf8
2436
# Must be set to 0.0.0.0 so it can listen to requests from host machine

.github/workflows/build.yml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1121,6 +1121,11 @@ jobs:
11211121
run: |
11221122
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
11231123
1124+
- name: Install ccache
1125+
uses: hendrikmuhs/[email protected]
1126+
with:
1127+
key: ${{ github.job }}
1128+
11241129
- name: Build
11251130
id: cmake_build
11261131
run: |

CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -96,10 +96,6 @@ if (NOT DEFINED GGML_LLAMAFILE)
9696
set(GGML_LLAMAFILE_DEFAULT ON)
9797
endif()
9898

99-
if (NOT DEFINED GGML_AMX)
100-
set(GGML_AMX ON)
101-
endif()
102-
10399
if (NOT DEFINED GGML_CUDA_GRAPHS)
104100
set(GGML_CUDA_GRAPHS_DEFAULT ON)
105101
endif()

Makefile

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -251,11 +251,11 @@ endif
251251
# Compile flags
252252
#
253253

254-
# keep standard at C11 and C++11
254+
# keep standard at C11 and C++17
255255
MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -DGGML_USE_CPU
256256
MK_CFLAGS = -std=c11 -fPIC
257-
MK_CXXFLAGS = -std=c++11 -fPIC
258-
MK_NVCCFLAGS = -std=c++11
257+
MK_CXXFLAGS = -std=c++17 -fPIC
258+
MK_NVCCFLAGS = -std=c++17
259259

260260
ifdef LLAMA_NO_CCACHE
261261
GGML_NO_CCACHE := 1
@@ -575,9 +575,12 @@ endif
575575

576576
ifndef GGML_NO_AMX
577577
MK_CPPFLAGS += -DGGML_USE_AMX
578-
OBJ_GGML_EXT += ggml/src/ggml-amx/ggml-amx.o ggml/src/ggml-amx/mmq.o
578+
OBJ_GGML_EXT += ggml/src/ggml-cpu/amx/amx.o ggml/src/ggml-cpu/amx/mmq.o
579579
endif
580580

581+
# only necessary for the CPU backend files
582+
MK_CPPFLAGS += -Iggml/src/ggml-cpu
583+
581584
ifdef GGML_RPC
582585
MK_CPPFLAGS += -DGGML_USE_RPC
583586
OBJ_GGML_EXT += ggml/src/ggml-rpc.o

Package.swift

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,13 +28,16 @@ var cSettings: [CSetting] = [
2828
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
2929
.unsafeFlags(["-fno-objc-arc"]),
3030
.headerSearchPath("ggml/src"),
31+
.headerSearchPath("ggml/src/ggml-cpu"),
3132
// NOTE: NEW_LAPACK will required iOS version 16.4+
3233
// We should consider add this in the future when we drop support for iOS 14
3334
// (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
3435
// .define("ACCELERATE_NEW_LAPACK"),
3536
// .define("ACCELERATE_LAPACK_ILP64")
37+
.define("GGML_USE_CPU"),
3638
]
3739

40+
3841
#if canImport(Darwin)
3942
sources.append("ggml/src/ggml-common.h")
4043
sources.append("ggml/src/ggml-metal/ggml-metal.m")
@@ -44,7 +47,6 @@ cSettings.append(
4447
contentsOf: [
4548
.define("GGML_USE_ACCELERATE"),
4649
.define("GGML_USE_METAL"),
47-
.define("GGML_USE_CPU")
4850
]
4951
)
5052
#endif
@@ -86,5 +88,5 @@ let package = Package(
8688
linkerSettings: linkerSettings
8789
)
8890
],
89-
cxxLanguageStandard: .cxx11
91+
cxxLanguageStandard: .cxx17
9092
)

README.md

Lines changed: 250 additions & 247 deletions
Large diffs are not rendered by default.

ci/run.sh

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -815,7 +815,10 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
815815
ln -sfn ${mnt_models} ${SRC}/models-mnt
816816

817817
# Create a fresh python3 venv and enter it
818-
python3 -m venv "$MNT/venv"
818+
if ! python3 -m venv "$MNT/venv"; then
819+
echo "Error: Failed to create Python virtual environment at $MNT/venv."
820+
exit 1
821+
fi
819822
source "$MNT/venv/bin/activate"
820823

821824
pip install -r ${SRC}/requirements.txt --disable-pip-version-check

common/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,5 +88,5 @@ if (LLAMA_CURL)
8888
endif ()
8989

9090
target_include_directories(${TARGET} PUBLIC .)
91-
target_compile_features (${TARGET} PUBLIC cxx_std_11)
91+
target_compile_features (${TARGET} PUBLIC cxx_std_17)
9292
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)

common/common.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -652,7 +652,17 @@ bool fs_validate_filename(const std::string & filename) {
652652

653653
std::u32string filename_utf32;
654654
try {
655+
#if defined(__clang__)
656+
// disable C++17 deprecation warning for std::codecvt_utf8
657+
# pragma clang diagnostic push
658+
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
659+
#endif
655660
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
661+
662+
#if defined(__clang__)
663+
# pragma clang diagnostic pop
664+
#endif
665+
656666
filename_utf32 = converter.from_bytes(filename);
657667

658668
// If the reverse conversion mismatches, it means overlong UTF-8 sequences were used,

0 commit comments

Comments (0)