Skip to content

Commit 26a8801

Browse files
committed
Merge branch 'master' into log_switch
2 parents b5f23c9 + 917786f commit 26a8801

File tree

88 files changed

+1790
-1350
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

88 files changed

+1790
-1350
lines changed

.clang-tidy

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,10 @@ Checks: >
1717
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
1818
performance-*,
1919
portability-*,
20+
-portability-simd-intrinsics,
2021
misc-*,
2122
-misc-const-correctness,
2223
-misc-non-private-member-variables-in-classes,
2324
-misc-no-recursion,
25+
-misc-use-anonymous-namespace,
2426
FormatStyle: none

.devops/llama-server.Dockerfile

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,22 +3,34 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION AS build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential git libcurl4-openssl-dev
6+
apt-get install -y build-essential git cmake libcurl4-openssl-dev
77

88
WORKDIR /app
99

1010
COPY . .
1111

12-
ENV LLAMA_CURL=1
1312

14-
RUN make -j$(nproc) llama-server
13+
RUN \
14+
# Build multiple versions of the CPU backend
15+
scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
16+
scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
17+
scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
18+
scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
19+
# Build llama-server
20+
cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
21+
cmake --build build --target llama-server -j $(nproc) && \
22+
# Copy the built libraries to /app/lib
23+
mkdir -p /app/lib && \
24+
mv libggml-cpu* /app/lib/ && \
25+
find build -name "*.so" -exec cp {} /app/lib/ \;
1526

1627
FROM ubuntu:$UBUNTU_VERSION AS runtime
1728

1829
RUN apt-get update && \
1930
apt-get install -y libcurl4-openssl-dev libgomp1 curl
2031

21-
COPY --from=build /app/llama-server /llama-server
32+
COPY --from=build /app/build/bin/llama-server /llama-server
33+
COPY --from=build /app/lib/ /
2234

2335
ENV LC_ALL=C.utf8
2436
# Must be set to 0.0.0.0 so it can listen to requests from host machine

.github/workflows/build.yml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1121,6 +1121,11 @@ jobs:
11211121
run: |
11221122
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
11231123
1124+
- name: Install ccache
1125+
uses: hendrikmuhs/[email protected]
1126+
with:
1127+
key: ${{ github.job }}
1128+
11241129
- name: Build
11251130
id: cmake_build
11261131
run: |

CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -96,10 +96,6 @@ if (NOT DEFINED GGML_LLAMAFILE)
9696
set(GGML_LLAMAFILE_DEFAULT ON)
9797
endif()
9898

99-
if (NOT DEFINED GGML_AMX)
100-
set(GGML_AMX ON)
101-
endif()
102-
10399
if (NOT DEFINED GGML_CUDA_GRAPHS)
104100
set(GGML_CUDA_GRAPHS_DEFAULT ON)
105101
endif()

Makefile

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -251,11 +251,11 @@ endif
251251
# Compile flags
252252
#
253253

254-
# keep standard at C11 and C++11
254+
# keep standard at C11 and C++17
255255
MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -DGGML_USE_CPU
256256
MK_CFLAGS = -std=c11 -fPIC
257-
MK_CXXFLAGS = -std=c++11 -fPIC
258-
MK_NVCCFLAGS = -std=c++11
257+
MK_CXXFLAGS = -std=c++17 -fPIC
258+
MK_NVCCFLAGS = -std=c++17
259259

260260
ifdef LLAMA_NO_CCACHE
261261
GGML_NO_CCACHE := 1
@@ -575,9 +575,12 @@ endif
575575

576576
ifndef GGML_NO_AMX
577577
MK_CPPFLAGS += -DGGML_USE_AMX
578-
OBJ_GGML_EXT += ggml/src/ggml-amx/ggml-amx.o ggml/src/ggml-amx/mmq.o
578+
OBJ_GGML_EXT += ggml/src/ggml-cpu/amx/amx.o ggml/src/ggml-cpu/amx/mmq.o
579579
endif
580580

581+
# only necessary for the CPU backend files
582+
MK_CPPFLAGS += -Iggml/src/ggml-cpu
583+
581584
ifdef GGML_RPC
582585
MK_CPPFLAGS += -DGGML_USE_RPC
583586
OBJ_GGML_EXT += ggml/src/ggml-rpc.o

Package.swift

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,13 +28,16 @@ var cSettings: [CSetting] = [
2828
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
2929
.unsafeFlags(["-fno-objc-arc"]),
3030
.headerSearchPath("ggml/src"),
31+
.headerSearchPath("ggml/src/ggml-cpu"),
3132
// NOTE: NEW_LAPACK will required iOS version 16.4+
3233
// We should consider add this in the future when we drop support for iOS 14
3334
// (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
3435
// .define("ACCELERATE_NEW_LAPACK"),
3536
// .define("ACCELERATE_LAPACK_ILP64")
37+
.define("GGML_USE_CPU"),
3638
]
3739

40+
3841
#if canImport(Darwin)
3942
sources.append("ggml/src/ggml-common.h")
4043
sources.append("ggml/src/ggml-metal/ggml-metal.m")
@@ -44,7 +47,6 @@ cSettings.append(
4447
contentsOf: [
4548
.define("GGML_USE_ACCELERATE"),
4649
.define("GGML_USE_METAL"),
47-
.define("GGML_USE_CPU")
4850
]
4951
)
5052
#endif
@@ -86,5 +88,5 @@ let package = Package(
8688
linkerSettings: linkerSettings
8789
)
8890
],
89-
cxxLanguageStandard: .cxx11
91+
cxxLanguageStandard: .cxx17
9092
)

README.md

Lines changed: 250 additions & 247 deletions
Large diffs are not rendered by default.

ci/run.sh

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -815,7 +815,10 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
815815
ln -sfn ${mnt_models} ${SRC}/models-mnt
816816

817817
# Create a fresh python3 venv and enter it
818-
python3 -m venv "$MNT/venv"
818+
if ! python3 -m venv "$MNT/venv"; then
819+
echo "Error: Failed to create Python virtual environment at $MNT/venv."
820+
exit 1
821+
fi
819822
source "$MNT/venv/bin/activate"
820823

821824
pip install -r ${SRC}/requirements.txt --disable-pip-version-check

common/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,5 +88,5 @@ if (LLAMA_CURL)
8888
endif ()
8989

9090
target_include_directories(${TARGET} PUBLIC .)
91-
target_compile_features (${TARGET} PUBLIC cxx_std_11)
91+
target_compile_features (${TARGET} PUBLIC cxx_std_17)
9292
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)

common/common.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -652,7 +652,17 @@ bool fs_validate_filename(const std::string & filename) {
652652

653653
std::u32string filename_utf32;
654654
try {
655+
#if defined(__clang__)
656+
// disable C++17 deprecation warning for std::codecvt_utf8
657+
# pragma clang diagnostic push
658+
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
659+
#endif
655660
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
661+
662+
#if defined(__clang__)
663+
# pragma clang diagnostic pop
664+
#endif
665+
656666
filename_utf32 = converter.from_bytes(filename);
657667

658668
// If the reverse conversion mismatches, it means overlong UTF-8 sequences were used,

0 commit comments

Comments (0)