
Commit c28cb37

Merge pull request #91 from menloresearch/update-dev-from-master-2025-05-15-22-52
Sync master with upstream release b5400
2 parents: 86ba230 + c6a2c9e

34 files changed: 1346 additions, 636 deletions

common/chat.cpp

Lines changed: 125 additions & 105 deletions
Large diffs are not rendered by default.

common/chat.h

Lines changed: 2 additions & 0 deletions
```diff
@@ -3,6 +3,7 @@
 #pragma once
 
 #include "common.h"
+#include <chrono>
 #include <string>
 #include <vector>
 
@@ -71,6 +72,7 @@ struct common_chat_templates_inputs {
     common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
     bool parallel_tool_calls = false;
     bool extract_reasoning = true;
+    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
 };
 
 struct common_chat_params {
```
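The new `now` field gives callers control over the timestamp that date-aware chat templates see. A minimal sketch of why that matters (hypothetical usage with a stand-in struct, not code from this commit):

```cpp
#include <chrono>
#include <iostream>

// Stand-in for common_chat_templates_inputs; only the new field is shown.
struct chat_inputs {
    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
};

int main() {
    chat_inputs inputs;
    // Pin "now" to the Unix epoch so a template that embeds the current date
    // renders identically across runs (useful for golden-file tests).
    inputs.now = std::chrono::system_clock::time_point{};
    std::cout << inputs.now.time_since_epoch().count() << "\n"; // prints 0
}
```

Defaulting the field to `system_clock::now()` keeps existing callers unchanged, while tests can pin the value for reproducible prompt rendering.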

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -2069,6 +2069,9 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        if name.startswith("language_model."):
+            name = name.replace("language_model.", "")
+
         # split the gate_up into gate and up
         if "gate_up_proj" in name:
             name_up = name.replace("gate_up_proj", "up_proj.weight")
```
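The effect of the rename is easiest to see on a concrete tensor name. A hedged illustration (standalone sketch, not the converter's code; the sample tensor name is hypothetical):

```cpp
#include <iostream>
#include <string>

// Checkpoints that nest the decoder weights under a "language_model." prefix
// (as multimodal repos commonly do) get that prefix stripped, so the tensor
// names match what the existing GGUF mapping logic expects.
static std::string strip_prefix(std::string name) {
    const std::string prefix = "language_model.";
    if (name.rfind(prefix, 0) == 0) { // starts_with, C++17-compatible
        name.erase(0, prefix.size());
    }
    return name;
}

int main() {
    std::cout << strip_prefix("language_model.model.layers.0.gate_up_proj") << "\n";
    // -> model.layers.0.gate_up_proj
}
```

With the prefix gone, the `gate_up_proj` splitting below applies to both nested and flat checkpoint layouts.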

docs/backend/SYCL.md

Lines changed: 2 additions & 0 deletions
```diff
@@ -731,6 +731,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
 | GGML_SYCL_DEVICE_ARCH | Optional (except for AMD) | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. |
 | GGML_SYCL_F16 | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path. |
 | GGML_SYCL_GRAPH | ON *(default)* \|OFF *(Optional)* | Enable build with [SYCL Graph extension](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc). |
+| GGML_SYCL_DNN | ON *(default)* \|OFF *(Optional)* | Enable build with oneDNN. |
 | CMAKE_C_COMPILER | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path. |
 | CMAKE_CXX_COMPILER | `icpx` *(Linux)*, `icx` *(Windows)* | Set `icpx/icx` compiler for SYCL code path. |
 
@@ -741,6 +742,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
 | GGML_SYCL_DEBUG | 0 (default) or 1 | Enable log function by macro: GGML_SYCL_DEBUG |
 | GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features based on Intel GPU type, to compare the performance increase |
 | GGML_SYCL_DISABLE_GRAPH | 0 or 1 (default) | Disable running computations through SYCL Graphs feature. Disabled by default because graph performance isn't yet better than non-graph performance. |
+| GGML_SYCL_DISABLE_DNN | 0 (default) or 1 | Disable running computations through oneDNN and always use oneMKL. |
 | ZES_ENABLE_SYSMAN | 0 (default) or 1 | Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory.<br>Recommended to use when --split-mode = layer |
```
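Note that `GGML_SYCL_DISABLE_DNN` is a runtime environment variable, unlike the build-time `GGML_SYCL_DNN` CMake option above. A hedged sketch of the conventional way such a flag is read (the backend's actual helper may be named and structured differently):

```cpp
#include <cstdio>
#include <cstdlib>

// Returns true when the named environment variable is set to "1".
static bool env_flag_set(const char * name) {
    const char * v = std::getenv(name);
    return v != nullptr && v[0] == '1';
}

int main() {
    if (env_flag_set("GGML_SYCL_DISABLE_DNN")) {
        std::puts("oneDNN path disabled at runtime, using oneMKL");
    } else {
        std::puts("oneDNN path enabled (if compiled in)");
    }
}
```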

ggml/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
```diff
@@ -193,6 +193,7 @@ option(GGML_RPC "ggml: use RPC"
 option(GGML_SYCL "ggml: use SYCL" OFF)
 option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
 option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend" ON)
+option(GGML_SYCL_DNN "ggml: enable oneDNN in the SYCL backend" ON)
 set (GGML_SYCL_TARGET "INTEL" CACHE STRING
      "ggml: sycl target device")
 set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
```

ggml/src/ggml-sycl/CMakeLists.txt

Lines changed: 26 additions & 22 deletions
```diff
@@ -49,34 +49,38 @@ endif()
 target_compile_options(ggml-sycl PRIVATE "-Wno-narrowing")
 
 # Link against oneDNN
-find_package(DNNL)
 set(GGML_SYCL_DNNL 0)
-if(DNNL_FOUND)
-  if (NOT DEFINED DNNL_GPU_VENDOR)
-    # default to intel target
-    set(DNNL_GPU_VENDOR "INTEL")
-    if(NOT "${GGML_SYCL_TARGET}" STREQUAL "INTEL")
-      message(WARNING "oneDNN builds bundled with oneapi release only support INTEL target")
+if(GGML_SYCL_DNN)
+    find_package(DNNL)
+    if(DNNL_FOUND)
+        if (NOT DEFINED DNNL_GPU_VENDOR)
+            # default to intel target
+            set(DNNL_GPU_VENDOR "INTEL")
+            if(NOT "${GGML_SYCL_TARGET}" STREQUAL "INTEL")
+                message(WARNING "oneDNN builds bundled with oneapi release only support INTEL target")
+            endif()
         endif()
-    endif()
 
-  # Verify oneDNN was compiled for the same target as llama
-  if("${GGML_SYCL_TARGET}" STREQUAL "${DNNL_GPU_VENDOR}")
-    target_link_libraries(ggml-sycl PRIVATE DNNL::dnnl)
-    set(GGML_SYCL_DNNL 1)
-    get_target_property(CONFIGS DNNL::dnnl IMPORTED_CONFIGURATIONS)
-    foreach(CONFIG ${CONFIGS})
-      get_target_property(DNNL_LIB DNNL::dnnl IMPORTED_LOCATION_${CONFIG})
-      message(STATUS "Found oneDNN: ${DNNL_LIB}")
-    endforeach()
+        # Verify oneDNN was compiled for the same target as llama
+        if("${GGML_SYCL_TARGET}" STREQUAL "${DNNL_GPU_VENDOR}")
+            target_link_libraries(ggml-sycl PRIVATE DNNL::dnnl)
+            set(GGML_SYCL_DNNL 1)
+            get_target_property(CONFIGS DNNL::dnnl IMPORTED_CONFIGURATIONS)
+            foreach(CONFIG ${CONFIGS})
+                get_target_property(DNNL_LIB DNNL::dnnl IMPORTED_LOCATION_${CONFIG})
+                message(STATUS "Found oneDNN: ${DNNL_LIB}")
+            endforeach()
+        else()
+            message(WARNING
+                "oneDNN must be compiled for the same target as llama.cpp.
+                llama.cpp: ${GGML_SYCL_TARGET}, oneDNN: ${DNNL_GPU_VENDOR}.
+                Disabling oneDNN support.")
+        endif()
     else()
-    message(WARNING
-      "oneDNN must be compiled for the same target as llama.cpp.
-      llama.cpp: ${GGML_SYCL_TARGET}, oneDNN: ${DNNL_GPU_VENDOR}.
-      Disabling oneDNN support.")
+        message(STATUS "oneDNN not found, disabling oneDNN support")
     endif()
 else()
-  message(STATUS "oneDNN not found, disabling oneDNN support")
+    message(STATUS "oneDNN support disabled by the user")
 endif()
 target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_DNNL=${GGML_SYCL_DNNL})
```
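Because the final `target_compile_definitions` line always defines `GGML_SYCL_DNNL` to 0 or 1, backend sources can branch on it at compile time. A standalone sketch of that pattern (illustrative, not the backend's actual dispatch code):

```cpp
#include <cstdio>

// GGML_SYCL_DNNL is normally injected by the CMake logic above (0 or 1);
// default it here so this sketch compiles on its own.
#ifndef GGML_SYCL_DNNL
#define GGML_SYCL_DNNL 0
#endif

int main() {
#if GGML_SYCL_DNNL
    std::puts("built with oneDNN: matmuls can go through DNNL primitives");
#else
    std::puts("built without oneDNN: SYCL backend uses its oneMKL/native paths");
#endif
}
```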
