Skip to content

Commit 3f6406f

Browse files
authored
Merge branch 'ggerganov:master' into master
2 parents 4278480 + 3952a22 commit 3f6406f

File tree

8 files changed

+103
-51
lines changed

8 files changed

+103
-51
lines changed

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,13 @@ if (WIN32)
4646
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
4747
endif()
4848

49+
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
50+
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/source-charset:utf-8>")
51+
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/source-charset:utf-8>")
52+
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/execution-charset:utf-8>")
53+
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/execution-charset:utf-8>")
54+
endif()
55+
4956
#
5057
# option list
5158
#

Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -730,10 +730,10 @@ GLSLC_CMD = glslc
730730
_ggml_vk_genshaders_cmd = $(shell pwd)/vulkan-shaders-gen
731731
_ggml_vk_header = ggml/src/ggml-vulkan-shaders.hpp
732732
_ggml_vk_source = ggml/src/ggml-vulkan-shaders.cpp
733-
_ggml_vk_input_dir = ggml/src/vulkan-shaders
733+
_ggml_vk_input_dir = ggml/src/ggml-vulkan/vulkan-shaders
734734
_ggml_vk_shader_deps = $(echo $(_ggml_vk_input_dir)/*.comp)
735735

736-
ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source)
736+
ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source)
737737
$(CXX) $(CXXFLAGS) $(shell pkg-config --cflags vulkan) -c $< -o $@
738738

739739
$(_ggml_vk_header): $(_ggml_vk_source)
@@ -745,8 +745,8 @@ $(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen
745745
--target-hpp $(_ggml_vk_header) \
746746
--target-cpp $(_ggml_vk_source)
747747

748-
vulkan-shaders-gen: ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
749-
$(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
748+
vulkan-shaders-gen: ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
749+
$(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
750750

751751
endif # GGML_VULKAN
752752

cmake/llama-config.cmake.in

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,11 @@ set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
33
set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
44
set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
55

6-
set(GGML_BLAS @GGML_BLAS@)
7-
set(GGML_CUDA @GGML_CUDA@)
8-
set(GGML_METAL @GGML_METAL@)
9-
set(GGML_HIP @GGML_HIP@)
106
set(GGML_ACCELERATE @GGML_ACCELERATE@)
11-
set(GGML_VULKAN @GGML_VULKAN@)
127
set(GGML_VULKAN_CHECK_RESULTS @GGML_VULKAN_CHECK_RESULTS@)
138
set(GGML_VULKAN_DEBUG @GGML_VULKAN_DEBUG@)
149
set(GGML_VULKAN_MEMORY_DEBUG @GGML_VULKAN_MEMORY_DEBUG@)
1510
set(GGML_VULKAN_VALIDATE @GGML_VULKAN_VALIDATE@)
16-
set(GGML_SYCL @GGML_SYCL@)
1711
set(GGML_OPENMP @GGML_OPENMP@)
1812

1913
@PACKAGE_INIT@
@@ -22,10 +16,39 @@ set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
2216
set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
2317
set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")
2418

25-
# Ensure transient dependencies satisfied
26-
2719
find_package(Threads REQUIRED)
2820

21+
set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@")
22+
set(_llama_link_deps "")
23+
foreach(_ggml_lib ggml ggml-base)
24+
string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY")
25+
find_library(${_ggml_lib_var} ${_ggml_lib}
26+
REQUIRED
27+
HINTS ${LLAMA_LIB_DIR}
28+
NO_CMAKE_FIND_ROOT_PATH
29+
)
30+
list(APPEND _llama_link_deps "${${_ggml_lib_var}}")
31+
message(STATUS "Found ${${_ggml_lib_var}}")
32+
endforeach()
33+
34+
foreach(backend amx blas cann cpu cuda hip kompute metal musa rpc sycl vulkan)
35+
string(TOUPPER "GGML_${backend}" backend_id)
36+
set(_ggml_lib "ggml-${backend}")
37+
string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY")
38+
39+
find_library(${_ggml_lib_var} ${_ggml_lib}
40+
HINTS ${LLAMA_LIB_DIR}
41+
NO_CMAKE_FIND_ROOT_PATH
42+
)
43+
if(${_ggml_lib_var})
44+
list(APPEND _llama_link_deps "${${_ggml_lib_var}}")
45+
set(${backend_id} ON)
46+
message(STATUS "Found backend ${${_ggml_lib_var}}")
47+
else()
48+
set(${backend_id} OFF)
49+
endif()
50+
endforeach()
51+
2952
if (APPLE AND GGML_ACCELERATE)
3053
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
3154
endif()
@@ -48,7 +71,7 @@ if (GGML_VULKAN)
4871
find_package(Vulkan REQUIRED)
4972
endif()
5073

51-
if (GGML_HIPBLAS)
74+
if (GGML_HIP)
5275
find_package(hip REQUIRED)
5376
find_package(hipblas REQUIRED)
5477
find_package(rocblas REQUIRED)
@@ -63,24 +86,13 @@ if (GGML_OPENMP)
6386
find_package(OpenMP REQUIRED)
6487
endif()
6588

66-
67-
find_library(ggml_LIBRARY ggml
68-
REQUIRED
69-
HINTS ${LLAMA_LIB_DIR}
70-
NO_CMAKE_FIND_ROOT_PATH
71-
)
72-
7389
find_library(llama_LIBRARY llama
7490
REQUIRED
7591
HINTS ${LLAMA_LIB_DIR}
7692
NO_CMAKE_FIND_ROOT_PATH
7793
)
7894

79-
set(_llama_link_deps "${ggml_LIBRARY}" "@GGML_LINK_LIBRARIES@")
80-
set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@")
81-
8295
add_library(llama UNKNOWN IMPORTED)
83-
8496
set_target_properties(llama
8597
PROPERTIES
8698
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"

ggml/CMakeLists.txt

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -236,12 +236,8 @@ set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
236236
#if (GGML_METAL)
237237
# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
238238
#endif()
239-
install(TARGETS ggml PUBLIC_HEADER)
240-
241-
if (BUILD_SHARED_LIBS)
242-
install(TARGETS ggml LIBRARY)
243-
install(TARGETS ggml-base LIBRARY)
244-
endif()
239+
install(TARGETS ggml LIBRARY PUBLIC_HEADER)
240+
install(TARGETS ggml-base LIBRARY)
245241

246242
# FIXME: this should be done in the backend cmake files
247243
if (GGML_METAL)

ggml/src/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,8 @@ function(ggml_add_backend backend)
239239
if (${BUILD_SHARED_LIBS})
240240
target_compile_definitions(${backend_target} PRIVATE GGML_BACKEND_BUILD)
241241
target_compile_definitions(${backend_target} PUBLIC GGML_BACKEND_SHARED)
242-
install(TARGETS ${backend_target} LIBRARY)
243242
endif()
243+
install(TARGETS ${backend_target} LIBRARY)
244244
target_link_libraries(ggml PUBLIC ${backend_target})
245245
string(TOUPPER "GGML_USE_${backend}" backend_use)
246246
target_compile_definitions(ggml PUBLIC ${backend_use})

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
126126
GGML_METAL_KERNEL_TYPE_GELU_QUICK_4,
127127
GGML_METAL_KERNEL_TYPE_SILU,
128128
GGML_METAL_KERNEL_TYPE_SILU_4,
129+
GGML_METAL_KERNEL_TYPE_ELU,
129130
GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16,
130131
GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16_4,
131132
GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32,
@@ -649,6 +650,7 @@ @implementation GGMLMetalClass
649650
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU_QUICK_4, gelu_quick_4, true);
650651
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU, silu, true);
651652
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU_4, silu_4, true);
653+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ELU, elu, true);
652654
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16, soft_max_f16, has_simdgroup_reduction);
653655
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16_4, soft_max_f16_4, has_simdgroup_reduction);
654656
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32, soft_max_f32, has_simdgroup_reduction);
@@ -968,6 +970,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
968970
case GGML_UNARY_OP_GELU:
969971
case GGML_UNARY_OP_GELU_QUICK:
970972
case GGML_UNARY_OP_SILU:
973+
case GGML_UNARY_OP_ELU:
971974
return ggml_is_contiguous(op->src[0]);
972975
default:
973976
return false;
@@ -1589,6 +1592,18 @@ static void ggml_metal_encode_node(
15891592

15901593
[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
15911594
} break;
1595+
case GGML_UNARY_OP_ELU:
1596+
{
1597+
id<MTLComputePipelineState> pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ELU].pipeline;
1598+
1599+
[encoder setComputePipelineState:pipeline];
1600+
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
1601+
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
1602+
1603+
const int64_t n = ggml_nelements(dst);
1604+
1605+
[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
1606+
} break;
15921607
default:
15931608
{
15941609
GGML_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(dst->op));

ggml/src/ggml-metal/ggml-metal.metal

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,14 @@ kernel void kernel_silu_4(
782782
dst[tpig] = x / (1.0f + exp(-x));
783783
}
784784

785+
kernel void kernel_elu(
786+
device const float * src0,
787+
device float * dst,
788+
uint tpig[[thread_position_in_grid]]) {
789+
device const float & x = src0[tpig];
790+
dst[tpig] = (x > 0.0f) ? x : (exp(x) - 1.0f);
791+
}
792+
785793
kernel void kernel_sqr(
786794
device const float * src0,
787795
device float * dst,
@@ -2137,20 +2145,34 @@ kernel void kernel_im2col(
21372145
uint3 tgpg[[threadgroups_per_grid]],
21382146
uint3 tpitg[[thread_position_in_threadgroup]],
21392147
uint3 ntg[[threads_per_threadgroup]]) {
2140-
const int32_t iiw = tgpig[2] * s0 + tpitg[2] * d0 - p0;
2141-
const int32_t iih = tgpig[1] * s1 + tpitg[1] * d1 - p1;
2148+
// const int64_t IC = tgpg[0];
2149+
const int64_t OH = tgpg[1];
2150+
const int64_t OW = tgpg[2];
21422151

2143-
const int32_t offset_dst =
2144-
(tpitg[0] * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW +
2145-
(tgpig[0] * (ntg[1] * ntg[2]) + tpitg[1] * ntg[2] + tpitg[2]);
2152+
// const int64_t N = ntg[0];
2153+
const int64_t KH = ntg[1];
2154+
const int64_t KW = ntg[2];
2155+
2156+
const int64_t in = tpitg[0];
2157+
const int64_t ikh = tpitg[1];
2158+
const int64_t ikw = tpitg[2];
2159+
2160+
const int64_t iic = tgpig[0];
2161+
const int64_t ioh = tgpig[1];
2162+
const int64_t iow = tgpig[2];
2163+
2164+
const int64_t iiw = iow*s0 + ikw*d0 - p0;
2165+
const int64_t iih = ioh*s1 + ikh*d1 - p1;
2166+
2167+
const int64_t offset_dst = (in*OH*OW + ioh*OW + iow)*CHW + (iic*(KH*KW) + ikh*KW + ikw);
21462168

21472169
device T * pdst = (device T *) (dst);
21482170

21492171
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
21502172
pdst[offset_dst] = 0.0f;
21512173
} else {
2152-
const int32_t offset_src = tpitg[0] * ofs0 + tgpig[0] * ofs1;
2153-
pdst[offset_dst] = x[offset_src + iih * IW + iiw];
2174+
const int64_t offset_src = in*ofs0 + iic*ofs1 + iih*IW + iiw;
2175+
pdst[offset_dst] = x[offset_src];
21542176
}
21552177
}
21562178

@@ -2201,25 +2223,25 @@ kernel void kernel_im2col_ext(
22012223
uint3 tgpg[[threadgroups_per_grid]], // tgpg[0] = D x IC x KH x KW, CHW = IC x KH x KW
22022224
uint3 tpitg[[thread_position_in_threadgroup]],
22032225
uint3 ntg[[threads_per_threadgroup]]) { // [M, 1, 1]
2204-
const int32_t KHW = KH * KW; // KHW == ntg[1] * ntg[2], KW == ntg[2]
2226+
const int64_t KHW = KH * KW; // KHW == ntg[1] * ntg[2], KW == ntg[2]
22052227

2206-
const int32_t d = tgpig[0] / CHW;
2207-
const int32_t chw = tgpig[0] % CHW;
2208-
const int32_t tgpig_0 = chw / KHW; // 0 ~ (IC - 1)
2209-
const int32_t HW = tgpig[0] % KHW;
2228+
const int64_t d = tgpig[0] / CHW;
2229+
const int64_t chw = tgpig[0] % CHW;
2230+
const int64_t tgpig_0 = chw / KHW; // 0 ~ (IC - 1)
2231+
const int64_t HW = tgpig[0] % KHW;
22102232

2211-
const int32_t tpitg_0 = (d * ntg[0]) + tpitg[0];
2233+
const int64_t tpitg_0 = (d * ntg[0]) + tpitg[0];
22122234
if (tpitg_0 >= N) {
22132235
return;
22142236
}
22152237

2216-
const int32_t tpitg_1 = HW / KW;
2217-
const int32_t tpitg_2 = HW % KW;
2238+
const int64_t tpitg_1 = HW / KW;
2239+
const int64_t tpitg_2 = HW % KW;
22182240

2219-
const int32_t iiw = tgpig[2] * s0 + tpitg_2 * d0 - p0;
2220-
const int32_t iih = tgpig[1] * s1 + tpitg_1 * d1 - p1;
2241+
const int64_t iiw = tgpig[2] * s0 + tpitg_2 * d0 - p0;
2242+
const int64_t iih = tgpig[1] * s1 + tpitg_1 * d1 - p1;
22212243

2222-
const int32_t offset_dst =
2244+
const int64_t offset_dst =
22232245
(tpitg_0 * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW +
22242246
(tgpig_0 * KHW + tpitg_1 * KW + tpitg_2);
22252247

@@ -2228,7 +2250,7 @@ kernel void kernel_im2col_ext(
22282250
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
22292251
pdst[offset_dst] = 0.0f;
22302252
} else {
2231-
const int32_t offset_src = tpitg_0 * ofs0 + tgpig_0 * ofs1;
2253+
const int64_t offset_src = tpitg_0 * ofs0 + tgpig_0 * ofs1;
22322254
pdst[offset_dst] = x[offset_src + iih * IW + iiw];
22332255
}
22342256
}

scripts/sync-ggml.last

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
9d0708e863f3aa2fc1eb0b75d433303c30bd0dbc
1+
2884dd72fea8922910fe53387c3d17ab928d3a8e

0 commit comments

Comments
 (0)