Skip to content

Commit 4dc1158

Browse files
committed
Merge https://github.com/ggerganov/llama.cpp into vulkan_unsupported
2 parents 11c74f5 + 261e6a2 commit 4dc1158

File tree

11 files changed

+711
-677
lines changed

11 files changed

+711
-677
lines changed

.github/workflows/build.yml

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,8 @@ jobs:
127127
-DCMAKE_BUILD_RPATH="@loader_path" \
128128
-DLLAMA_FATAL_WARNINGS=ON \
129129
-DGGML_METAL=OFF \
130-
-DGGML_RPC=ON
130+
-DGGML_RPC=ON \
131+
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
131132
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
132133
133134
- name: Test
@@ -1051,9 +1052,13 @@ jobs:
10511052
run: examples/sycl/win-build-sycl.bat
10521053

10531054
windows-latest-cmake-hip:
1054-
if: ${{ github.event.inputs.create_release != 'true' }}
10551055
runs-on: windows-2022
10561056

1057+
env:
1058+
# The ROCm version must correspond to the version used in the HIP SDK.
1059+
ROCM_VERSION: "6.4.2"
1060+
HIPSDK_INSTALLER_VERSION: "25.Q3"
1061+
10571062
steps:
10581063
- name: Clone
10591064
id: checkout
@@ -1062,24 +1067,22 @@ jobs:
10621067
- name: Clone rocWMMA repository
10631068
id: clone_rocwmma
10641069
run: |
1065-
git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
1070+
git clone https://github.com/rocm/rocwmma --branch rocm-${{ env.ROCM_VERSION }} --depth 1
10661071
10671072
- name: Cache ROCm Installation
10681073
id: cache-rocm
10691074
uses: actions/cache@v4
10701075
with:
10711076
path: C:\Program Files\AMD\ROCm
1072-
key: rocm-6.1-${{ runner.os }}-v1
1073-
restore-keys: |
1074-
rocm-6.1-${{ runner.os }}-
1077+
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
10751078

10761079
- name: Install ROCm
10771080
if: steps.cache-rocm.outputs.cache-hit != 'true'
10781081
id: depends
10791082
run: |
10801083
$ErrorActionPreference = "Stop"
10811084
write-host "Downloading AMD HIP SDK Installer"
1082-
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
1085+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
10831086
write-host "Installing AMD HIP SDK"
10841087
$proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
10851088
$completed = $proc.WaitForExit(600000)

.github/workflows/release.yml

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ jobs:
108108
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
109109
-DLLAMA_FATAL_WARNINGS=ON \
110110
-DGGML_METAL=OFF \
111-
-DGGML_RPC=ON
111+
-DGGML_RPC=ON \
112+
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
112113
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
113114
114115
- name: Determine tag name
@@ -528,11 +529,16 @@ jobs:
528529
windows-hip:
529530
runs-on: windows-2022
530531

532+
env:
533+
# The ROCm version must correspond to the version used in the HIP SDK.
534+
ROCM_VERSION: "6.4.2"
535+
HIPSDK_INSTALLER_VERSION: "25.Q3"
536+
531537
strategy:
532538
matrix:
533539
include:
534540
- name: "radeon"
535-
gpu_targets: "gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"
541+
gpu_targets: "gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"
536542

537543
steps:
538544
- name: Clone
@@ -542,21 +548,19 @@ jobs:
542548
- name: Clone rocWMMA repository
543549
id: clone_rocwmma
544550
run: |
545-
git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
551+
git clone https://github.com/rocm/rocwmma --branch rocm-${{ env.ROCM_VERSION }} --depth 1
546552
547553
- name: Cache ROCm Installation
548554
id: cache-rocm
549555
uses: actions/cache@v4
550556
with:
551557
path: C:\Program Files\AMD\ROCm
552-
key: rocm-6.1-${{ runner.os }}-v1
553-
restore-keys: |
554-
rocm-6.1-${{ runner.os }}-
558+
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
555559

556560
- name: ccache
557561
uses: ggml-org/[email protected]
558562
with:
559-
key: windows-latest-cmake-hip-${{ matrix.name }}-x64
563+
key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64
560564
evict-old-files: 1d
561565

562566
- name: Install ROCm
@@ -565,7 +569,7 @@ jobs:
565569
run: |
566570
$ErrorActionPreference = "Stop"
567571
write-host "Downloading AMD HIP SDK Installer"
568-
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
572+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
569573
write-host "Installing AMD HIP SDK"
570574
$proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
571575
$completed = $proc.WaitForExit(600000)
@@ -610,9 +614,12 @@ jobs:
610614
-DLLAMA_CURL=OFF
611615
cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
612616
md "build\bin\rocblas\library\"
617+
md "build\bin\hipblaslt\library"
613618
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
619+
cp "${env:HIP_PATH}\bin\hipblaslt.dll" "build\bin\"
614620
cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
615621
cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
622+
cp "${env:HIP_PATH}\bin\hipblaslt\library\*" "build\bin\hipblaslt\library\"
616623
617624
- name: Pack artifacts
618625
id: pack_artifacts

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,10 +1219,10 @@ @implementation GGMLMetalClass
12191219
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_IQ4_NL, set_rows_iq4_nl, true);
12201220
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_L2_NORM, l2_norm, has_simdgroup_reduction);
12211221
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GROUP_NORM, group_norm, has_simdgroup_reduction);
1222-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_NORM, norm, true);
1222+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_NORM, norm, has_simdgroup_reduction);
12231223
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SSM_CONV_F32, ssm_conv_f32, true);
1224-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SSM_SCAN_F32, ssm_scan_f32, true);
1225-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SSM_SCAN_F32_GROUP, ssm_scan_f32_group, true);
1224+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SSM_SCAN_F32, ssm_scan_f32, has_simdgroup_reduction);
1225+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SSM_SCAN_F32_GROUP, ssm_scan_f32_group, has_simdgroup_reduction);
12261226
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_RWKV_WKV6_F32, rwkv_wkv6_f32, true);
12271227
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_RWKV_WKV7_F32, rwkv_wkv7_f32, true);
12281228
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32, mul_mv_f32_f32, has_simdgroup_reduction);
@@ -1443,9 +1443,9 @@ @implementation GGMLMetalClass
14431443
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SWIGLU_OAI, swiglu_oai, true);
14441444
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GEGLU_ERF, geglu_erf, true);
14451445
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GEGLU_QUICK, geglu_quick, true);
1446-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
1447-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MEAN, mean, true);
1448-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGMAX, argmax, true);
1446+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, has_simdgroup_reduction);
1447+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MEAN, mean, has_simdgroup_reduction);
1448+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGMAX, argmax, has_simdgroup_reduction);
14491449
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_AVG_F32, pool_2d_avg_f32, true);
14501450
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32, pool_2d_max_f32, true);
14511451
}
@@ -1982,7 +1982,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
19821982
case GGML_OP_L2_NORM:
19831983
return has_simdgroup_reduction && (op->ne[0] % 4 == 0 && ggml_is_contiguous_1(op->src[0]));
19841984
case GGML_OP_ARGMAX:
1985-
return true;
1985+
return has_simdgroup_reduction;
19861986
case GGML_OP_NORM:
19871987
return has_simdgroup_reduction && (op->ne[0] % 4 == 0 && ggml_is_contiguous_1(op->src[0]));
19881988
case GGML_OP_ROPE:
@@ -2028,6 +2028,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
20282028
return has_simdgroup_mm; // TODO: over-restricted for vec-kernels
20292029
case GGML_OP_SSM_CONV:
20302030
case GGML_OP_SSM_SCAN:
2031+
return has_simdgroup_reduction;
20312032
case GGML_OP_RWKV_WKV6:
20322033
case GGML_OP_RWKV_WKV7:
20332034
return true;

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,8 +1231,6 @@ static std::string format_size(size_t size) {
12311231
return oss.str();
12321232
}
12331233

1234-
static std::mutex log_mutex;
1235-
12361234
class vk_memory_logger {
12371235
public:
12381236
vk_memory_logger(): total_device(0), total_host(0) {}
@@ -1422,6 +1420,8 @@ struct ggml_backend_vk_buffer_context {
14221420
};
14231421

14241422
#ifdef GGML_VULKAN_MEMORY_DEBUG
1423+
static std::mutex log_mutex;
1424+
14251425
void vk_memory_logger::log_allocation(vk_buffer_ref buf_ref, size_t size) {
14261426
std::lock_guard<std::mutex> guard(log_mutex);
14271427
vk_buffer buf = buf_ref.lock();
@@ -13152,16 +13152,16 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_cgraph *
1315213152
} else if (tensor->op == GGML_OP_IM2COL_3D) {
1315313153
const int32_t s0 = tensor->op_params[0];
1315413154
const int32_t s1 = tensor->op_params[1];
13155-
const int32_t s1 = tensor->op_params[2];
13155+
const int32_t s2 = tensor->op_params[2];
1315613156
const int32_t p0 = tensor->op_params[3];
1315713157
const int32_t p1 = tensor->op_params[4];
13158-
const int32_t p1 = tensor->op_params[5];
13158+
const int32_t p2 = tensor->op_params[5];
1315913159
const int32_t d0 = tensor->op_params[6];
1316013160
const int32_t d1 = tensor->op_params[7];
13161-
const int32_t d1 = tensor->op_params[8];
13161+
const int32_t d2 = tensor->op_params[8];
1316213162
const int32_t IC = tensor->op_params[9];
1316313163

13164-
tensor_clone = ggml_im2col(ggml_ctx, src_clone[0], src_clone[1], IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, tensor->type);
13164+
tensor_clone = ggml_im2col_3d(ggml_ctx, src_clone[0], src_clone[1], IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, tensor->type);
1316513165
} else if (tensor->op == GGML_OP_TIMESTEP_EMBEDDING) {
1316613166
const int32_t dim = tensor->op_params[0];
1316713167
const int32_t max_period = tensor->op_params[1];

0 commit comments

Comments (0)