Skip to content

Commit f70b514

Browse files
committed
Merge branch 'cuda-build-doc' of https://github.com/YannFollet/llama.cpp into cuda-build-doc
2 parents de1bb5a + 26a8406 commit f70b514

File tree

6 files changed

+51
-13
lines changed

6 files changed

+51
-13
lines changed

CMakeLists.txt

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,9 @@ if (WIN32)
4646
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
4747
endif()
4848

49-
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
50-
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/source-charset:utf-8>")
51-
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/source-charset:utf-8>")
52-
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/execution-charset:utf-8>")
53-
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/execution-charset:utf-8>")
49+
if (MSVC)
50+
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
51+
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
5452
endif()
5553

5654
#

CMakePresets.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@
3131
{ "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
3232
{ "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } },
3333

34+
{
35+
"name": "x64-windows-llvm", "hidden": true,
36+
"cacheVariables": {
37+
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake"
38+
}
39+
},
40+
3441
{
3542
"name": "arm64-windows-msvc", "hidden": true,
3643
"architecture": { "value": "arm64", "strategy": "external" },
@@ -70,6 +77,11 @@
7077
{ "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
7178
{ "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
7279

80+
{ "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] },
81+
{ "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] },
82+
{ "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] },
83+
{ "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] },
84+
7385
{ "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] },
7486
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
7587
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },

cmake/x64-windows-llvm.cmake

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
set( CMAKE_SYSTEM_NAME Windows )
2+
set( CMAKE_SYSTEM_PROCESSOR x86_64 )
3+
4+
set( CMAKE_C_COMPILER clang )
5+
set( CMAKE_CXX_COMPILER clang++ )
6+
7+
set( arch_c_flags "-march=native" )
8+
9+
set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" )
10+
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" )
11+

docs/build.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,13 @@ cmake --build build --config Release
5757
```
5858
Building for arm64 can also be done with the MSVC compiler with the build-arm64-windows-MSVC preset, or the standard CMake build instructions. However, note that the MSVC compiler does not support inline ARM assembly code, used e.g. for the accelerated Q4_0_N_M CPU kernels.
5959
60+
To build with the Ninja generator and Clang as the default compiler:
61+
First set the `LIB` environment variable so the linker can find the Windows SDK and MSVC libraries, e.g.: `set LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\x64;C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.41.34120\lib\x64\uwp;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\x64`
62+
```bash
63+
cmake --preset x64-windows-llvm-release
64+
cmake --build build-x64-windows-llvm-release
65+
```
66+
6067
## BLAS Build
6168
6269
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Using BLAS doesn't affect the generation performance. There are currently several different BLAS implementations available for build and use:

ggml/src/ggml-cuda/mmv.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ static __global__ void mul_mat_vec(
5757
if (block_size > WARP_SIZE) {
5858
buf_iw[tid/WARP_SIZE] = sumf;
5959
__syncthreads();
60-
if (tid > WARP_SIZE) {
60+
if (tid >= WARP_SIZE) {
6161
return;
6262
}
6363
sumf = buf_iw[tid];

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ static_assert(sizeof(vk_op_unary_push_constants) <= 128, "sizeof(vk_op_unary_pus
427427
// and a shift:
428428
//
429429
// n/d = (mulhi(n, mp) + n) >> L;
430-
void init_fastdiv_values(uint32_t d, uint32_t &mp, uint32_t &L)
430+
static void init_fastdiv_values(uint32_t d, uint32_t &mp, uint32_t &L)
431431
{
432432
// compute L = ceil(log2(d));
433433
L = 0;
@@ -439,6 +439,7 @@ void init_fastdiv_values(uint32_t d, uint32_t &mp, uint32_t &L)
439439
}
440440

441441
template <typename T> void init_pushconst_fastdiv(T &p) {
442+
GGML_UNUSED(p);
442443
static_assert(!std::is_const<T>::value, "unexpected type");
443444
}
444445

@@ -3417,7 +3418,7 @@ static uint32_t ggml_vk_guess_split_k(ggml_backend_vk_context * ctx, int m, int
34173418
return split_k;
34183419
}
34193420

3420-
static vk_pipeline ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, bool aligned, ggml_type type_a) {
3421+
static vk_pipeline ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, bool aligned) {
34213422
VK_LOG_DEBUG("ggml_vk_guess_matmul_pipeline(" << m << ", " << n << ", " << aligned << ")");
34223423

34233424
if (ctx->device->coopmat2) {
@@ -3439,9 +3440,9 @@ static vk_pipeline ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx,
34393440
return aligned ? mmp->a_l : mmp->l;
34403441
}
34413442

3442-
static uint32_t ggml_vk_guess_matmul_pipeline_align(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type type_a) {
3443+
static uint32_t ggml_vk_guess_matmul_pipeline_align(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n) {
34433444
VK_LOG_DEBUG("ggml_vk_guess_matmul_pipeline_align(" << m << ", " << n << ")");
3444-
return ggml_vk_guess_matmul_pipeline(ctx, mmp, m, n, true, type_a)->align;
3445+
return ggml_vk_guess_matmul_pipeline(ctx, mmp, m, n, true)->align;
34453446
}
34463447

34473448
static void ggml_vk_matmul(
@@ -3571,6 +3572,7 @@ static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context&
35713572
(uint32_t)tensor->ne[0], (uint32_t)tensor->ne[1], (uint32_t)tensor->ne[2], (uint32_t)tensor->ne[3], 1 , (uint32_t)tensor->ne[0] , (uint32_t)(tensor->ne[0] * tensor->ne[1]) , (uint32_t)(tensor->ne[0] * tensor->ne[1] * tensor->ne[2]),
35723573
0,
35733574
0.0f, 0.0f,
3575+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35743576
};
35753577
init_pushconst_fastdiv(pc);
35763578
ggml_vk_sync_buffers(subctx);
@@ -3644,10 +3646,10 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
36443646
const int y_ne = ne11 * ne10;
36453647
const int d_ne = ne11 * ne01;
36463648

3647-
const uint32_t kpad = ggml_vk_align_size(ne10, ggml_vk_guess_matmul_pipeline_align(ctx, mmp, ne01, ne11, src0->type));
3649+
const uint32_t kpad = ggml_vk_align_size(ne10, ggml_vk_guess_matmul_pipeline_align(ctx, mmp, ne01, ne11));
36483650
const bool aligned = ne10 == kpad && ne01 > 8 && ne11 > 8;
36493651

3650-
vk_pipeline pipeline = ggml_vk_guess_matmul_pipeline(ctx, mmp, ne01, ne11, aligned, src0->type);
3652+
vk_pipeline pipeline = ggml_vk_guess_matmul_pipeline(ctx, mmp, ne01, ne11, aligned);
36513653

36523654
const uint32_t split_k = ggml_vk_guess_split_k(ctx, ne01, ne11, ne10, pipeline);
36533655

@@ -5351,7 +5353,8 @@ static void ggml_vk_scale(ggml_backend_vk_context * ctx, vk_context& subctx, con
53515353
(uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size,
53525354
(uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size,
53535355
0,
5354-
op_params[0], 0.0f
5356+
op_params[0], 0.0f,
5357+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53555358
}, dryrun);
53565359
}
53575360

@@ -5365,6 +5368,7 @@ static void ggml_vk_sqr(ggml_backend_vk_context * ctx, vk_context& subctx, const
53655368
(uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size,
53665369
0,
53675370
0.0f, 0.0f,
5371+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53685372
}, dryrun);
53695373
}
53705374

@@ -5378,6 +5382,7 @@ static void ggml_vk_sin(ggml_backend_vk_context * ctx, vk_context& subctx, const
53785382
(uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size,
53795383
0,
53805384
0.0f, 0.0f,
5385+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53815386
}, dryrun);
53825387
}
53835388

@@ -5391,6 +5396,7 @@ static void ggml_vk_cos(ggml_backend_vk_context * ctx, vk_context& subctx, const
53915396
(uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size,
53925397
0,
53935398
0.0f, 0.0f,
5399+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53945400
}, dryrun);
53955401
}
53965402

@@ -5405,6 +5411,7 @@ static void ggml_vk_clamp(ggml_backend_vk_context * ctx, vk_context& subctx, con
54055411
(uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size,
54065412
0,
54075413
op_params[0], op_params[1],
5414+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54085415
}, dryrun);
54095416
}
54105417

@@ -5418,6 +5425,7 @@ static void ggml_vk_pad(ggml_backend_vk_context * ctx, vk_context& subctx, const
54185425
(uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size,
54195426
0,
54205427
0.0f, 0.0f,
5428+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54215429
}, dryrun);
54225430
}
54235431

@@ -5431,6 +5439,7 @@ static void ggml_vk_repeat(ggml_backend_vk_context * ctx, vk_context& subctx, co
54315439
(uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size,
54325440
0,
54335441
0.0f, 0.0f,
5442+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54345443
}, dryrun);
54355444
}
54365445

@@ -5445,6 +5454,7 @@ static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context& subctx, const
54455454
(uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size,
54465455
d_offset,
54475456
0.0f, 0.0f,
5457+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54485458
}, dryrun);
54495459
}
54505460

0 commit comments

Comments
 (0)