Skip to content

Commit 3b8a08c

Browse files
committed
metal : remove GGML_METAL_USE_BF16
ggml-ci
1 parent 81f5653 commit 3b8a08c

File tree

7 files changed: 47 additions (+47) and 60 deletions (-60)

ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ SRC=`pwd`
4545
CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON"
4646

4747
if [ ! -z ${GG_BUILD_METAL} ]; then
48-
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_USE_BF16=ON"
48+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
4949
fi
5050

5151
if [ ! -z ${GG_BUILD_CUDA} ]; then

ggml/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,6 @@ option(GGML_WEBGPU "ggml: use WebGPU"
190190
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
191191
option(GGML_ZDNN "ggml: use zDNN" OFF)
192192
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
193-
option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
194193
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
195194
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
196195
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL})

ggml/src/ggml-metal/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,6 @@ if (GGML_METAL_NDEBUG)
2121
add_compile_definitions(GGML_METAL_NDEBUG)
2222
endif()
2323

24-
if (GGML_METAL_USE_BF16)
25-
add_compile_definitions(GGML_METAL_USE_BF16)
26-
endif()
27-
2824
# copy metal files to bin directory
2925
configure_file(../ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
3026
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)

ggml/src/ggml-metal/ggml-metal-device.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ struct ggml_backend_metal_device_props {
1919
bool has_simdgroup_mm;
2020
bool has_unified_memory;
2121
bool has_bfloat;
22-
bool use_bfloat;
2322
bool use_residency_sets;
2423
bool use_shared_buffers;
2524

ggml/src/ggml-metal/ggml-metal-device.m

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -44,12 +44,6 @@ ggml_backend_metal_device_t ggml_backend_metal_device_init(void) {
4444
ctx->props.has_bfloat = [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
4545
ctx->props.has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
4646

47-
#if defined(GGML_METAL_USE_BF16)
48-
ctx->props.use_bfloat = ctx->props.has_bfloat;
49-
#else
50-
ctx->props.use_bfloat = false;
51-
#endif
52-
5347
ctx->props.use_residency_sets = true;
5448
#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
5549
ctx->props.use_residency_sets = getenv("GGML_METAL_NO_RESIDENCY") == nil;
@@ -172,8 +166,8 @@ ggml_backend_metal_device_t ggml_backend_metal_device_init(void) {
172166
// dictionary of preprocessor macros
173167
NSMutableDictionary * prep = [NSMutableDictionary dictionary];
174168

175-
if (ctx->props.use_bfloat) {
176-
[prep setObject:@"1" forKey:@"GGML_METAL_USE_BF16"];
169+
if (ctx->props.has_bfloat) {
170+
[prep setObject:@"1" forKey:@"GGML_METAL_HAS_BF16"];
177171
}
178172

179173
#if GGML_METAL_EMBED_LIBRARY
@@ -238,7 +232,6 @@ ggml_backend_metal_device_t ggml_backend_metal_device_init(void) {
238232
GGML_LOG_INFO("%s: simdgroup matrix mul. = %s\n", __func__, ctx->props.has_simdgroup_mm ? "true" : "false");
239233
GGML_LOG_INFO("%s: has unified memory = %s\n", __func__, ctx->props.has_unified_memory ? "true" : "false");
240234
GGML_LOG_INFO("%s: has bfloat = %s\n", __func__, ctx->props.has_bfloat ? "true" : "false");
241-
GGML_LOG_INFO("%s: use bfloat = %s\n", __func__, ctx->props.use_bfloat ? "true" : "false");
242235
GGML_LOG_INFO("%s: use residency sets = %s\n", __func__, ctx->props.use_residency_sets ? "true" : "false");
243236
GGML_LOG_INFO("%s: use shared buffers = %s\n", __func__, ctx->props.use_shared_buffers ? "true" : "false");
244237

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 21 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -381,6 +381,7 @@ - (void) dealloc {
381381
// additional, inference-time compiled kernels
382382
NSMutableDictionary * kernels_ext;
383383

384+
bool use_bfloat;
384385
bool use_fusion;
385386
bool use_concurrency;
386387
bool use_graph_optimize;
@@ -487,6 +488,7 @@ @implementation GGMLMetalClass
487488

488489
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
489490

491+
ctx->use_bfloat = ctx->props_dev.has_bfloat;
490492
ctx->use_fusion = getenv("GGML_METAL_FUSION_DISABLE") == nil;
491493
ctx->use_concurrency = getenv("GGML_METAL_CONCURRENCY_DISABLE") == nil;
492494

@@ -508,6 +510,7 @@ @implementation GGMLMetalClass
508510

509511
memset(ctx->fuse_cnt, 0, sizeof(ctx->fuse_cnt));
510512

513+
GGML_LOG_INFO("%s: use bfloat = %s\n", __func__, ctx->use_bfloat ? "true" : "false");
511514
GGML_LOG_INFO("%s: use fusion = %s\n", __func__, ctx->use_fusion ? "true" : "false");
512515
GGML_LOG_INFO("%s: use concurrency = %s\n", __func__, ctx->use_concurrency ? "true" : "false");
513516
GGML_LOG_INFO("%s: use graph optimize = %s\n", __func__, ctx->use_graph_optimize ? "true" : "false");
@@ -557,7 +560,7 @@ @implementation GGMLMetalClass
557560

558561
const bool has_simdgroup_mm = ctx->props_dev.has_simdgroup_mm;
559562
const bool has_simdgroup_reduction = ctx->props_dev.has_simdgroup_reduction;
560-
const bool use_bfloat = ctx->props_dev.use_bfloat;
563+
const bool has_bfloat = ctx->props_dev.has_bfloat;
561564

562565
// simd_sum and simd_max requires MTLGPUFamilyApple7
563566

@@ -595,7 +598,7 @@ @implementation GGMLMetalClass
595598
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_DIAG_MASK_INF_8, diag_mask_inf_8, true);
596599
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_F32, get_rows_f32, true);
597600
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_F16, get_rows_f16, true);
598-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_BF16, get_rows_bf16, use_bfloat);
601+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_BF16, get_rows_bf16, has_bfloat);
599602
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q4_0, get_rows_q4_0, true);
600603
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q4_1, get_rows_q4_1, true);
601604
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q5_0, get_rows_q5_0, true);
@@ -619,7 +622,7 @@ @implementation GGMLMetalClass
619622
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_I32, get_rows_i32, true);
620623
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_F32, set_rows_f32, true);
621624
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_F16, set_rows_f16, true);
622-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_BF16, set_rows_bf16, use_bfloat);
625+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_BF16, set_rows_bf16, has_bfloat);
623626
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_Q8_0, set_rows_q8_0, true);
624627
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_Q4_0, set_rows_q4_0, true);
625628
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_Q4_1, set_rows_q4_1, true);
@@ -636,11 +639,11 @@ @implementation GGMLMetalClass
636639
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_RWKV_WKV7_F32, rwkv_wkv7_f32, true);
637640
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32, mul_mv_f32_f32, has_simdgroup_reduction);
638641
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32_C4, mul_mv_f32_f32_c4, true);
639-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32, mul_mv_bf16_f32, has_simdgroup_reduction && use_bfloat);
640-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_C4, mul_mv_bf16_f32_c4, use_bfloat);
641-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_1ROW, mul_mv_bf16_f32_1row, has_simdgroup_reduction && use_bfloat);
642-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_L4, mul_mv_bf16_f32_l4, has_simdgroup_reduction && use_bfloat);
643-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_BF16, mul_mv_bf16_bf16, has_simdgroup_reduction && use_bfloat);
642+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32, mul_mv_bf16_f32, has_simdgroup_reduction && has_bfloat);
643+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_C4, mul_mv_bf16_f32_c4, has_bfloat);
644+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_1ROW, mul_mv_bf16_f32_1row, has_simdgroup_reduction && has_bfloat);
645+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_L4, mul_mv_bf16_f32_l4, has_simdgroup_reduction && has_bfloat);
646+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_BF16, mul_mv_bf16_bf16, has_simdgroup_reduction && has_bfloat);
644647
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32, mul_mv_f16_f32, has_simdgroup_reduction);
645648
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_C4, mul_mv_f16_f32_c4, true);
646649
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_1ROW, mul_mv_f16_f32_1row, has_simdgroup_reduction);
@@ -719,7 +722,7 @@ @implementation GGMLMetalClass
719722
//GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F32_1ROW, mul_mv_id_f16_f32_1row, has_simdgroup_reduction);
720723
//GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F32_L4, mul_mv_id_f16_f32_l4, has_simdgroup_reduction);
721724
//GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F16, mul_mv_id_f16_f16, has_simdgroup_reduction);
722-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_BF16_F32, mul_mv_id_bf16_f32, has_simdgroup_reduction && use_bfloat);
725+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_BF16_F32, mul_mv_id_bf16_f32, has_simdgroup_reduction && has_bfloat);
723726
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q4_0_F32, mul_mv_id_q4_0_f32, has_simdgroup_reduction);
724727
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q4_1_F32, mul_mv_id_q4_1_f32, has_simdgroup_reduction);
725728
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q5_0_F32, mul_mv_id_q5_0_f32, has_simdgroup_reduction);
@@ -742,7 +745,7 @@ @implementation GGMLMetalClass
742745
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ4_XS_F32, mul_mv_id_iq4_xs_f32, has_simdgroup_reduction);
743746
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_F32_F32, mul_mm_f32_f32, has_simdgroup_mm);
744747
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_F16_F32, mul_mm_f16_f32, has_simdgroup_mm);
745-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_BF16_F32, mul_mm_bf16_f32, has_simdgroup_mm && use_bfloat);
748+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_BF16_F32, mul_mm_bf16_f32, has_simdgroup_mm && has_bfloat);
746749
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_0_F32, mul_mm_q4_0_f32, has_simdgroup_mm);
747750
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_1_F32, mul_mm_q4_1_f32, has_simdgroup_mm);
748751
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q5_0_F32, mul_mm_q5_0_f32, has_simdgroup_mm);
@@ -772,7 +775,7 @@ @implementation GGMLMetalClass
772775
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_MAP0_F16_NE20_16, mul_mm_id_map0_f16_ne20_16, has_simdgroup_mm);
773776
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F32_F16, mul_mm_id_f32_f16, has_simdgroup_mm);
774777
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F16_F16, mul_mm_id_f16_f16, has_simdgroup_mm);
775-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_BF16_F16, mul_mm_id_bf16_f16, has_simdgroup_mm && use_bfloat);
778+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_BF16_F16, mul_mm_id_bf16_f16, has_simdgroup_mm && has_bfloat);
776779
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q4_0_F16, mul_mm_id_q4_0_f16, has_simdgroup_mm);
777780
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q4_1_F16, mul_mm_id_q4_1_f16, has_simdgroup_mm);
778781
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q5_0_F16, mul_mm_id_q5_0_f16, has_simdgroup_mm);
@@ -817,11 +820,11 @@ @implementation GGMLMetalClass
817820
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_LEAKY_RELU_F32, leaky_relu_f32, true);
818821
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_F32, cpy_f32_f32, true);
819822
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_F16, cpy_f32_f16, true);
820-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_BF16, cpy_f32_bf16, use_bfloat);
823+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_BF16, cpy_f32_bf16, has_bfloat);
821824
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F16_F32, cpy_f16_f32, true);
822825
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F16_F16, cpy_f16_f16, true);
823-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_BF16_F32, cpy_bf16_f32, use_bfloat);
824-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_BF16_BF16, cpy_bf16_bf16, use_bfloat);
826+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_BF16_F32, cpy_bf16_f32, has_bfloat);
827+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_BF16_BF16, cpy_bf16_bf16, has_bfloat);
825828
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_I32, cpy_f32_i32, true);
826829
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_I32_F32, cpy_i32_f32, true);
827830
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0, cpy_f32_q8_0, true);
@@ -1348,9 +1351,9 @@ static void ggml_backend_metal_buffer_rset_free(struct ggml_backend_metal_buffer
13481351
static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_props * props_dev, const struct ggml_tensor * op) {
13491352
const bool has_simdgroup_mm = props_dev->has_simdgroup_mm;
13501353
const bool has_simdgroup_reduction = props_dev->has_simdgroup_reduction;
1351-
const bool use_bfloat = props_dev->use_bfloat;
1354+
const bool has_bfloat = props_dev->has_bfloat;
13521355

1353-
if (!use_bfloat) {
1356+
if (!has_bfloat) {
13541357
if (op->type == GGML_TYPE_BF16) {
13551358
return false;
13561359
}
@@ -6088,9 +6091,9 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
60886091

60896092
const int n_nodes_per_cb = ctx->n_nodes_per_cb;
60906093

6091-
id<MTLCommandBuffer> cmd_buf = ctx->cmd_bufs[cb_idx].obj;
6092-
struct ggml_mem_ranges * mem_ranges = ctx->cmd_bufs[cb_idx].mem_ranges;
6094+
id<MTLCommandBuffer> cmd_buf = ctx->cmd_bufs[cb_idx].obj;
60936095

6096+
struct ggml_mem_ranges * mem_ranges = ctx->cmd_bufs[cb_idx].mem_ranges;
60946097
if (mem_ranges) {
60956098
ggml_mem_ranges_reset(mem_ranges);
60966099
}
@@ -6467,9 +6470,6 @@ static ggml_backend_dev_t ggml_backend_metal_reg_device_get(ggml_backend_reg_t r
64676470
static struct ggml_backend_feature g_ggml_backend_metal_features[] = {
64686471
#if defined(GGML_METAL_EMBED_LIBRARY)
64696472
{ "EMBED_LIBRARY", "1" },
6470-
#endif
6471-
#if defined(GGML_METAL_USE_BF16)
6472-
{ "BF16", "1" },
64736473
#endif
64746474
{ nil, nil },
64756475
};

Comments (0): this commit has no comments.