Skip to content

Commit 9be099b

Browse files
jeffbolznv and pwilkin
authored and committed
vulkan: add RTE variants of exp shader (ggml-org#16165)
This fixes some failures on Turing where "round to zero" rounds to the max f16 value but the CPU reference value is infinite.
1 parent b7053db commit 9be099b

File tree

3 files changed

+17
-3
lines changed

3 files changed

+17
-3
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3391,7 +3391,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
33913391
ggml_vk_create_pipeline(device, device->pipeline_ ## name [0], #name "_f32", name ## _f32_len, name ## _f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); \
33923392
ggml_vk_create_pipeline(device, device->pipeline_ ## name [1], #name "_f16", name ## _f16_len, name ## _f16_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1);
33933393

3394-
CREATE_UNARY(exp)
33953394
CREATE_UNARY(gelu)
33963395
CREATE_UNARY(gelu_erf)
33973396
CREATE_UNARY(gelu_quick)
@@ -3403,6 +3402,17 @@ static void ggml_vk_load_shaders(vk_device& device) {
34033402
CREATE_UNARY(hardswish)
34043403
#undef CREATE_UNARY
34053404

3405+
#define CREATE_UNARY_RTE(name) \
3406+
if (device->float_controls_rte_fp16) { \
3407+
ggml_vk_create_pipeline(device, device->pipeline_ ## name [0], #name "_f32_rte", name ## _f32_rte_len, name ## _f32_rte_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); \
3408+
ggml_vk_create_pipeline(device, device->pipeline_ ## name [1], #name "_f16_rte", name ## _f16_rte_len, name ## _f16_rte_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); \
3409+
} else { \
3410+
ggml_vk_create_pipeline(device, device->pipeline_ ## name [0], #name "_f32", name ## _f32_len, name ## _f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); \
3411+
ggml_vk_create_pipeline(device, device->pipeline_ ## name [1], #name "_f16", name ## _f16_len, name ## _f16_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); \
3412+
}
3413+
CREATE_UNARY_RTE(exp)
3414+
#undef CREATE_UNARY_RTE
3415+
34063416
#define CREATE_GLU(name) \
34073417
if (device->float_controls_rte_fp16) { \
34083418
ggml_vk_create_pipeline(device, device->pipeline_ ## name [0], #name "_f32_rte", name ## _f32_rte_len, name ## _f32_rte_data, "main", 3, sizeof(vk_op_glu_push_constants), {512, 1, 1}, {}, 1, true); \

ggml/src/ggml-vulkan/vulkan-shaders/exp.comp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
#version 450
22

3+
#include "rte.comp"
34
#include "generic_head.comp"
45
#include "types.comp"
56

ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -704,8 +704,11 @@ void process_shaders() {
704704

705705
string_to_spv("upscale_f32", "upscale.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
706706

707-
string_to_spv("exp_f16", "exp.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
708-
string_to_spv("exp_f32", "exp.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
707+
for (auto rte : {false, true}) {
708+
std::string suffix = rte ? "_rte" : "";
709+
string_to_spv("exp_f16" + suffix, "exp.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", rte ? "1" : "0"}});
710+
string_to_spv("exp_f32" + suffix, "exp.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"} , {"RTE16", rte ? "1" : "0"}});
711+
}
709712
string_to_spv("gelu_f16", "gelu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
710713
string_to_spv("gelu_f32", "gelu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
711714
string_to_spv("gelu_erf_f16", "gelu_erf.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});

0 commit comments

Comments
 (0)