Skip to content

Commit 4ec0e68

Browse files
authored
check that number of unary ops matches in debug
ggml-ci
1 parent fd1c028 commit 4ec0e68

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2768,6 +2768,11 @@ static void update_cuda_graph_executable(ggml_backend_cuda_context * cuda_ctx) {
27682768
#endif
27692769

27702770
static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::initializer_list<enum ggml_op> ops, std::initializer_list<enum ggml_unary_op> unary_ops) {
2771+
#ifndef NDEBUG
2772+
const size_t num_unary = std::count(ops.begin(), ops.end(), GGML_OP_UNARY);
2773+
GGML_ASSERT(unary_ops.size() == num_unary);
2774+
#endif
2775+
27712776
if (!ggml_can_fuse(cgraph, node_idx, ops)) {
27722777
return false;
27732778
}
@@ -2802,16 +2807,12 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
28022807
if (ops.size() == 3 && ops.begin()[0] == GGML_OP_SCALE && ops.begin()[1] == GGML_OP_UNARY && ops.begin()[2] == GGML_OP_SCALE
28032808
&& unary_ops.size() == 1 && unary_ops.begin()[0] == GGML_UNARY_OP_TANH) {
28042809
const ggml_tensor *scale = cgraph->nodes[node_idx];
2805-
const ggml_tensor *tanh = cgraph->nodes[node_idx+1];
28062810
const ggml_tensor *scale2 = cgraph->nodes[node_idx+2];
28072811

28082812
GGML_ASSERT(scale->src[0]->type == GGML_TYPE_F32);
28092813
GGML_ASSERT(scale->type == GGML_TYPE_F32);
28102814

2811-
if (tanh->src[0] != scale || scale2->src[0] != tanh) {
2812-
return false;
2813-
}
2814-
2815+
// Check for bias
28152816
if (ggml_get_op_params_f32(scale, 1) != 0.0f || ggml_get_op_params_f32(scale2, 1) != 0.0f) {
28162817
return false;
28172818
}

ggml/src/ggml-cuda/softcap.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ static void softcap_f32_cuda(const float * x, float * dst, const float scale, co
1515
softcap_f32<<<num_blocks, CUDA_SOFTCAP_BLOCK_SIZE, 0, stream>>>(x, dst, scale, softcap, k);
1616
}
1717

18+
// fused GGML_OP_SCALE + GGML_UNARY_OP_TANH + GGML_OP_SCALE
1819
void ggml_cuda_op_softcap(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * src) {
1920
const ggml_tensor * src0 = src->src[0];
2021
const float * src0_d = (const float *)src0->data;

0 commit comments

Comments (0)