9 changes: 9 additions & 0 deletions ggml/src/ggml-cuda/softmax.cu
@@ -13,6 +13,12 @@ __device__ float __forceinline__ t2f32<half>(half val) {
return __half2float(val);
}

// When ncols_template == 0 the loop bounds in this function are not known at compile time and the loops can't be unrolled.
// As we want to keep #pragma unroll for all other cases we suppress the clang transformation warning here.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed"
#endif
template <bool use_shared, int ncols_template, int block_size_template, typename T>
static __global__ void soft_max_f32(
const float * x, const T * mask, float * dst, const int ncols_par, const int nrows_y,
@@ -118,6 +124,9 @@ static __global__ void soft_max_f32(
dst[col] = vals[col] * inv_sum;
}
}
#ifdef __clang__
#pragma clang diagnostic pop
#endif

static __global__ void soft_max_back_f32(
const float * grad, const float * dstf, float * dst, const int ncols, const float scale) {
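For context, here is a minimal self-contained sketch of the pattern this change applies. The kernel name (scale_rows) and its parameters are hypothetical, not part of ggml; the sketch only assumes the same convention as soft_max_f32, where a template parameter of 0 means the column count is only known at run time, so clang cannot honor #pragma unroll and emits -Wpass-failed:

// Hypothetical kernel illustrating the diagnostic push/pop pattern.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed"
#endif
template <int ncols_template>
__global__ void scale_rows(const float * x, float * dst, const int ncols_par) {
    // Compile-time bound when ncols_template > 0, runtime bound otherwise.
    const int ncols = ncols_template == 0 ? ncols_par : ncols_template;
    const int row   = blockIdx.x;

    // For ncols_template == 0 the trip count is unknown at compile time,
    // so clang cannot unroll this loop and would warn without the pragmas above.
#pragma unroll
    for (int col = threadIdx.x; col < ncols; col += blockDim.x) {
        dst[row*ncols + col] = 2.0f * x[row*ncols + col];
    }
}
#ifdef __clang__
#pragma clang diagnostic pop
#endif

Guarding the pragmas with #ifdef __clang__ keeps the code portable: nvcc's host compiler may not be clang, and unknown "#pragma clang" directives would otherwise trigger warnings of their own.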