@@ -123,13 +123,13 @@ void ggml_cuda_cross_entropy_loss(ggml_backend_cuda_context & ctx, ggml_tensor *
123123 ggml_cuda_pool_alloc<float > dst_tmp (pool, blocks_num.x );
124124
125125 if (nbytes_shared <= smpbo) {
126- #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
126+ #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA)
127127 static bool shared_memory_limit_raised[GGML_CUDA_MAX_DEVICES] = {false };
128128 if (!shared_memory_limit_raised[id]) {
129- CUDA_CHECK (cudaFuncSetAttribute (cross_entropy_loss_back_f32 <true >, cudaFuncAttributeMaxDynamicSharedMemorySize, smpbo));
129+ CUDA_CHECK (cudaFuncSetAttribute (cross_entropy_loss_f32 <true >, cudaFuncAttributeMaxDynamicSharedMemorySize, smpbo));
130130 shared_memory_limit_raised[id] = true ;
131131 }
132- #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
132+ #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA)
133133 cross_entropy_loss_f32<true ><<<blocks_num, blocks_dim, nbytes_shared, stream>>> (src0_d, src1_d, dst_tmp.ptr , ne00, nrows);
134134 } else {
135135 cross_entropy_loss_f32<false ><<<blocks_num, blocks_dim, 0 , stream>>> (src0_d, src1_d, dst_tmp.ptr , ne00, nrows);
@@ -175,13 +175,13 @@ void ggml_cuda_cross_entropy_loss_back(ggml_backend_cuda_context & ctx, ggml_ten
175175 const size_t smpbo = ggml_cuda_info ().devices [id].smpbo ;
176176
177177 if (nbytes_shared <= smpbo) {
178- #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
178+ #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA)
179179 static bool shared_memory_limit_raised[GGML_CUDA_MAX_DEVICES] = {false };
180180 if (!shared_memory_limit_raised[id]) {
181181 CUDA_CHECK (cudaFuncSetAttribute (cross_entropy_loss_back_f32<true >, cudaFuncAttributeMaxDynamicSharedMemorySize, smpbo));
182182 shared_memory_limit_raised[id] = true ;
183183 }
184- #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
184+ #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA)
185185 cross_entropy_loss_back_f32<true ><<<blocks_num, blocks_dim, nbytes_shared, stream>>> (grad_d, src0f_d, src1f_d, dst_d, ne00);
186186 } else {
187187 cross_entropy_loss_back_f32<false ><<<blocks_num, blocks_dim, 0 , stream>>> (grad_d, src0f_d, src1f_d, dst_d, ne00);
0 commit comments