@@ -360,26 +360,27 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
360360
361361void ggml_cuda_flash_attn_ext (ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
362362 ggml_cuda_set_device (ctx.device );
363+ int cc;
363364 switch (ggml_cuda_get_best_fattn_kernel (ggml_cuda_get_device (), dst)) {
364365 case BEST_FATTN_KERNEL_NONE:
365366 GGML_ABORT (" fatal error" );
366367 case BEST_FATTN_KERNEL_TILE:
367- const int cc = ggml_cuda_info ().devices [device].cc ;
368+ cc = ggml_cuda_info ().devices [device].cc ;
368369 GGML_LOG_WARN (" \n CC=%d, TILE_KERNEL\n " ,cc);
369370 ggml_cuda_flash_attn_ext_tile (ctx, dst);
370371 break ;
371372 case BEST_FATTN_KERNEL_VEC:
372- const int cc = ggml_cuda_info ().devices [device].cc ;
373+ cc = ggml_cuda_info ().devices [device].cc ;
373374 GGML_LOG_WARN (" \n CC=%d, VEC_KERNEL\n " ,cc);
374375 ggml_cuda_flash_attn_ext_vec (ctx, dst);
375376 break ;
376377 case BEST_FATTN_KERNEL_WMMA_F16:
377- const int cc = ggml_cuda_info ().devices [device].cc ;
378+ cc = ggml_cuda_info ().devices [device].cc ;
378379 GGML_LOG_WARN (" \n CC=%d, WMMA_KERNEL\n " ,cc);
379380 ggml_cuda_flash_attn_ext_wmma_f16 (ctx, dst);
380381 break ;
381382 case BEST_FATTN_KERNEL_MMA_F16:
382- const int cc = ggml_cuda_info ().devices [device].cc ;
383+ cc = ggml_cuda_info ().devices [device].cc ;
383384 GGML_LOG_WARN (" \n CC=%d, MMA_KERNEL\n " ,cc);
384385 ggml_cuda_flash_attn_ext_mma_f16 (ctx, dst);
385386 break ;
0 commit comments