Skip to content

Commit aa47258

Browse files
committed
Intel support is disabled because its conv2d performance is poor.
1 parent a09e8f5 commit aa47258

File tree

1 file changed

+20
-18
lines changed

1 file changed

+20
-18
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3038,29 +3038,25 @@ static void ggml_vk_load_shaders(vk_device& device) {
30383038
uint32_t conv2d_WG_SIZE = 256;
30393039
uint32_t conv2d_BS_K = 128;
30403040
uint32_t conv2d_BS_CRS = 16;
3041-
// Enables subgroup ops for preventing the re-calculation of indices.
3042-
uint32_t use_collectives = 0;
3043-
// CRS block size should be capped at subgroup size for correctness when shuffle is used.
3044-
if(getenv("GGML_VK_USE_COLLECTIVES") != nullptr && device->subgroup_shuffle){
3041+
uint32_t use_collectives = 0; // Enables subgroup ops for preventing the re-calculation of indices.
3042+
if(device->subgroup_shuffle){
30453043
use_collectives = 1;
3046-
conv2d_BS_CRS = std::min(device->subgroup_size, conv2d_BS_CRS);
3044+
conv2d_BS_CRS = std::min(device->subgroup_size, conv2d_BS_CRS); // CRS block size should be capped at subgroup size for correctness when shuffle is used.
30473045
}
30483046
uint32_t conv2d_BS_NPQ = 128;
30493047
uint32_t conv2d_TS_K = 8;
30503048
uint32_t conv2d_shmem_req = (conv2d_BS_K*(conv2d_BS_CRS+1) + conv2d_BS_CRS*(conv2d_BS_NPQ+1))*sizeof(float);
30513049
if(device->properties.limits.maxComputeSharedMemorySize < conv2d_shmem_req){
30523050
conv2d_BS_CRS = 8;
3053-
if(getenv("GGML_VK_USE_COLLECTIVES") != nullptr && device->subgroup_shuffle){
3051+
if(device->subgroup_shuffle){
30543052
conv2d_BS_CRS = std::min(device->subgroup_size, conv2d_BS_CRS);
30553053
}
30563054
}
3057-
3058-
std::cerr << " --> BS_CRS=" << conv2d_BS_CRS << " use_collectives=" << use_collectives << std::endl;
30593055

3060-
if(device->subgroup_shuffle){
3056+
if(use_collectives){
30613057
ggml_vk_create_pipeline(device, device->pipeline_conv2d_f32, "conv2d_f32", conv2d_f32_len, conv2d_f32_data, "main", 3, sizeof(vk_op_conv2d_push_constants), {conv2d_BS_K, conv2d_BS_NPQ, 1}, {conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives}, 1, true, true);
30623058
}else{
3063-
ggml_vk_create_pipeline(device, device->pipeline_conv2d_f32, "conv2d_f32", conv2d_f32_len, conv2d_f32_data, "main", 3, sizeof(vk_op_conv2d_push_constants), {conv2d_BS_K, conv2d_BS_NPQ, 1}, {conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives}, 1, true);
3059+
ggml_vk_create_pipeline(device, device->pipeline_conv2d_f32, "conv2d_f32", conv2d_f32_len, conv2d_f32_data, "main", 3, sizeof(vk_op_conv2d_push_constants), {conv2d_BS_K, conv2d_BS_NPQ, 1}, {conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives}, 1, true, false);
30643060
}
30653061

30663062
ggml_vk_create_pipeline(device, device->pipeline_conv2d_dw_whcn_f32, "conv2d_dw_whcn_f32", conv2d_dw_whcn_f32_len, conv2d_dw_whcn_f32_data, "main", 3, sizeof(vk_op_conv2d_dw_push_constants), {512, 1, 1}, {}, 1);
@@ -10820,14 +10816,20 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
1082010816
return true;
1082110817
case GGML_OP_CONV_TRANSPOSE_1D:
1082210818
return op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_F32;
10823-
case GGML_OP_CONV_2D:
10824-
// Channel-contiguous format is not supported yet.
10825-
return (op->src[0]->type == GGML_TYPE_F32 &&
10826-
op->src[1]->type == GGML_TYPE_F32 &&
10827-
op->type == GGML_TYPE_F32 &&
10828-
ggml_is_contiguous(op->src[0]) &&
10829-
ggml_is_contiguous(op->src[1]) &&
10830-
ggml_is_contiguous(op));
10819+
case GGML_OP_CONV_2D:
10820+
{
10821+
// Op is disabled for Intel
10822+
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
10823+
const vk_device& device = ggml_vk_get_device(ctx->device);
10824+
bool is_Intel = ggml_vk_get_device(ctx->device)->vendor_id == VK_VENDOR_ID_INTEL;
10825+
// Channel-contiguous format is not supported yet.
10826+
return (op->src[0]->type == GGML_TYPE_F32 &&
10827+
op->src[1]->type == GGML_TYPE_F32 &&
10828+
op->type == GGML_TYPE_F32 &&
10829+
ggml_is_contiguous(op->src[0]) &&
10830+
ggml_is_contiguous(op->src[1]) &&
10831+
ggml_is_contiguous(op)) && !is_Intel;
10832+
}
1083110833
default:
1083210834
return false;
1083310835
}

0 commit comments

Comments
 (0)