@@ -513,6 +513,7 @@ struct vk_device_struct {
513513 vk_queue compute_queue;
514514 vk_queue transfer_queue;
515515 bool single_queue;
516+ bool support_async;
516517 uint32_t subgroup_size;
517518 uint32_t shader_core_count;
518519 bool uma;
@@ -4273,6 +4274,16 @@ static vk_device ggml_vk_get_device(size_t idx) {
42734274 device->vendor_id = device->properties.vendorID;
42744275 device->driver_id = driver_props.driverID;
42754276
4277+ // Using the async backend interfaces appears to be broken on older Intel HW (DG1),
4278+ // see https://github.com/ggml-org/llama.cpp/issues/17302.
4279+ device->support_async = (device->vendor_id != VK_VENDOR_ID_INTEL ||
4280+ std::string(device->properties.deviceName.data()).find("(DG1)") == std::string::npos) &&
4281+ getenv("GGML_VK_DISABLE_ASYNC") == nullptr;
4282+
4283+ if (!device->support_async) {
4284+ GGML_LOG_DEBUG("ggml_vulkan: WARNING: Async execution disabled on certain Intel devices.\n");
4285+ }
4286+
42764287 const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE");
42774288
42784289 if (GGML_VK_FORCE_MAX_ALLOCATION_SIZE != nullptr) {
@@ -13187,6 +13198,10 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
1318713198 ctx->device->perf_logger->print_timings();
1318813199 }
1318913200
13201+ if (!ctx->device->support_async) {
13202+ ggml_vk_synchronize(ctx);
13203+ }
13204+
1319013205 return GGML_STATUS_SUCCESS;
1319113206
1319213207 UNUSED(backend);
@@ -13480,6 +13495,10 @@ ggml_backend_t ggml_backend_vk_init(size_t dev_num) {
1348013495 /* .context = */ ctx,
1348113496 };
1348213497
13498+ if (!ctx->device->support_async) {
13499+ vk_backend->iface.get_tensor_async = nullptr;
13500+ }
13501+
1348313502 return vk_backend;
1348413503}
1348513504
0 commit comments