diff --git a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp b/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp
index 1beef8170475a..f6ce598cd7102 100644
--- a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp
@@ -281,6 +281,9 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
   // Initialize the packet header and set the doorbell signal to begin execution
   // by the HSA runtime.
   uint16_t header =
+      // Set the barrier bit so the packet processor does not start this packet
+      // until all preceding packets in the same queue have completed.
+      (1u << HSA_PACKET_HEADER_BARRIER) |
       (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
       (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) |
       (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);
@@ -540,11 +543,13 @@ int load(int argc, const char **argv, const char **envp, void *image,
     }
   }
 
-  // Obtain a queue with the minimum (power of two) size, used to send commands
+  // Obtain a queue with the maximum (power of two) size, used to send commands
   // to the HSA runtime and launch execution on the device.
-  uint64_t queue_size;
+  // The HSA specification defines this attribute as a uint32_t, so query it
+  // into a matching, zero-initialized variable instead of a wider one that
+  // would only be partially written.
+  uint32_t queue_size = 0;
   if (hsa_status_t err = hsa_agent_get_info(
-          dev_agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE, &queue_size))
+          dev_agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size))
     handle_error(err);
   hsa_queue_t *queue = nullptr;
   if (hsa_status_t err =