2828#include " hsa/hsa_ext_amd.h"
2929#endif
3030
31+ #include " llvm/Frontend/Offloading/Utility.h"
32+
3133#include < atomic>
3234#include < cstdio>
3335#include < cstdlib>
@@ -163,17 +165,13 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
163165 hsa_queue_t *queue, rpc::Server &server,
164166 const LaunchParameters &params,
165167 const char *kernel_name, args_t kernel_args,
166- bool print_resource_usage) {
168+ uint32_t wavefront_size, bool print_resource_usage) {
167169 // Look up the kernel in the loaded executable.
168170 hsa_executable_symbol_t symbol;
169171 if (hsa_status_t err = hsa_executable_get_symbol_by_name (
170172 executable, kernel_name, &dev_agent, &symbol))
171173 return err;
172174
173- uint32_t wavefront_size = 0 ;
174- if (hsa_status_t err = hsa_agent_get_info (
175- dev_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size))
176- handle_error (err);
177175 // Retrieve different properties of the kernel symbol used for launch.
178176 uint64_t kernel;
179177 uint32_t args_size;
@@ -419,6 +417,16 @@ int load(int argc, const char **argv, const char **envp, void *image,
419417 dev_agent, &coarsegrained_pool))
420418 handle_error (err);
421419
420+ // The AMDGPU target can change its wavefront size. There currently isn't a
421+ // good way to look this up through the HSA API so we use the LLVM interface.
422+ uint16_t abi_version;
423+ llvm::StringRef image_ref (reinterpret_cast <char *>(image), size);
424+ llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> info_map;
425+ if (llvm::Error err = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage (
426+ llvm::MemoryBufferRef (image_ref, " " ), info_map, abi_version)) {
427+ handle_error (llvm::toString (std::move (err)).c_str ());
428+ }
429+
422430 // Allocate fine-grained memory on the host to hold the pointer array for the
423431 // copied argv and allow the GPU agent to access it.
424432 auto allocator = [&](uint64_t size) -> void * {
@@ -448,10 +456,10 @@ int load(int argc, const char **argv, const char **envp, void *image,
448456 hsa_amd_memory_fill (dev_ret, 0 , /* count=*/ 1 );
449457
450458 // Allocate finegrained memory for the RPC server and client to share.
451- uint32_t wavefront_size = 0 ;
452- if ( hsa_status_t err = hsa_agent_get_info (
453- dev_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size))
454- handle_error (err) ;
459+ uint32_t wavefront_size =
460+ llvm::max_element (info_map, []( auto &&x, auto &&y) {
461+ return x. second . WavefrontSize < y. second . WavefrontSize ;
462+ })-> second . WavefrontSize ;
455463
456464 // Set up the RPC server.
457465 void *rpc_buffer;
@@ -513,7 +521,6 @@ int load(int argc, const char **argv, const char **envp, void *image,
513521 if (HSA_STATUS_SUCCESS ==
514522 hsa_executable_get_symbol_by_name (executable, " __llvm_libc_clock_freq" ,
515523 &dev_agent, &freq_sym)) {
516-
517524 void *host_clock_freq;
518525 if (hsa_status_t err =
519526 hsa_amd_memory_pool_allocate (finegrained_pool, sizeof (uint64_t ),
@@ -553,16 +560,17 @@ int load(int argc, const char **argv, const char **envp, void *image,
553560
554561 LaunchParameters single_threaded_params = {1 , 1 , 1 , 1 , 1 , 1 };
555562 begin_args_t init_args = {argc, dev_argv, dev_envp};
556- if (hsa_status_t err = launch_kernel (dev_agent, executable, kernargs_pool,
557- coarsegrained_pool, queue, server ,
558- single_threaded_params, " _begin.kd" ,
559- init_args , print_resource_usage))
563+ if (hsa_status_t err = launch_kernel (
564+ dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
565+ server, single_threaded_params, " _begin.kd" , init_args ,
566+ info_map[ " _begin " ]. WavefrontSize , print_resource_usage))
560567 handle_error (err);
561568
562569 start_args_t args = {argc, dev_argv, dev_envp, dev_ret};
563570 if (hsa_status_t err = launch_kernel (
564571 dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
565- server, params, " _start.kd" , args, print_resource_usage))
572+ server, params, " _start.kd" , args, info_map[" _start" ].WavefrontSize ,
573+ print_resource_usage))
566574 handle_error (err);
567575
568576 void *host_ret;
@@ -580,10 +588,10 @@ int load(int argc, const char **argv, const char **envp, void *image,
580588 int ret = *static_cast <int *>(host_ret);
581589
582590 end_args_t fini_args = {ret};
583- if (hsa_status_t err = launch_kernel (dev_agent, executable, kernargs_pool,
584- coarsegrained_pool, queue, server ,
585- single_threaded_params, " _end.kd" ,
586- fini_args , print_resource_usage))
591+ if (hsa_status_t err = launch_kernel (
592+ dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
593+ server, single_threaded_params, " _end.kd" , fini_args ,
594+ info_map[ " _end " ]. WavefrontSize , print_resource_usage))
587595 handle_error (err);
588596
589597 if (hsa_status_t err = hsa_amd_memory_pool_free (rpc_buffer))
0 commit comments