Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions libc/cmake/modules/LLVMLibCTestRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -476,14 +476,14 @@ function(add_integration_test test_name)

if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT} -Wno-multi-gpu
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
-Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
"-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}")
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT} -Wno-multi-gpu
"-Wl,--suppress-stack-size-warning"
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
"-Wl,--suppress-stack-size-warning" -Wno-multi-gpu
"-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1"
"-Wl,-mllvm,-nvptx-emit-init-fini-kernel"
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
Expand Down
33 changes: 33 additions & 0 deletions libc/test/integration/startup/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,36 @@ add_integration_test(
--threads 32
--blocks 8
)

# Register the RPC lane test. On AMDGPU the same source is built twice, once
# with -mno-wavefrontsize64 (launched with 32 threads) and once with
# -mwavefrontsize64 (launched with 64 threads). Every other target gets a
# single 32-thread variant with no extra compile options.
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
  add_integration_test(
    startup_rpc_lane_test_w32
    SUITE libc-startup-tests
    SRCS
      # Spell out the extension, matching the sibling targets; extension-less
      # source names depend on deprecated CMake extension guessing.
      rpc_lane_test.cpp
    LOADER_ARGS
      --threads 32
    COMPILE_OPTIONS
      -mno-wavefrontsize64
  )

  add_integration_test(
    startup_rpc_lane_test_w64
    SUITE libc-startup-tests
    SRCS
      rpc_lane_test.cpp
    LOADER_ARGS
      --threads 64
    COMPILE_OPTIONS
      -mwavefrontsize64
  )
else()
  add_integration_test(
    startup_rpc_lane_test_w32
    SUITE libc-startup-tests
    SRCS
      rpc_lane_test.cpp
    LOADER_ARGS
      --threads 32
  )
endif()
36 changes: 36 additions & 0 deletions libc/test/integration/startup/gpu/rpc_lane_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//===-- Loader test to check the RPC interface with the loader ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "test/IntegrationTest/test.h"

using namespace LIBC_NAMESPACE;

// Performs one RPC_TEST_INCREMENT round trip on a client port and checks the
// value that comes back against this thread's lane id.
// NOTE(review): presumably the server side of RPC_TEST_INCREMENT replies with
// the received value plus one -- the first assertion below depends on it.
static void test_add() {
// The value sent over the port starts out as this thread's lane id.
uint64_t cnt = gpu::get_lane_id();
LIBC_NAMESPACE::rpc::Client::Port port =
LIBC_NAMESPACE::rpc::client.open<RPC_TEST_INCREMENT>();
port.send_and_recv(
// First callback: stores cnt (captured by value) in the first 64-bit slot of
// the buffer's data.
[=](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) {
reinterpret_cast<uint64_t *>(buffer->data)[0] = cnt;
},
// Second callback: overwrites cnt (captured by reference) with the first
// 64-bit slot of the buffer's data.
[&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) {
cnt = reinterpret_cast<uint64_t *>(buffer->data)[0];
});
port.close();
// The value read back must be exactly one more than this thread's lane id.
ASSERT_TRUE(cnt == gpu::get_lane_id() + 1 && "Incorrect sum");
// The thread id must equal the lane id.
// NOTE(review): presumably this only holds when the launch uses no more
// threads than one wavefront/warp holds -- confirm against the loader args.
ASSERT_TRUE(gpu::get_thread_id() == gpu::get_lane_id() && "Not in same lane");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this just gtest? Should be EXPECT_EQ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a little confusing: libc has integration tests that simply run main with a really small subset of gtest. However, EXPECT_EQ does exist there, so I can use that instead.

}

// Integration-test entry point: runs the RPC lane check once, then returns 0.
// The argc/argv/envp parameters are not used by this test.
TEST_MAIN(int argc, char **argv, char **envp) {
  test_add();
  return 0;
}
1 change: 1 addition & 0 deletions libc/utils/gpu/loader/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
Object
Option
Support
FrontendOffloading
)

add_llvm_executable(amdhsa-loader amdhsa-loader.cpp)
Expand Down
46 changes: 27 additions & 19 deletions libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#include "hsa/hsa_ext_amd.h"
#endif

#include "llvm/Frontend/Offloading/Utility.h"

#include <atomic>
#include <cstdio>
#include <cstdlib>
Expand Down Expand Up @@ -163,17 +165,13 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
hsa_queue_t *queue, rpc::Server &server,
const LaunchParameters &params,
const char *kernel_name, args_t kernel_args,
bool print_resource_usage) {
uint32_t wavefront_size, bool print_resource_usage) {
// Look up the kernel in the loaded executable.
hsa_executable_symbol_t symbol;
if (hsa_status_t err = hsa_executable_get_symbol_by_name(
executable, kernel_name, &dev_agent, &symbol))
return err;

uint32_t wavefront_size = 0;
if (hsa_status_t err = hsa_agent_get_info(
dev_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size))
handle_error(err);
// Retrieve different properties of the kernel symbol used for launch.
uint64_t kernel;
uint32_t args_size;
Expand Down Expand Up @@ -419,6 +417,16 @@ int load(int argc, const char **argv, const char **envp, void *image,
dev_agent, &coarsegrained_pool))
handle_error(err);

// The AMDGPU target can change its wavefront size. There currently isn't a
// good way to look this up through the HSA API so we use the LLVM interface.
uint16_t abi_version;
llvm::StringRef image_ref(reinterpret_cast<char *>(image), size);
llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> info_map;
if (llvm::Error err = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
llvm::MemoryBufferRef(image_ref, ""), info_map, abi_version)) {
handle_error(llvm::toString(std::move(err)).c_str());
}

// Allocate fine-grained memory on the host to hold the pointer array for the
// copied argv and allow the GPU agent to access it.
auto allocator = [&](uint64_t size) -> void * {
Expand Down Expand Up @@ -448,10 +456,10 @@ int load(int argc, const char **argv, const char **envp, void *image,
hsa_amd_memory_fill(dev_ret, 0, /*count=*/1);

// Allocate finegrained memory for the RPC server and client to share.
uint32_t wavefront_size = 0;
if (hsa_status_t err = hsa_agent_get_info(
dev_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size))
handle_error(err);
uint32_t wavefront_size =
llvm::max_element(info_map, [](auto &&x, auto &&y) {
return x.second.WavefrontSize < y.second.WavefrontSize;
})->second.WavefrontSize;

// Set up the RPC server.
void *rpc_buffer;
Expand Down Expand Up @@ -513,7 +521,6 @@ int load(int argc, const char **argv, const char **envp, void *image,
if (HSA_STATUS_SUCCESS ==
hsa_executable_get_symbol_by_name(executable, "__llvm_libc_clock_freq",
&dev_agent, &freq_sym)) {

void *host_clock_freq;
if (hsa_status_t err =
hsa_amd_memory_pool_allocate(finegrained_pool, sizeof(uint64_t),
Expand Down Expand Up @@ -553,16 +560,17 @@ int load(int argc, const char **argv, const char **envp, void *image,

LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1};
begin_args_t init_args = {argc, dev_argv, dev_envp};
if (hsa_status_t err = launch_kernel(dev_agent, executable, kernargs_pool,
coarsegrained_pool, queue, server,
single_threaded_params, "_begin.kd",
init_args, print_resource_usage))
if (hsa_status_t err = launch_kernel(
dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
server, single_threaded_params, "_begin.kd", init_args,
info_map["_begin"].WavefrontSize, print_resource_usage))
handle_error(err);

start_args_t args = {argc, dev_argv, dev_envp, dev_ret};
if (hsa_status_t err = launch_kernel(
dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
server, params, "_start.kd", args, print_resource_usage))
server, params, "_start.kd", args, info_map["_start"].WavefrontSize,
print_resource_usage))
handle_error(err);

void *host_ret;
Expand All @@ -580,10 +588,10 @@ int load(int argc, const char **argv, const char **envp, void *image,
int ret = *static_cast<int *>(host_ret);

end_args_t fini_args = {ret};
if (hsa_status_t err = launch_kernel(dev_agent, executable, kernargs_pool,
coarsegrained_pool, queue, server,
single_threaded_params, "_end.kd",
fini_args, print_resource_usage))
if (hsa_status_t err = launch_kernel(
dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
server, single_threaded_params, "_end.kd", fini_args,
info_map["_end"].WavefrontSize, print_resource_usage))
handle_error(err);

if (hsa_status_t err = hsa_amd_memory_pool_free(rpc_buffer))
Expand Down