Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions libc/cmake/modules/LLVMLibCTestRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -476,14 +476,14 @@ function(add_integration_test test_name)

if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT} -Wno-multi-gpu
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
-Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
"-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}")
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT} -Wno-multi-gpu
"-Wl,--suppress-stack-size-warning"
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
"-Wl,--suppress-stack-size-warning" -Wno-multi-gpu
"-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1"
"-Wl,-mllvm,-nvptx-emit-init-fini-kernel"
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
Expand Down
33 changes: 33 additions & 0 deletions libc/test/integration/startup/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,36 @@ add_integration_test(
--threads 32
--blocks 8
)

# Register the RPC lane test. On AMDGPU the same source is built twice, once
# with -mno-wavefrontsize64 (launched with 32 threads) and once with
# -mwavefrontsize64 (launched with 64 threads), so both wavefront sizes are
# exercised. Every other target gets a single 32-thread variant with no extra
# compile options.
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
  add_integration_test(
    startup_rpc_lane_test_w32
    SUITE libc-startup-tests
    SRCS
      # Spell out the extension: it matches the sibling registrations below and
      # does not depend on CMake guessing the source file extension.
      rpc_lane_test.cpp
    LOADER_ARGS
      --threads 32
    COMPILE_OPTIONS
      -mno-wavefrontsize64
  )

  add_integration_test(
    startup_rpc_lane_test_w64
    SUITE libc-startup-tests
    SRCS
      rpc_lane_test.cpp
    LOADER_ARGS
      --threads 64
    COMPILE_OPTIONS
      -mwavefrontsize64
  )
else()
  add_integration_test(
    startup_rpc_lane_test_w32
    SUITE libc-startup-tests
    SRCS
      rpc_lane_test.cpp
    LOADER_ARGS
      --threads 32
  )
endif()
36 changes: 36 additions & 0 deletions libc/test/integration/startup/gpu/rpc_lane_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//===-- Loader test to check the RPC interface with the loader ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "test/IntegrationTest/test.h"

using namespace LIBC_NAMESPACE;

// Sends the calling lane's ID through an RPC_TEST_INCREMENT port, reads the
// reply back into the same variable, and checks the result.
static void test_add() {
// Start from this lane's ID; the send callback below transmits this value.
uint64_t cnt = gpu::get_lane_id();
LIBC_NAMESPACE::rpc::Client::Port port =
LIBC_NAMESPACE::rpc::client.open<RPC_TEST_INCREMENT>();
port.send_and_recv(
// Send callback: captures `cnt` by copy and stores it in the first 64-bit
// slot of the buffer.
[=](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) {
reinterpret_cast<uint64_t *>(buffer->data)[0] = cnt;
},
// Receive callback: captures `cnt` by reference and overwrites it with the
// first 64-bit slot of the buffer.
[&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) {
cnt = reinterpret_cast<uint64_t *>(buffer->data)[0];
});
port.close();
// The value read back must be this lane's ID plus one.
EXPECT_EQ(cnt, gpu::get_lane_id() + 1);
// NOTE(review): presumably holds because the test is launched with no more
// threads than fit in a single wavefront -- confirm against the LOADER_ARGS
// used to register this test.
EXPECT_EQ(gpu::get_thread_id(), gpu::get_lane_id());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
EXPECT_EQ(gpu::get_thread_id(), gpu::get_lane_id());
EXPECT_EQ(gpu::get_thread_id(), gpu::get_lane_id()) << "Not in same lane";

Preserve the messages from before?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The macro doesn't take a message, and since it's not a boolean, I can't use the && trick where a const char pointer is evaluated as true.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's a << after the failure

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That macro expands to `if (x == y) { ... }`, which won't accept an operator.

}

// Test entry point: runs the RPC lane check once and returns 0.
TEST_MAIN(int argc, char **argv, char **envp) {
  // None of the entry-point arguments are consulted by this test.
  (void)argc;
  (void)argv;
  (void)envp;

  test_add();
  return 0;
}
1 change: 1 addition & 0 deletions libc/utils/gpu/loader/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
Object
Option
Support
FrontendOffloading
)

add_llvm_executable(amdhsa-loader amdhsa-loader.cpp)
Expand Down
46 changes: 27 additions & 19 deletions libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#include "hsa/hsa_ext_amd.h"
#endif

#include "llvm/Frontend/Offloading/Utility.h"

#include <atomic>
#include <cstdio>
#include <cstdlib>
Expand Down Expand Up @@ -163,17 +165,13 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
hsa_queue_t *queue, rpc::Server &server,
const LaunchParameters &params,
const char *kernel_name, args_t kernel_args,
bool print_resource_usage) {
uint32_t wavefront_size, bool print_resource_usage) {
// Look up the kernel in the loaded executable.
hsa_executable_symbol_t symbol;
if (hsa_status_t err = hsa_executable_get_symbol_by_name(
executable, kernel_name, &dev_agent, &symbol))
return err;

uint32_t wavefront_size = 0;
if (hsa_status_t err = hsa_agent_get_info(
dev_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size))
handle_error(err);
// Retrieve different properties of the kernel symbol used for launch.
uint64_t kernel;
uint32_t args_size;
Expand Down Expand Up @@ -419,6 +417,16 @@ int load(int argc, const char **argv, const char **envp, void *image,
dev_agent, &coarsegrained_pool))
handle_error(err);

// The AMDGPU target can change its wavefront size. There currently isn't a
// good way to look this up through the HSA API so we use the LLVM interface.
uint16_t abi_version;
llvm::StringRef image_ref(reinterpret_cast<char *>(image), size);
llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> info_map;
if (llvm::Error err = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
llvm::MemoryBufferRef(image_ref, ""), info_map, abi_version)) {
handle_error(llvm::toString(std::move(err)).c_str());
}

// Allocate fine-grained memory on the host to hold the pointer array for the
// copied argv and allow the GPU agent to access it.
auto allocator = [&](uint64_t size) -> void * {
Expand Down Expand Up @@ -448,10 +456,10 @@ int load(int argc, const char **argv, const char **envp, void *image,
hsa_amd_memory_fill(dev_ret, 0, /*count=*/1);

// Allocate finegrained memory for the RPC server and client to share.
uint32_t wavefront_size = 0;
if (hsa_status_t err = hsa_agent_get_info(
dev_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size))
handle_error(err);
uint32_t wavefront_size =
llvm::max_element(info_map, [](auto &&x, auto &&y) {
return x.second.WavefrontSize < y.second.WavefrontSize;
})->second.WavefrontSize;

// Set up the RPC server.
void *rpc_buffer;
Expand Down Expand Up @@ -513,7 +521,6 @@ int load(int argc, const char **argv, const char **envp, void *image,
if (HSA_STATUS_SUCCESS ==
hsa_executable_get_symbol_by_name(executable, "__llvm_libc_clock_freq",
&dev_agent, &freq_sym)) {

void *host_clock_freq;
if (hsa_status_t err =
hsa_amd_memory_pool_allocate(finegrained_pool, sizeof(uint64_t),
Expand Down Expand Up @@ -553,16 +560,17 @@ int load(int argc, const char **argv, const char **envp, void *image,

LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1};
begin_args_t init_args = {argc, dev_argv, dev_envp};
if (hsa_status_t err = launch_kernel(dev_agent, executable, kernargs_pool,
coarsegrained_pool, queue, server,
single_threaded_params, "_begin.kd",
init_args, print_resource_usage))
if (hsa_status_t err = launch_kernel(
dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
server, single_threaded_params, "_begin.kd", init_args,
info_map["_begin"].WavefrontSize, print_resource_usage))
handle_error(err);

start_args_t args = {argc, dev_argv, dev_envp, dev_ret};
if (hsa_status_t err = launch_kernel(
dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
server, params, "_start.kd", args, print_resource_usage))
server, params, "_start.kd", args, info_map["_start"].WavefrontSize,
print_resource_usage))
handle_error(err);

void *host_ret;
Expand All @@ -580,10 +588,10 @@ int load(int argc, const char **argv, const char **envp, void *image,
int ret = *static_cast<int *>(host_ret);

end_args_t fini_args = {ret};
if (hsa_status_t err = launch_kernel(dev_agent, executable, kernargs_pool,
coarsegrained_pool, queue, server,
single_threaded_params, "_end.kd",
fini_args, print_resource_usage))
if (hsa_status_t err = launch_kernel(
dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
server, single_threaded_params, "_end.kd", fini_args,
info_map["_end"].WavefrontSize, print_resource_usage))
handle_error(err);

if (hsa_status_t err = hsa_amd_memory_pool_free(rpc_buffer))
Expand Down