Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions libc/docs/gpu/rpc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ but the following example shows how it can be used by a standard user.

#include <shared/rpc.h>
#include <shared/rpc_opcodes.h>
#include <shared/rpc_server.h>

[[noreturn]] void handle_error(cudaError_t err) {
fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
Expand Down Expand Up @@ -230,10 +231,10 @@ but the following example shows how it can be used by a standard user.
// Requires non-blocking CUDA kernels but avoids a separate thread.
do {
auto port = server.try_open(warp_size, /*index=*/0);
// From libllvmlibc_rpc_server.a in the installation.
if (!port)
continue;

// Only available in-tree from the 'libc' sources.
handle_libc_opcodes(*port, warp_size);
port->close();
} while (cudaStreamQuery(stream) == cudaErrorNotReady);
Expand All @@ -242,14 +243,16 @@ but the following example shows how it can be used by a standard user.
The above code must be compiled in CUDA's relocatable device code mode and with
the advanced offloading driver to link in the library. Currently this can be
done with the following invocation. Using LTO avoids the overhead normally
associated with relocatable device code linking. The C library for GPUs is
linked in by forwarding the static library to the device-side link job.
associated with relocatable device code linking. The C library for GPU's
handling is included through the ``shared/`` directory. This is not currently
installed as it does not use a stable interface.


.. code-block:: sh

$> clang++ -x cuda rpc.cpp --offload-arch=native -fgpu-rdc -lcudart \
-I<install-path>include -L<install-path>/lib -lllvmlibc_rpc_server \
-Xoffload-linker -lc -O3 -foffload-lto -o hello
-I<install-path>include -L<install-path>/lib -Xoffload-linker -lc \
-O3 -foffload-lto -o hello
$> ./hello
Hello world!

Expand Down
6 changes: 0 additions & 6 deletions libc/shared/rpc_opcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,4 @@ typedef enum {

#undef LLVM_LIBC_OPCODE

namespace rpc {
// The implementation of this function currently lives in the utility directory
// at 'utils/gpu/server/rpc_server.cpp'.
rpc::Status handle_libc_opcodes(rpc::Server::Port &port, uint32_t num_lanes);
} // namespace rpc

#endif // LLVM_LIBC_SHARED_RPC_OPCODES_H
22 changes: 22 additions & 0 deletions libc/shared/rpc_server.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Shared RPC server interface -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SHARED_RPC_SERVER_H
#define LLVM_LIBC_SHARED_RPC_SERVER_H

#include "src/__support/RPC/rpc_server.h"

namespace LIBC_NAMESPACE_DECL {
namespace shared {

using LIBC_NAMESPACE::rpc::handle_libc_opcodes;

} // namespace shared
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SHARED_RPC_SERVER_H
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,45 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is intended to be used externally as part of the `shared/`
// interface. For that purpose, we manually define a few options normally
// handled by the libc build system.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_RPC_RPC_SERVER_H
#define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_SERVER_H

// Workaround for missing __has_builtin in < GCC 10.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// Configs for using the LLVM libc writer interface.
#define LIBC_COPT_USE_C_ASSERT
#define LIBC_COPT_MEMCPY_USE_EMBEDDED_TINY
#define LIBC_COPT_ARRAY_ARG_LIST
#define LIBC_COPT_PRINTF_DISABLE_WRITE_INT
#define LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
#define LIBC_COPT_PRINTF_DISABLE_STRERROR

// The 'long double' type is 8 byte
#define LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64

#include "shared/rpc.h"
#include "shared/rpc_opcodes.h"

#include "src/__support/CPP/type_traits.h"
#include "src/__support/arg_list.h"
#include "src/stdio/printf_core/converter.h"
#include "src/stdio/printf_core/parser.h"
#include "src/stdio/printf_core/writer.h"

#include <stdio.h>
#include <stdlib.h>
#include "hdr/stdio_overlay.h"
#include "hdr/stdlib_overlay.h"

namespace LIBC_NAMESPACE {
namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Minimal replacement for 'std::vector' that works for trivial types.
template <typename T> class TempVector {
Expand All @@ -35,68 +55,66 @@ template <typename T> class TempVector {
size_t capacity;

public:
TempVector() : data(nullptr), current(0), capacity(0) {}
LIBC_INLINE TempVector() : data(nullptr), current(0), capacity(0) {}

~TempVector() { free(data); }
LIBC_INLINE ~TempVector() { free(data); }

void push_back(const T &value) {
LIBC_INLINE void push_back(const T &value) {
if (current == capacity)
grow();
data[current] = T(value);
++current;
}

void push_back(T &&value) {
LIBC_INLINE void push_back(T &&value) {
if (current == capacity)
grow();
data[current] = T(static_cast<T &&>(value));
++current;
}

void pop_back() { --current; }
LIBC_INLINE void pop_back() { --current; }

bool empty() { return current == 0; }
LIBC_INLINE bool empty() { return current == 0; }

size_t size() { return current; }
LIBC_INLINE size_t size() { return current; }

T &operator[](size_t index) { return data[index]; }
LIBC_INLINE T &operator[](size_t index) { return data[index]; }

T &back() { return data[current - 1]; }
LIBC_INLINE T &back() { return data[current - 1]; }

private:
void grow() {
LIBC_INLINE void grow() {
size_t new_capacity = capacity ? capacity * 2 : 1;
void *new_data = realloc(data, new_capacity * sizeof(T));
if (!new_data)
abort();
data = static_cast<T *>(new_data);
capacity = new_capacity;
}
};

struct TempStorage {
char *alloc(size_t size) {
LIBC_INLINE char *alloc(size_t size) {
storage.push_back(reinterpret_cast<char *>(malloc(size)));
return storage.back();
}

~TempStorage() {
LIBC_INLINE ~TempStorage() {
for (size_t i = 0; i < storage.size(); ++i)
free(storage[i]);
}

TempVector<char *> storage;
};

enum Stream {
File = 0,
Stdin = 1,
Stdout = 2,
Stderr = 3,
};

// Get the associated stream out of an encoded number.
LIBC_INLINE ::FILE *to_stream(uintptr_t f) {
LIBC_INLINE static ::FILE *to_stream(uintptr_t f) {
enum Stream {
File = 0,
Stdin = 1,
Stdout = 2,
Stderr = 3,
};

::FILE *stream = reinterpret_cast<FILE *>(f & ~0x3ull);
Stream type = static_cast<Stream>(f & 0x3ull);
if (type == Stdin)
Expand All @@ -109,7 +127,8 @@ LIBC_INLINE ::FILE *to_stream(uintptr_t f) {
}

template <bool packed, uint32_t num_lanes>
static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) {
LIBC_INLINE static void handle_printf(rpc::Server::Port &port,
TempStorage &temp_storage) {
FILE *files[num_lanes] = {nullptr};
// Get the appropriate output stream to use.
if (port.get_opcode() == LIBC_PRINTF_TO_STREAM ||
Expand Down Expand Up @@ -268,7 +287,8 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) {
}
}

results[lane] = fwrite(buffer, 1, writer.get_chars_written(), files[lane]);
results[lane] = static_cast<int>(
fwrite(buffer, 1, writer.get_chars_written(), files[lane]));
if (results[lane] != writer.get_chars_written() || ret == -1)
results[lane] = -1;
}
Expand All @@ -282,7 +302,7 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) {
}

template <uint32_t num_lanes>
rpc::Status handle_port_impl(rpc::Server::Port &port) {
LIBC_INLINE static rpc::Status handle_port_impl(rpc::Server::Port &port) {
TempStorage temp_storage;

switch (port.get_opcode()) {
Expand Down Expand Up @@ -333,8 +353,9 @@ rpc::Status handle_port_impl(rpc::Server::Port &port) {
void *data[num_lanes] = {nullptr};
port.recv([&](rpc::Buffer *buffer, uint32_t id) {
data[id] = temp_storage.alloc(buffer->data[0]);
const char *str = fgets(reinterpret_cast<char *>(data[id]),
buffer->data[0], to_stream(buffer->data[1]));
const char *str = ::fgets(reinterpret_cast<char *>(data[id]),
static_cast<int>(buffer->data[0]),
to_stream(buffer->data[1]));
sizes[id] = !str ? 0 : __builtin_strlen(str) + 1;
});
port.send_n(data, sizes);
Expand All @@ -353,9 +374,9 @@ rpc::Status handle_port_impl(rpc::Server::Port &port) {
break;
}
case LIBC_CLOSE_FILE: {
port.recv_and_send([&](rpc::Buffer *buffer, uint32_t id) {
port.recv_and_send([&](rpc::Buffer *buffer, uint32_t) {
FILE *file = reinterpret_cast<FILE *>(buffer->data[0]);
buffer->data[0] = fclose(file);
buffer->data[0] = ::fclose(file);
});
break;
}
Expand Down Expand Up @@ -498,21 +519,28 @@ rpc::Status handle_port_impl(rpc::Server::Port &port) {
return rpc::RPC_SUCCESS;
}

} // namespace LIBC_NAMESPACE
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL

namespace LIBC_NAMESPACE_DECL {
namespace rpc {
// The implementation of this function currently lives in the utility directory
// at 'utils/gpu/server/rpc_server.cpp'.
rpc::Status handle_libc_opcodes(rpc::Server::Port &port, uint32_t num_lanes) {

// Handles any opcode generated from the 'libc' client code.
LIBC_INLINE ::rpc::Status handle_libc_opcodes(::rpc::Server::Port &port,
uint32_t num_lanes) {
switch (num_lanes) {
case 1:
return LIBC_NAMESPACE::handle_port_impl<1>(port);
return internal::handle_port_impl<1>(port);
case 32:
return LIBC_NAMESPACE::handle_port_impl<32>(port);
return internal::handle_port_impl<32>(port);
case 64:
return LIBC_NAMESPACE::handle_port_impl<64>(port);
return internal::handle_port_impl<64>(port);
default:
return rpc::RPC_ERROR;
return ::rpc::RPC_ERROR;
}
}

} // namespace rpc
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_RPC_RPC_SERVER_H
1 change: 0 additions & 1 deletion libc/utils/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
add_subdirectory(server)
add_subdirectory(loader)
3 changes: 3 additions & 0 deletions libc/utils/gpu/loader/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
add_library(gpu_loader OBJECT Main.cpp)

include(FindLibcCommonUtils)
target_link_libraries(gpu_loader PUBLIC llvm-libc-common-utilities)

target_include_directories(gpu_loader PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
${LIBC_SOURCE_DIR}/include
Expand Down
3 changes: 2 additions & 1 deletion libc/utils/gpu/loader/Loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "shared/rpc.h"
#include "shared/rpc_opcodes.h"
#include "shared/rpc_server.h"
Comment on lines 14 to +16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the libc code isn't intended to include from /shared, if you need to use code both inside of libc and externally it should go in /src/__support with /shared only exporting the parts you want to use in other projects.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's going to be really tough making that work but I'll try.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay I think this works, PTAL.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

now that you've moved rpc_server.h to __support, this should be fine to change to #include "src/__support/RPC/rpc_server.h". Then in a followup you can do the same thing with the other rpc headers in /shared so that they're all consistent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But we want to use the shared interface since this is exported from libc right

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I.e. we need to use the shared interface since it's the one that defines the LIBC_NAMESpACE and other stuff.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the shared interface is for external callers, but internally libc should use its own interface. Also I forgot about the previous discussion on placement of the other rpc headers in /shared, they're fine there for now but iirc the plan is to move them to /include/rpc eventually. Writing that down so I remember it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But this isn't internal, this will be moved outside of libc soon and the offload/ stuff is definitely outside of libc.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, if this is getting moved outside of libc then this is correct. I was under the impression you weren't planning to move it.


#include <cstddef>
#include <cstdint>
Expand Down Expand Up @@ -181,7 +182,7 @@ inline uint32_t handle_server(rpc::Server &server, uint32_t index,
break;
}
default:
status = handle_libc_opcodes(*port, num_lanes);
status = LIBC_NAMESPACE::shared::handle_libc_opcodes(*port, num_lanes);
break;
}

Expand Down
8 changes: 1 addition & 7 deletions libc/utils/gpu/loader/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,4 @@ set(LLVM_LINK_COMPONENTS
)

add_llvm_executable(amdhsa-loader amdhsa-loader.cpp)

target_link_libraries(amdhsa-loader
PRIVATE
hsa-runtime64::hsa-runtime64
gpu_loader
llvmlibc_rpc_server
)
target_link_libraries(amdhsa-loader PRIVATE hsa-runtime64::hsa-runtime64 gpu_loader)
8 changes: 1 addition & 7 deletions libc/utils/gpu/loader/nvptx/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,4 @@ set(LLVM_LINK_COMPONENTS
)

add_llvm_executable(nvptx-loader nvptx-loader.cpp)

target_link_libraries(nvptx-loader
PRIVATE
gpu_loader
llvmlibc_rpc_server
CUDA::cuda_driver
)
target_link_libraries(nvptx-loader PRIVATE gpu_loader CUDA::cuda_driver)
30 changes: 0 additions & 30 deletions libc/utils/gpu/server/CMakeLists.txt

This file was deleted.

Loading
Loading