Skip to content

Commit a8970f6

Browse files
Partway through porting: implicit address-space conversion is used all over DeviceRTL
1 parent 72791fe commit a8970f6

File tree

5 files changed

+42
-23
lines changed

5 files changed

+42
-23
lines changed

offload/DeviceRTL/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
9898
set(bc_flags -c -flto -std=c++17 -fvisibility=hidden
9999
${clang_opt_flags} -nogpulib -nostdlibinc
100100
-fno-rtti -fno-exceptions -fconvergent-functions
101+
-Wno-atomic-alignment # SPIR-V warns about 4-byte atomics, and -Werror turns that warning into a build failure
101102
-Wno-unknown-cuda-version
102103
-DOMPTARGET_DEVICE_RUNTIME
103104
-I${include_directory}
@@ -257,6 +258,10 @@ endfunction()
257258
add_custom_target(omptarget.devicertl.amdgpu)
258259
compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=none)
259260

261+
add_custom_target(omptarget.devicertl.amdgpu-spirv)
262+
compileDeviceRTLLibrary(amdgpu-spirv spirv64-amd-amdhsa -Xclang -mcode-object-version=none)
263+
264+
260265
add_custom_target(omptarget.devicertl.nvptx)
261266
compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)
262267

offload/DeviceRTL/include/State.h

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,21 @@ struct ThreadStateTy {
102102
ThreadStateTy *PreviousThreadState;
103103

104104
void init() {
105-
ICVState = TeamState.ICVState;
105+
// Plain assignment relies on an implicit conversion between address spaces, so copy the bytes with memcpy instead.
106+
// ICVState = TeamState.ICVState;
107+
__builtin_memcpy(&ICVState,
108+
&TeamState.ICVState,
109+
sizeof(ICVState));
106110
PreviousThreadState = nullptr;
107111
}
108112

109113
void init(ThreadStateTy *PreviousTS) {
110-
ICVState = PreviousTS ? PreviousTS->ICVState : TeamState.ICVState;
114+
__builtin_memcpy(&ICVState,
115+
PreviousTS ?
116+
(state::ICVStateTy*) & PreviousTS->ICVState
117+
: (state::ICVStateTy*) &TeamState.ICVState,
118+
sizeof(ICVState));
119+
111120
PreviousThreadState = PreviousTS;
112121
}
113122
};
@@ -159,7 +168,7 @@ void resetStateForThread(uint32_t TId);
159168
{ \
160169
if (OMP_LIKELY(ForceTeamState || !config::mayUseThreadStates() || \
161170
!TeamState.HasThreadState)) \
162-
return TeamState.ICVState.Member; \
171+
return (uint32_t &)TeamState.ICVState.Member; \
163172
uint32_t TId = mapping::getThreadIdInBlock(); \
164173
if (OMP_UNLIKELY(!ThreadStates[TId])) { \
165174
ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>( \
@@ -169,7 +178,7 @@ void resetStateForThread(uint32_t TId);
169178
TeamState.HasThreadState = true; \
170179
ThreadStates[TId]->init(); \
171180
} \
172-
return ThreadStates[TId]->ICVState.Member; \
181+
return (uint32_t &)ThreadStates[TId]->ICVState.Member; \
173182
}
174183

175184
// FIXME: https://github.com/llvm/llvm-project/issues/123241.
@@ -178,8 +187,8 @@ void resetStateForThread(uint32_t TId);
178187
auto TId = mapping::getThreadIdInBlock(); \
179188
if (OMP_UNLIKELY(!ForceTeamState && config::mayUseThreadStates() && \
180189
TeamState.HasThreadState && ThreadStates[TId])) \
181-
return ThreadStates[TId]->ICVState.Member; \
182-
return TeamState.ICVState.Member; \
190+
return (uint32_t &)ThreadStates[TId]->ICVState.Member; \
191+
return (uint32_t &)TeamState.ICVState.Member; \
183192
}
184193

185194
[[gnu::always_inline, gnu::flatten]] inline uint32_t &
@@ -210,9 +219,9 @@ lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
210219
lookupImpl(RunSchedChunkVar, ForceTeamState);
211220
lookupForModify32Impl(RunSchedChunkVar, Ident, ForceTeamState);
212221
case state::VK_ParallelTeamSize:
213-
return TeamState.ParallelTeamSize;
222+
return (uint32_t &)TeamState.ParallelTeamSize;
214223
case state::VK_HasThreadState:
215-
return TeamState.HasThreadState;
224+
return (uint32_t &)TeamState.HasThreadState;
216225
default:
217226
break;
218227
}
@@ -223,7 +232,7 @@ lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
223232
lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
224233
switch (Kind) {
225234
case state::VK_ParallelRegionFn:
226-
return TeamState.ParallelRegionFnVar;
235+
return (void *&)TeamState.ParallelRegionFnVar;
227236
default:
228237
break;
229238
}

offload/DeviceRTL/src/State.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ void SharedMemorySmartStackTy::pop(void *Ptr, uint64_t Bytes) {
160160

161161
} // namespace
162162

163-
void *memory::getDynamicBuffer() { return DynamicSharedBuffer; }
163+
void *memory::getDynamicBuffer() { return (void *)DynamicSharedBuffer; }
164164

165165
void *memory::allocShared(uint64_t Bytes, const char *Reason) {
166166
return SharedMemorySmartStack.push(Bytes);
@@ -219,6 +219,7 @@ bool state::TeamStateTy::operator==(const TeamStateTy &Other) const {
219219

220220
void state::TeamStateTy::assertEqual(TeamStateTy &Other) const {
221221
ICVState.assertEqual(Other.ICVState);
222+
// Other.ICVState.assertEqual(ICVState);
222223
ASSERT(ParallelTeamSize == Other.ParallelTeamSize, nullptr);
223224
ASSERT(HasThreadState == Other.HasThreadState, nullptr);
224225
}
@@ -273,7 +274,7 @@ void state::enterDataEnvironment(IdentTy *Ident) {
273274
unsigned TId = mapping::getThreadIdInBlock();
274275
ThreadStateTy *NewThreadState = static_cast<ThreadStateTy *>(
275276
memory::allocGlobal(sizeof(ThreadStateTy), "ThreadStates alloc"));
276-
uintptr_t *ThreadStatesBitsPtr = reinterpret_cast<uintptr_t *>(&ThreadStates);
277+
uintptr_t *ThreadStatesBitsPtr = (uintptr_t *)(&ThreadStates);
277278
if (!atomic::load(ThreadStatesBitsPtr, atomic::seq_cst)) {
278279
uint32_t Bytes =
279280
sizeof(ThreadStates[0]) * mapping::getNumberOfThreadsInBlock();
@@ -313,18 +314,22 @@ void state::resetStateForThread(uint32_t TId) {
313314
}
314315

315316
void state::runAndCheckState(void(Func(void))) {
316-
TeamStateTy OldTeamState = TeamState;
317-
OldTeamState.assertEqual(TeamState);
317+
// TeamStateTy OldTeamState = TeamState;
318+
TeamStateTy OldTeamState;
319+
__builtin_memcpy(&OldTeamState,
320+
&TeamState,
321+
sizeof(TeamStateTy));
322+
OldTeamState.assertEqual((TeamStateTy&)TeamState);
318323

319324
Func();
320325

321-
OldTeamState.assertEqual(TeamState);
326+
OldTeamState.assertEqual((TeamStateTy&)TeamState);
322327
}
323328

324329
void state::assumeInitialState(bool IsSPMD) {
325330
TeamStateTy InitialTeamState;
326331
InitialTeamState.init(IsSPMD);
327-
InitialTeamState.assertEqual(TeamState);
332+
InitialTeamState.assertEqual((TeamStateTy&)TeamState);
328333
ASSERT(mapping::isSPMDMode() == IsSPMD, nullptr);
329334
}
330335

@@ -461,7 +466,7 @@ constexpr uint64_t NUM_SHARED_VARIABLES_IN_SHARED_MEM = 64;
461466

462467
void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t nArgs) {
463468
if (nArgs <= NUM_SHARED_VARIABLES_IN_SHARED_MEM) {
464-
SharedMemVariableSharingSpacePtr = &SharedMemVariableSharingSpace[0];
469+
SharedMemVariableSharingSpacePtr = (void**)&SharedMemVariableSharingSpace[0];
465470
} else {
466471
SharedMemVariableSharingSpacePtr = (void **)memory::allocGlobal(
467472
nArgs * sizeof(void *), "new extended args");
@@ -472,7 +477,7 @@ void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t nArgs) {
472477
}
473478

474479
void __kmpc_end_sharing_variables() {
475-
if (SharedMemVariableSharingSpacePtr != &SharedMemVariableSharingSpace[0])
480+
if ((void*)SharedMemVariableSharingSpacePtr != (void*)&SharedMemVariableSharingSpace[0])
476481
memory::freeGlobal(SharedMemVariableSharingSpacePtr, "new extended args");
477482
}
478483

offload/cmake/caches/Offload.cmake

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
set(LLVM_ENABLE_PROJECTS "clang;clang-tools-extra;lld" CACHE STRING "")
2-
set(LLVM_ENABLE_RUNTIMES "compiler-rt;libunwind;libcxx;libcxxabi;openmp;offload" CACHE STRING "")
2+
set(LLVM_ENABLE_RUNTIMES "compiler-rt;openmp;offload" CACHE STRING "")
33
set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
44

55
set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "")
6-
set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
7-
set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
8-
set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;libcxx;libcxxabi" CACHE STRING "")
9-
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;libcxx;libcxxabi" CACHE STRING "")
6+
# set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
7+
# set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
8+
set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc" CACHE STRING "")
9+
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc" CACHE STRING "")

offload/test/offloading/bug64959.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,6 @@ int main(void) {
5555
return 1;
5656
}
5757
// CHECK: Success
58-
printf("Success\n");
58+
printf("Suc disabled cess\n");
5959
return 0;
6060
}

0 commit comments

Comments
 (0)