Skip to content

Commit fa3c742

Browse files
committed
[OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause
1 parent f66e5fa commit fa3c742

File tree

23 files changed

+418
-65
lines changed

23 files changed

+418
-65
lines changed

offload/DeviceRTL/include/DeviceTypes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,4 +163,8 @@ typedef enum omp_allocator_handle_t {
163163

164164
///}
165165

166+
/// Access selector passed to the dyn_groupprivate routines. Only
/// omp_access_cgroup is defined.
typedef enum omp_access_t {
  omp_access_cgroup = 0,
} omp_access_t;
169+
166170
#endif

offload/DeviceRTL/include/Interface.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ struct KernelEnvironmentTy;
222222
int8_t __kmpc_is_spmd_exec_mode();
223223

224224
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
225-
KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
225+
KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
226226

227227
void __kmpc_target_deinit();
228228

offload/DeviceRTL/include/State.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ extern Local<ThreadStateTy **> ThreadStates;
116116

117117
/// Initialize the state machinery. Must be called by all threads.
118118
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
119-
KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
119+
KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
120120

121121
/// Return the kernel and kernel launch environment associated with the current
122122
/// kernel. The former is static and contains compile time information that

offload/DeviceRTL/src/Kernel.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ enum OMPTgtExecModeFlags : unsigned char {
3434
};
3535

3636
static void
37-
inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
38-
KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
37+
initializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
38+
KernelLaunchEnvironmentTy *KernelLaunchEnvironment) {
3939
// Order is important here.
4040
synchronize::init(IsSPMD);
4141
mapping::init(IsSPMD);
@@ -80,17 +80,17 @@ extern "C" {
8080
/// \param Ident Source location identification, can be NULL.
8181
///
8282
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
83-
KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
83+
KernelLaunchEnvironmentTy *KernelLaunchEnvironment) {
8484
ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
8585
bool IsSPMD = Configuration.ExecMode & OMP_TGT_EXEC_MODE_SPMD;
8686
bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
8787
if (IsSPMD) {
88-
inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
89-
KernelLaunchEnvironment);
88+
initializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
89+
KernelLaunchEnvironment);
9090
synchronize::threadsAligned(atomic::relaxed);
9191
} else {
92-
inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
93-
KernelLaunchEnvironment);
92+
initializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
93+
KernelLaunchEnvironment);
9494
// No need to wait since only the main threads will execute user
9595
// code and workers will run into a barrier right away.
9696
}

offload/DeviceRTL/src/State.cpp

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,34 @@ void SharedMemorySmartStackTy::pop(void *Ptr, uint64_t Bytes) {
158158
memory::freeGlobal(Ptr, "Slow path shared memory deallocation");
159159
}
160160

161+
struct DynCGroupMemTy {
162+
void init(KernelLaunchEnvironmentTy *KLE, void *NativeDynCGroup) {
163+
Size = 0;
164+
Ptr = nullptr;
165+
IsFallback = false;
166+
if (KLE) {
167+
Size = KLE->DynCGroupMemSize;
168+
if (void *Fallback = KLE->DynCGroupMemFallback) {
169+
Ptr = static_cast<char *>(Fallback) + Size * omp_get_team_num();
170+
IsFallback = true;
171+
} else {
172+
Ptr = static_cast<char *>(NativeDynCGroup);
173+
}
174+
}
175+
}
176+
177+
char *getPtr(size_t Offset) const { return Ptr + Offset; }
178+
bool isFallback() const { return IsFallback; }
179+
size_t getSize() const { return Size; }
180+
181+
private:
182+
char *Ptr;
183+
size_t Size;
184+
bool IsFallback;
185+
};
186+
187+
/// Dynamic cgroup memory state. It is declared loader_uninitialized and has no
/// initializer; state::init() fills it in via DynCGroupMemTy::init().
/// NOTE(review): Local<> presumably places this in team-local memory -- confirm.
[[clang::loader_uninitialized]] static Local<DynCGroupMemTy> DynCGroupMem;
188+
161189
} // namespace
162190

163191
void *memory::getDynamicBuffer() { return DynamicSharedBuffer; }
@@ -246,13 +274,18 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
246274
} // namespace
247275

248276
void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
249-
KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
277+
KernelLaunchEnvironmentTy *KLE) {
250278
SharedMemorySmartStack.init(IsSPMD);
279+
280+
if (KLE == reinterpret_cast<KernelLaunchEnvironmentTy *>(~0))
281+
KLE = nullptr;
282+
251283
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
284+
DynCGroupMem.init(KLE, DynamicSharedBuffer);
252285
TeamState.init(IsSPMD);
253286
ThreadStates = nullptr;
254287
KernelEnvironmentPtr = &KernelEnvironment;
255-
KernelLaunchEnvironmentPtr = &KernelLaunchEnvironment;
288+
KernelLaunchEnvironmentPtr = KLE;
256289
}
257290
}
258291

@@ -430,6 +463,17 @@ int omp_get_team_num() { return mapping::getBlockIdInKernel(); }
430463
int omp_get_initial_device(void) { return -1; }
431464

432465
int omp_is_initial_device(void) { return 0; }
466+
467+
void *omp_get_dyn_groupprivate_ptr(size_t Offset, int *IsFallback,
468+
omp_access_t) {
469+
if (IsFallback != NULL)
470+
*IsFallback = DynCGroupMem.isFallback();
471+
return DynCGroupMem.getPtr(Offset);
472+
}
473+
474+
/// Return the size in bytes recorded in DynCGroupMem. The access argument is
/// ignored.
size_t omp_get_dyn_groupprivate_size(omp_access_t /*Access*/) {
  const size_t Bytes = DynCGroupMem.getSize();
  return Bytes;
}
433477
}
434478

435479
extern "C" {

offload/include/Shared/APITypes.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,10 @@ struct KernelArgsTy {
9797
struct {
9898
uint64_t NoWait : 1; // Was this kernel spawned with a `nowait` clause.
9999
uint64_t IsCUDA : 1; // Was this kernel spawned via CUDA.
100-
uint64_t Unused : 62;
101-
} Flags = {0, 0, 0};
100+
uint64_t AllowDynCGroupMemFallback : 1; // Allow fallback for dynamic cgroup
101+
// memory.
102+
uint64_t Unused : 61;
103+
} Flags = {0, 0, 0, 0};
102104
// The number of teams (for x,y,z dimension).
103105
uint32_t NumTeams[3] = {0, 0, 0};
104106
// The number of threads (for x,y,z dimension).

offload/include/Shared/Environment.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,11 @@ struct KernelEnvironmentTy {
9393
};
9494

9595
struct KernelLaunchEnvironmentTy {
96+
void *ReductionBuffer = nullptr;
97+
void *DynCGroupMemFallback = nullptr;
9698
uint32_t ReductionCnt = 0;
9799
uint32_t ReductionIterCnt = 0;
98-
void *ReductionBuffer = nullptr;
100+
uint32_t DynCGroupMemSize = 0;
99101
};
100102

101103
#endif // OMPTARGET_SHARED_ENVIRONMENT_H

offload/include/device.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@ struct DeviceTy {
158158
/// Indicate that there are pending images for this device or not.
159159
void setHasPendingImages(bool V) { HasPendingImages = V; }
160160

161+
/// Get the maximum shared memory per team for any kernel.
162+
uint64_t getMaxSharedTeamMemory();
163+
161164
private:
162165
/// Deinitialize the device (and plugin).
163166
void deinit();

offload/include/omptarget.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ enum TargetAllocTy : int32_t {
107107

108108
inline KernelArgsTy CTorDTorKernelArgs = {1, 0, nullptr, nullptr,
109109
nullptr, nullptr, nullptr, nullptr,
110-
0, {0,0,0}, {1, 0, 0}, {1, 0, 0}, 0};
110+
0, {0,0,0,0}, {1, 0, 0}, {1, 0, 0}, 0};
111111

112112
struct DeviceTy;
113113

@@ -273,10 +273,15 @@ struct __tgt_target_non_contig {
273273
extern "C" {
274274
#endif
275275

276+
/// Access selector accepted by omp_get_groupprivate_limit. Only
/// omp_access_cgroup is defined.
typedef enum omp_access_t {
  omp_access_cgroup = 0,
} omp_access_t;
279+
276280
void ompx_dump_mapping_tables(void);
277281
int omp_get_num_devices(void);
278282
int omp_get_device_num(void);
279283
int omp_get_initial_device(void);
284+
/// Return the groupprivate memory limit for device \p device_num: 0 for the
/// initial device, otherwise the device's maximum shared memory per team.
/// NOTE(review): the default argument is C++-only although this declaration
/// sits inside an extern "C" region -- confirm there are no C consumers.
size_t omp_get_groupprivate_limit(int device_num, omp_access_t access_group = omp_access_cgroup);
280285
void *omp_target_alloc(size_t Size, int DeviceNum);
281286
void omp_target_free(void *DevicePtr, int DeviceNum);
282287
int omp_target_is_present(const void *Ptr, int DeviceNum);

offload/libomptarget/OpenMP/API.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,20 @@ EXTERN int omp_get_initial_device(void) {
9898
return HostDevice;
9999
}
100100

101+
/// Return the groupprivate memory limit of device \p DeviceNum.
///
/// The initial device reports 0. For any other device the result is its
/// maximum shared team memory; failing to look the device up is reported
/// through FATAL_MESSAGE. \p AccessGroup is not consulted.
EXTERN size_t omp_get_groupprivate_limit(int DeviceNum,
                                         omp_access_t AccessGroup) {
  TIMESCOPE();
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));

  const bool IsInitialDevice = DeviceNum == omp_get_initial_device();
  if (IsInitialDevice)
    return 0;

  auto DeviceOrErr = PM->getDevice(DeviceNum);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());

  return DeviceOrErr->getMaxSharedTeamMemory();
}
114+
101115
EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) {
102116
TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DeviceNum) +
103117
";size=" + std::to_string(Size));

0 commit comments

Comments
 (0)