Skip to content

Commit 4b10495

Browse files
committed
flang-new: fix coarse grain fail
1 parent cca5f2a commit 4b10495

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,6 +1051,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
10511051
return OMPX_MinThreadsForLowTripCount;
10521052
}
10531053

1054+
virtual uint32_t getUseCoarseGrain() {
1055+
return OMPX_EnableCoarseAllocs;
1056+
}
10541057
/// Whether or not to reuse blocks for high trip count loops.
10551058
/// @see OMPX_ReuseBlocksForHighTripCount
10561059
bool getReuseBlocksForHighTripCount() {
@@ -1209,6 +1212,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
12091212
BoolEnvar OMPX_ReuseBlocksForHighTripCount =
12101213
BoolEnvar("LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT", true);
12111214

1215+
/// Envar to enable coarse allocs.
1216+
BoolEnvar OMPX_EnableCoarseAllocs;
1217+
12121218
protected:
12131219
/// Environment variables defined by the LLVM OpenMP implementation
12141220
/// regarding the initial number of streams and events.

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,7 @@ GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
927927
OMPX_InitialNumEvents("LIBOMPTARGET_NUM_INITIAL_EVENTS", 1),
928928
OMPX_NumMultiDevices("LIBOMPTARGET_NUM_MULTI_DEVICES", 0),
929929
OMPX_EnableRuntimeAutotuning("OMPX_ENABLE_RUNTIME_AUTOTUNING", false),
930+
OMPX_EnableCoarseAllocs("OMPX_ENABLE_COARSE_ALLOCS", false),
930931
DeviceId(DeviceId), GridValues(OMPGridValues),
931932
PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock(),
932933
PinnedAllocs(*this), RPCServer(nullptr) {
@@ -2208,7 +2209,7 @@ void *GenericPluginTy::data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr,
22082209

22092210
// Method has no effect when the CUDA Plugin is used.
22102211
// This method can only be called if HostPtr is not null.
2211-
if (HostPtr && Kind == TARGET_ALLOC_SHARED)
2212+
if (HostPtr && Kind == TARGET_ALLOC_SHARED && getDevice(DeviceId).getUseCoarseGrain() )
22122213
set_coarse_grain_mem_region(DeviceId, HostPtr, Size);
22132214

22142215
return *AllocOrErr;

0 commit comments

Comments
 (0)