diff --git a/offload/include/OffloadPolicy.h b/offload/include/OffloadPolicy.h index 800fefb224326..d794376f2b59e 100644 --- a/offload/include/OffloadPolicy.h +++ b/offload/include/OffloadPolicy.h @@ -37,12 +37,12 @@ class OffloadPolicy { return; default: if (PM.getNumDevices()) { - DP("Default TARGET OFFLOAD policy is now mandatory " - "(devices were found)\n"); + DPIF(RTL, "Default TARGET OFFLOAD policy is now mandatory " + "(devices were found)\n"); Kind = MANDATORY; } else { - DP("Default TARGET OFFLOAD policy is now disabled " - "(no devices were found)\n"); + DPIF(RTL, "Default TARGET OFFLOAD policy is now disabled " + "(no devices were found)\n"); Kind = DISABLED; } return; diff --git a/offload/include/OpenMP/OMPT/Connector.h b/offload/include/OpenMP/OMPT/Connector.h index c7b37740d5642..d37ea07e62166 100644 --- a/offload/include/OpenMP/OMPT/Connector.h +++ b/offload/include/OpenMP/OMPT/Connector.h @@ -76,7 +76,7 @@ class OmptLibraryConnectorTy { std::string LibName = LibIdent; LibName += ".so"; - DP("OMPT: Trying to load library %s\n", LibName.c_str()); + DPIF(TOOL, "OMPT: Trying to load library %s\n", LibName.c_str()); auto DynLibHandle = std::make_unique( llvm::sys::DynamicLibrary::getPermanentLibrary(LibName.c_str(), &ErrMsg)); @@ -85,12 +85,12 @@ class OmptLibraryConnectorTy { LibConnHandle = nullptr; } else { auto LibConnRtn = "ompt_" + LibIdent + "_connect"; - DP("OMPT: Trying to get address of connection routine %s\n", - LibConnRtn.c_str()); + DPIF(TOOL, "OMPT: Trying to get address of connection routine %s\n", + LibConnRtn.c_str()); LibConnHandle = reinterpret_cast( DynLibHandle->getAddressOfSymbol(LibConnRtn.c_str())); } - DP("OMPT: Library connection handle = %p\n", LibConnHandle); + DPIF(TOOL, "OMPT: Library connection handle = %p\n", LibConnHandle); IsInitialized = true; } diff --git a/offload/include/Shared/Debug.h b/offload/include/Shared/Debug.h index 7c3db8dbf119f..b16bd5ea0f4a2 100644 --- a/offload/include/Shared/Debug.h +++ b/offload/include/Shared/Debug.h @@ -40,6 +40,7 @@ #include #include +#include #include /// 32-Bit field data attributes controlling information presented to the user. @@ -62,6 +63,38 @@ enum OpenMPInfoType : uint32_t { OMP_INFOTYPE_ALL = 0xffffffff, }; +/// 32-bit field attributes controlling debug trace/dump +enum DebugInfoType : uint32_t { + /// Generic plugin/runtime interface/management + DEBUG_INFOTYPE_RTL = 0x0001, + /// Generic device activity + DEBUG_INFOTYPE_DEVICE = 0x0002, + /// Module preparation + DEBUG_INFOTYPE_MODULE = 0x0004, + /// Kernel preparation and invocation + DEBUG_INFOTYPE_KERNEL = 0x0008, + /// Memory allocation/deallocation or related activities + DEBUG_INFOTYPE_MEMORY = 0x0010, + /// Data-mapping activities + DEBUG_INFOTYPE_MAP = 0x0020, + /// Data-copying or similar activities + DEBUG_INFOTYPE_COPY = 0x0040, + /// OpenMP interop + DEBUG_INFOTYPE_INTEROP = 0x0080, + /// Tool interface + DEBUG_INFOTYPE_TOOL = 0x0100, + /// Backend API tracing + DEBUG_INFOTYPE_API = 0x0200, + /// All + DEBUG_INFOTYPE_ALL = 0xffffffff, +}; + +/// Debug option struct to support both numeric and string value +struct DebugOptionTy { + uint32_t Level; + uint32_t Type; +}; + inline std::atomic &getInfoLevelInternal() { static std::atomic InfoLevel; static std::once_flag Flag{}; @@ -75,17 +108,45 @@ inline std::atomic &getInfoLevelInternal() { inline uint32_t getInfoLevel() { return getInfoLevelInternal().load(); } -inline uint32_t getDebugLevel() { - static uint32_t DebugLevel = 0; - static std::once_flag Flag{}; - std::call_once(Flag, []() { - if (char *EnvStr = getenv("LIBOMPTARGET_DEBUG")) - DebugLevel = std::stoi(EnvStr); - }); - - return DebugLevel; +inline DebugOptionTy &getDebugOption() { + static DebugOptionTy DebugOption = []() { + DebugOptionTy OptVal{0, 0}; + char *EnvStr = getenv("LIBOMPTARGET_DEBUG"); + if (!EnvStr || *EnvStr == '0') + return OptVal; // undefined or explicitly defined as zero + OptVal.Level = std::atoi(EnvStr); + if (OptVal.Level) + return OptVal; // defined as numeric value + struct DebugStrToBitTy { + const char *Str; + uint32_t Bit; + } DebugStrToBit[] = { + {"rtl", DEBUG_INFOTYPE_RTL}, {"device", DEBUG_INFOTYPE_DEVICE}, + {"module", DEBUG_INFOTYPE_MODULE}, {"kernel", DEBUG_INFOTYPE_KERNEL}, + {"memory", DEBUG_INFOTYPE_MEMORY}, {"map", DEBUG_INFOTYPE_MAP}, + {"copy", DEBUG_INFOTYPE_COPY}, {"interop", DEBUG_INFOTYPE_INTEROP}, + {"tool", DEBUG_INFOTYPE_TOOL}, {"api", DEBUG_INFOTYPE_API}, + {"all", DEBUG_INFOTYPE_ALL}, {nullptr, 0}, + }; + // Check string value of the option. Comma-separated list of the known + // keywords are accepted. + std::istringstream Tokens(EnvStr); + for (std::string Token; std::getline(Tokens, Token, ',');) { + for (int I = 0; DebugStrToBit[I].Str; I++) { + if (Token == DebugStrToBit[I].Str) { + OptVal.Type |= DebugStrToBit[I].Bit; + break; + } + } + } + return OptVal; + }(); + return DebugOption; } +inline uint32_t getDebugLevel() { return getDebugOption().Level; } +inline uint32_t getDebugType() { return getDebugOption().Type; } + #undef USED #undef GCC_VERSION @@ -154,18 +215,25 @@ inline uint32_t getDebugLevel() { fprintf(stderr, __VA_ARGS__); \ } -/// Emit a message for debugging -#define DP(...) \ +/// Check if debug option is turned on for `Type` +#define DPSET(Type) \ + ((getDebugType() & DEBUG_INFOTYPE_##Type) || getDebugLevel() > 0) + +/// Emit a message for debugging if related to `Type` +#define DPIF(Type, ...) \ do { \ - if (getDebugLevel() > 0) { \ + if (DPSET(Type)) { \ DEBUGP(DEBUG_PREFIX, __VA_ARGS__); \ } \ } while (false) +/// Emit a message for debugging +#define DP(...) DPIF(ALL, __VA_ARGS__); + /// Emit a message for debugging or failure if debugging is disabled #define REPORT(...) \ do { \ - if (getDebugLevel() > 0) { \ + if (DPSET(ALL)) { \ DP(__VA_ARGS__); \ } else { \ FAILURE_MESSAGE(__VA_ARGS__); \ @@ -174,15 +242,45 @@ inline uint32_t getDebugLevel() { #else #define DEBUGP(prefix, ...) \ {} +#define DPSET(Type) false +#define DPIF(Type, ...) \ + { \ + } #define DP(...) \ {} #define REPORT(...) FAILURE_MESSAGE(__VA_ARGS__); #endif // OMPTARGET_DEBUG +#ifdef OMPTARGET_DEBUG +// Convert `OpenMPInfoType` to corresponding `DebugInfoType` +inline bool debugInfoEnabled(OpenMPInfoType InfoType) { + switch (InfoType) { + case OMP_INFOTYPE_KERNEL_ARGS: + [[fallthrough]]; + case OMP_INFOTYPE_PLUGIN_KERNEL: + return DPSET(KERNEL); + case OMP_INFOTYPE_MAPPING_EXISTS: + [[fallthrough]]; + case OMP_INFOTYPE_DUMP_TABLE: + [[fallthrough]]; + case OMP_INFOTYPE_MAPPING_CHANGED: + [[fallthrough]]; + case OMP_INFOTYPE_EMPTY_MAPPING: + return DPSET(MAP); + case OMP_INFOTYPE_DATA_TRANSFER: + return DPSET(COPY); + case OMP_INFOTYPE_ALL: + return DPSET(ALL); + } +} +#else +#define debugInfoEnabled(InfoType) false +#endif // OMPTARGET_DEBUG + /// Emit a message giving the user extra information about the runtime if #define INFO(_flags, _id, ...) \ do { \ - if (getDebugLevel() > 0) { \ + if (debugInfoEnabled(_flags)) { \ DEBUGP(DEBUG_PREFIX, __VA_ARGS__); \ } else if (getInfoLevel() & _flags) { \ INFO_MESSAGE(_id, __VA_ARGS__); \ diff --git a/offload/include/Shared/EnvironmentVar.h b/offload/include/Shared/EnvironmentVar.h index 82f434e91a85b..94974615a05d4 100644 --- a/offload/include/Shared/EnvironmentVar.h +++ b/offload/include/Shared/EnvironmentVar.h @@ -61,7 +61,8 @@ template class Envar { IsPresent = StringParser::parse(EnvStr, Data); if (!IsPresent) { - DP("Ignoring invalid value %s for envar %s\n", EnvStr, Name.data()); + DPIF(RTL, "Ignoring invalid value %s for envar %s\n", EnvStr, + Name.data()); Data = Default; } } @@ -180,12 +181,13 @@ inline llvm::Error Envar::init(llvm::StringRef Name, GetterFunctor Getter, // not present and reset to the getter value (default). IsPresent = false; Data = Default; - DP("Setter of envar %s failed, resetting to %s\n", Name.data(), - std::to_string(Data).data()); + DPIF(RTL, "Setter of envar %s failed, resetting to %s\n", Name.data(), + std::to_string(Data).data()); consumeError(std::move(Err)); } } else { - DP("Ignoring invalid value %s for envar %s\n", EnvStr, Name.data()); + DPIF(RTL, "Ignoring invalid value %s for envar %s\n", EnvStr, + Name.data()); Data = Default; } } else { diff --git a/offload/libomptarget/LegacyAPI.cpp b/offload/libomptarget/LegacyAPI.cpp index 033d7a3ef712a..64297d92e879a 100644 --- a/offload/libomptarget/LegacyAPI.cpp +++ b/offload/libomptarget/LegacyAPI.cpp @@ -180,7 +180,8 @@ EXTERN int __tgt_target_teams_nowait_mapper( EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId, uint64_t LoopTripcount) { TIMESCOPE_WITH_IDENT(Loc); - DP("WARNING: __kmpc_push_target_tripcount has been deprecated and is a noop"); + DPIF(RTL, "WARNING: __kmpc_push_target_tripcount has been deprecated and is " + "a noop"); } EXTERN void __kmpc_push_target_tripcount(int64_t DeviceId, diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 04bd21ec91a49..48e0347e8af00 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -35,7 +35,7 @@ void initRuntime() { RefCount++; if (RefCount == 1) { - DP("Init offload library!\n"); + DPIF(RTL, "Init offload library!\n"); #ifdef OMPT_SUPPORT // Initialize OMPT first llvm::omp::target::ompt::connectLibrary(); @@ -54,12 +54,12 @@ void deinitRuntime() { assert(PM && "Runtime not initialized"); if (RefCount == 1) { - DP("Deinit offload library!\n"); + DPIF(RTL, "Deinit offload library!\n"); // RTL deinitialization has started RTLAlive = false; while (RTLOngoingSyncs > 0) { - DP("Waiting for ongoing syncs to finish, count: %d\n", - RTLOngoingSyncs.load()); + DPIF(RTL, "Waiting for ongoing syncs to finish, count: %d\n", + RTLOngoingSyncs.load()); std::this_thread::sleep_for(std::chrono::milliseconds(100)); } PM->deinit(); diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index b0f0573833713..14f20f044be89 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -76,7 +76,7 @@ EXTERN int omp_get_num_devices(void) { OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); size_t NumDevices = PM->getNumDevices(); - DP("Call to omp_get_num_devices returning %zd\n", NumDevices); + DPIF(DEVICE, "Call to omp_get_num_devices returning %zd\n", NumDevices); return NumDevices; } @@ -86,7 +86,7 @@ EXTERN int omp_get_device_num(void) { OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); int HostDevice = omp_get_initial_device(); - DP("Call to omp_get_device_num returning %d\n", HostDevice); + DPIF(DEVICE, "Call to omp_get_device_num returning %d\n", HostDevice); return HostDevice; } @@ -95,7 +95,7 @@ EXTERN int omp_get_initial_device(void) { TIMESCOPE(); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); int HostDevice = omp_get_num_devices(); - DP("Call to omp_get_initial_device returning %d\n", HostDevice); + DPIF(DEVICE, "Call to omp_get_initial_device returning %d\n", HostDevice); return HostDevice; } @@ -166,16 +166,17 @@ EXTERN void llvm_omp_target_unlock_mem(void *Ptr, int DeviceNum) { EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { TIMESCOPE(); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n", - DeviceNum, DPxPTR(Ptr)); + DPIF(MAP, + "Call to omp_target_is_present for device %d and address " DPxMOD "\n", + DeviceNum, DPxPTR(Ptr)); if (!Ptr) { - DP("Call to omp_target_is_present with NULL ptr, returning false\n"); + DPIF(MAP, "Call to omp_target_is_present with NULL ptr, returning false\n"); return false; } if (DeviceNum == omp_get_initial_device()) { - DP("Call to omp_target_is_present on host, returning true\n"); + DPIF(MAP, "Call to omp_target_is_present on host, returning true\n"); return true; } @@ -192,7 +193,7 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false); int Rc = TPR.isPresent(); - DP("Call to omp_target_is_present returns %d\n", Rc); + DPIF(MAP, "Call to omp_target_is_present returns %d\n", Rc); return Rc; } @@ -203,15 +204,16 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, ";src_dev=" + std::to_string(SrcDevice) + ";size=" + std::to_string(Length)); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - DP("Call to omp_target_memcpy, dst device %d, src device %d, " - "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " - "src offset %zu, length %zu\n", - DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset, - Length); + DPIF(COPY, + "Call to omp_target_memcpy, dst device %d, src device %d, " + "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " + "src offset %zu, length %zu\n", + DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset, + Length); if (!Dst || !Src || Length <= 0) { if (Length == 0) { - DP("Call to omp_target_memcpy with zero length, nothing to do\n"); + DPIF(COPY, "Call to omp_target_memcpy with zero length, nothing to do\n"); return OFFLOAD_SUCCESS; } @@ -225,12 +227,12 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, if (SrcDevice == omp_get_initial_device() && DstDevice == omp_get_initial_device()) { - DP("copy from host to host\n"); + DPIF(COPY, "copy from host to host\n"); const void *P = memcpy(DstAddr, SrcAddr, Length); if (P == NULL) Rc = OFFLOAD_FAIL; } else if (SrcDevice == omp_get_initial_device()) { - DP("copy from host to device\n"); + DPIF(COPY, "copy from host to device\n"); auto DstDeviceOrErr = PM->getDevice(DstDevice); if (!DstDeviceOrErr) FATAL_MESSAGE(DstDevice, "%s", @@ -238,7 +240,7 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, AsyncInfoTy AsyncInfo(*DstDeviceOrErr); Rc = DstDeviceOrErr->submitData(DstAddr, SrcAddr, Length, AsyncInfo); } else if (DstDevice == omp_get_initial_device()) { - DP("copy from device to host\n"); + DPIF(COPY, "copy from device to host\n"); auto SrcDeviceOrErr = PM->getDevice(SrcDevice); if (!SrcDeviceOrErr) FATAL_MESSAGE(SrcDevice, "%s", @@ -246,7 +248,7 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, AsyncInfoTy AsyncInfo(*SrcDeviceOrErr); Rc = SrcDeviceOrErr->retrieveData(DstAddr, SrcAddr, Length, AsyncInfo); } else { - DP("copy from device to device\n"); + DPIF(COPY, "copy from device to device\n"); auto SrcDeviceOrErr = PM->getDevice(SrcDevice); if (!SrcDeviceOrErr) FATAL_MESSAGE(SrcDevice, "%s", @@ -278,7 +280,7 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, free(Buffer); } - DP("omp_target_memcpy returns %d\n", Rc); + DPIF(COPY, "omp_target_memcpy returns %d\n", Rc); return Rc; } @@ -301,12 +303,12 @@ static int libomp_target_memcpy_async_task(int32_t Gtid, kmp_task_t *Task) { Args->DstOffsets, Args->SrcOffsets, Args->DstDimensions, Args->SrcDimensions, Args->DstDevice, Args->SrcDevice); - DP("omp_target_memcpy_rect returns %d\n", Rc); + DPIF(COPY, "omp_target_memcpy_rect returns %d\n", Rc); } else { Rc = omp_target_memcpy(Args->Dst, Args->Src, Args->Length, Args->DstOffset, Args->SrcOffset, Args->DstDevice, Args->SrcDevice); - DP("omp_target_memcpy returns %d\n", Rc); + DPIF(COPY, "omp_target_memcpy returns %d\n", Rc); } // Release the arguments object @@ -380,8 +382,9 @@ EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes, int DeviceNum) { TIMESCOPE(); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n", - DeviceNum, Ptr, NumBytes); + DPIF(COPY, + "Call to omp_target_memset, device %d, device pointer %p, size %zu\n", + DeviceNum, Ptr, NumBytes); // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation // of unspecified behavior, see OpenMP spec). @@ -390,7 +393,7 @@ EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes, } if (DeviceNum == omp_get_initial_device()) { - DP("filling memory on host via memset"); + DPIF(COPY, "filling memory on host via memset"); memset(Ptr, ByteVal, NumBytes); // ignore return value, memset() cannot fail } else { // TODO: replace the omp_target_memset() slow path with the fast path. @@ -410,12 +413,12 @@ EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes, // If the omp_target_alloc has failed, let's just not do anything. // omp_target_memset does not have any good way to fail, so we // simply avoid a catastrophic failure of the process for now. - DP("omp_target_memset failed to fill memory due to error with " - "omp_target_alloc"); + DPIF(COPY, "omp_target_memset failed to fill memory due to error with " + "omp_target_alloc"); } } - DP("omp_target_memset returns %p\n", Ptr); + DPIF(COPY, "omp_target_memset returns %p\n", Ptr); return Ptr; } @@ -423,8 +426,10 @@ EXTERN void *omp_target_memset_async(void *Ptr, int ByteVal, size_t NumBytes, int DeviceNum, int DepObjCount, omp_depend_t *DepObjList) { OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - DP("Call to omp_target_memset_async, device %d, device pointer %p, size %zu", - DeviceNum, Ptr, NumBytes); + DPIF( + COPY, + "Call to omp_target_memset_async, device %d, device pointer %p, size %zu", + DeviceNum, Ptr, NumBytes); // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation // of unspecified behavior, see OpenMP spec). @@ -450,11 +455,12 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, ";src_dev=" + std::to_string(SrcDevice) + ";size=" + std::to_string(Length)); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - DP("Call to omp_target_memcpy_async, dst device %d, src device %d, " - "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " - "src offset %zu, length %zu\n", - DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset, - Length); + DPIF(COPY, + "Call to omp_target_memcpy_async, dst device %d, src device %d, " + "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " + "src offset %zu, length %zu\n", + DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset, + Length); // Check the source and dest address if (Dst == nullptr || Src == nullptr) @@ -468,7 +474,7 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task, DepObjCount, DepObjList); - DP("omp_target_memcpy_async returns %d\n", Rc); + DPIF(COPY, "omp_target_memcpy_async returns %d\n", Rc); return Rc; } @@ -479,17 +485,19 @@ omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize, const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, int SrcDevice) { OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, " - "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " - "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " - "volume " DPxMOD ", element size %zu, num_dims %d\n", - DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets), - DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions), - DPxPTR(Volume), ElementSize, NumDims); + DPIF(COPY, + "Call to omp_target_memcpy_rect, dst device %d, src device %d, " + "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " + "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " + "volume " DPxMOD ", element size %zu, num_dims %d\n", + DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets), + DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions), + DPxPTR(Volume), ElementSize, NumDims); if (!(Dst || Src)) { - DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n", - INT_MAX); + DPIF(COPY, + "Call to omp_target_memcpy_rect returns max supported dimensions %d\n", + INT_MAX); return INT_MAX; } @@ -522,13 +530,14 @@ omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize, DstDimensions + 1, SrcDimensions + 1, DstDevice, SrcDevice); if (Rc) { - DP("Recursive call to omp_target_memcpy_rect returns unsuccessfully\n"); + DPIF(COPY, "Recursive call to omp_target_memcpy_rect returns " + "unsuccessfully\n"); return Rc; } } } - DP("omp_target_memcpy_rect returns %d\n", Rc); + DPIF(COPY, "omp_target_memcpy_rect returns %d\n", Rc); return Rc; } @@ -542,18 +551,20 @@ EXTERN int omp_target_memcpy_rect_async( ";size=" + std::to_string(ElementSize) + ";num_dims=" + std::to_string(NumDims)); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, " - "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " - "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " - "volume " DPxMOD ", element size %zu, num_dims %d\n", - DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets), - DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions), - DPxPTR(Volume), ElementSize, NumDims); + DPIF(COPY, + "Call to omp_target_memcpy_rect_async, dst device %d, src device %d, " + "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " + "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " + "volume " DPxMOD ", element size %zu, num_dims %d\n", + DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets), + DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions), + DPxPTR(Volume), ElementSize, NumDims); // Need to check this first to not return OFFLOAD_FAIL instead if (!Dst && !Src) { - DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n", - INT_MAX); + DPIF(COPY, + "Call to omp_target_memcpy_rect returns max supported dimensions %d\n", + INT_MAX); return INT_MAX; } @@ -570,7 +581,7 @@ EXTERN int omp_target_memcpy_rect_async( int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task, DepObjCount, DepObjList); - DP("omp_target_memcpy_rect_async returns %d\n", Rc); + DPIF(COPY, "omp_target_memcpy_rect_async returns %d\n", Rc); return Rc; } @@ -579,9 +590,10 @@ EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr, int DeviceNum) { TIMESCOPE(); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD ", " - "device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n", - DPxPTR(HostPtr), DPxPTR(DevicePtr), Size, DeviceOffset, DeviceNum); + DPIF(MAP, + "Call to omp_target_associate_ptr with host_ptr " DPxMOD ", " + "device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n", + DPxPTR(HostPtr), DPxPTR(DevicePtr), Size, DeviceOffset, DeviceNum); if (!HostPtr || !DevicePtr || Size <= 0) { REPORT("Call to omp_target_associate_ptr with invalid arguments\n"); @@ -606,16 +618,17 @@ EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr, int Rc = DeviceOrErr->getMappingInfo().associatePtr( const_cast(HostPtr), const_cast(DeviceAddr), Size); - DP("omp_target_associate_ptr returns %d\n", Rc); + DPIF(MAP, "omp_target_associate_ptr returns %d\n", Rc); return Rc; } EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) { TIMESCOPE(); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", " - "device_num %d\n", - DPxPTR(HostPtr), DeviceNum); + DPIF(MAP, + "Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", " + "device_num %d\n", + DPxPTR(HostPtr), DeviceNum); if (!HostPtr) { REPORT("Call to omp_target_associate_ptr with invalid host_ptr\n"); @@ -639,15 +652,15 @@ EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) { int Rc = DeviceOrErr->getMappingInfo().disassociatePtr( const_cast(HostPtr)); - DP("omp_target_disassociate_ptr returns %d\n", Rc); + DPIF(MAP, "omp_target_disassociate_ptr returns %d\n", Rc); return Rc; } EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { TIMESCOPE(); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - DP("Call to omp_get_mapped_ptr with ptr " DPxMOD ", device_num %d.\n", - DPxPTR(Ptr), DeviceNum); + DPIF(MAP, "Call to omp_get_mapped_ptr with ptr " DPxMOD ", device_num %d.\n", + DPxPTR(Ptr), DeviceNum); if (!Ptr) { REPORT("Call to omp_get_mapped_ptr with nullptr.\n"); @@ -656,13 +669,13 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { int NumDevices = omp_get_initial_device(); if (DeviceNum == NumDevices) { - DP("Device %d is initial device, returning Ptr " DPxMOD ".\n", DeviceNum, - DPxPTR(Ptr)); + DPIF(MAP, "Device %d is initial device, returning Ptr " DPxMOD ".\n", + DeviceNum, DPxPTR(Ptr)); return const_cast(Ptr); } if (NumDevices <= DeviceNum) { - DP("DeviceNum %d is invalid, returning nullptr.\n", DeviceNum); + DPIF(MAP, "DeviceNum %d is invalid, returning nullptr.\n", DeviceNum); return nullptr; } @@ -675,12 +688,13 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false); if (!TPR.isPresent()) { - DP("Ptr " DPxMOD "is not present on device %d, returning nullptr.\n", - DPxPTR(Ptr), DeviceNum); + DPIF(MAP, "Ptr " DPxMOD "is not present on device %d, returning nullptr.\n", + DPxPTR(Ptr), DeviceNum); return nullptr; } - DP("omp_get_mapped_ptr returns " DPxMOD ".\n", DPxPTR(TPR.TargetPointer)); + DPIF(MAP, "omp_get_mapped_ptr returns " DPxMOD ".\n", + DPxPTR(TPR.TargetPointer)); return TPR.TargetPointer; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index c55ef2c2e672c..c8ef607dfde42 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -200,11 +200,12 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, interop_spec_t *Prefers, interop_ctx_t *Ctx, dep_pack_t *Deps) { - DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 - ", number of preferred specs %" PRId32 "%s%s\n", - __func__, DeviceNum, InteropType, NumPrefers, - Ctx->flags.implicit ? " (implicit)" : "", - Ctx->flags.nowait ? " (nowait)" : ""); + DPIF(INTEROP, + "Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) return omp_interop_none; @@ -217,8 +218,9 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, if (InteropType == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop creation not supported yet. " - "Ignored\n"); + DPIF(INTEROP, + "Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); if (Deps) __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); @@ -226,9 +228,10 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, auto DeviceOrErr = PM->getDevice(DeviceNum); if (!DeviceOrErr) { - DP("Couldn't find device %" PRId64 - " while constructing interop object: %s\n", - DeviceNum, toString(DeviceOrErr.takeError()).c_str()); + DPIF(INTEROP, + "Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, toString(DeviceOrErr.takeError()).c_str()); return omp_interop_none; } auto &Device = *DeviceOrErr; @@ -236,12 +239,14 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, auto InteropSpec = Device.RTL->select_interop_preference( DeviceNum, InteropType, NumPrefers, Prefers); if (InteropSpec.fr_id == tgt_fr_none) { - DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + DPIF(INTEROP, "Interop request not supported by device %" PRId64 "\n", + DeviceNum); return omp_interop_none; } - DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", - getForeignRuntimeIdToStr((tgt_foreign_runtime_id_t)InteropSpec.fr_id), - InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + DPIF(INTEROP, + "Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((tgt_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); if (Ctx->flags.implicit) { // This is a request for an RTL managed interop object. @@ -250,17 +255,19 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { Interop = iop; Interop->markDirty(); - DP("Reused interop " DPxMOD " from device number %" PRId64 - " for gtid %" PRId32 "\n", - DPxPTR(Interop), DeviceNum, gtid); + DPIF(INTEROP, + "Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); return Interop; } } } Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); - DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", - DPxPTR(Interop), DeviceNum); + DPIF(INTEROP, + "Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); if (Ctx->flags.implicit) { // register the new implicit interop in the RTL @@ -277,16 +284,18 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, int __tgt_interop_use60(ident_t *LocRef, omp_interop_val_t *Interop, interop_ctx_t *Ctx, dep_pack_t *Deps) { bool Nowait = Ctx->flags.nowait; - DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, - DPxPTR(Interop), Nowait); + DPIF(INTEROP, "Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", + __func__, DPxPTR(Interop), Nowait); if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (Nowait) { - DP("Warning: nowait flag on interop use with dependences not supported" - "yet. Ignored\n"); + DPIF( + INTEROP, + "Warning: nowait flag on interop use with dependences not supported" + "yet. Ignored\n"); Nowait = false; } @@ -318,15 +327,16 @@ int __tgt_interop_use60(ident_t *LocRef, omp_interop_val_t *Interop, int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, interop_ctx_t *Ctx, dep_pack_t *Deps) { - DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); + DPIF(INTEROP, "Call to %s with interop " DPxMOD "\n", __func__, + DPxPTR(Interop)); if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop destroy not supported " - "yet. Ignored\n"); + DPIF(INTEROP, "Warning: nowait flag on interop destroy not supported " + "yet. Ignored\n"); if (Deps) { __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); @@ -346,9 +356,10 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, ompx_interop_cb_t *CB, void *Data) { - DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD - "and data " DPxMOD "\n", - __func__, DPxPTR(Interop), DPxPTR(CB), DPxPTR(Data)); + DPIF(INTEROP, + "Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(CB), DPxPTR(Data)); if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return omp_irc_other; @@ -433,7 +444,7 @@ int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", this); } - DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + DPIF(INTEROP, "Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); runCompletionCbs(); return OFFLOAD_SUCCESS; } @@ -454,8 +465,9 @@ void syncImplicitInterops(int Gtid, void *Event) { if (PM->InteropTbl.size() == 0) return; - DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", - Gtid, DPxPTR(Event)); + DPIF(INTEROP, + "target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", + Gtid, DPxPTR(Event)); for (auto iop : PM->InteropTbl) { if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(Gtid) && @@ -491,7 +503,7 @@ void syncImplicitInterops(int Gtid, void *Event) { } void InteropTblTy::clear() { - DP("Clearing Interop Table\n"); + DPIF(INTEROP, "Clearing Interop Table\n"); PerThreadTable::clear([](auto &IOP) { auto DeviceOrErr = IOP->getDevice(); if (!DeviceOrErr) { diff --git a/offload/libomptarget/OpenMP/Mapping.cpp b/offload/libomptarget/OpenMP/Mapping.cpp index 9b3533895f2a6..cb31d55de323e 100644 --- a/offload/libomptarget/OpenMP/Mapping.cpp +++ b/offload/libomptarget/OpenMP/Mapping.cpp @@ -59,8 +59,9 @@ int MappingInfoTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, bool IsValid = HDTT.HstPtrEnd == (uintptr_t)HstPtrBegin + Size && HDTT.TgtPtrBegin == (uintptr_t)TgtPtrBegin; if (IsValid) { - DP("Attempt to re-associate the same device ptr+offset with the same " - "host ptr, nothing to do\n"); + DPIF(MAP, + "Attempt to re-associate the same device ptr+offset with the same " + "host ptr, nothing to do\n"); return OFFLOAD_SUCCESS; } REPORT("Not allowed to re-associate a different device ptr+offset with " @@ -80,12 +81,14 @@ int MappingInfoTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, /*UseHoldRefCount=*/false, /*Name=*/nullptr, /*IsRefCountINF=*/true)) .first->HDTT; - DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD - ", HstEnd=" DPxMOD ", TgtBegin=" DPxMOD ", DynRefCount=%s, " - "HoldRefCount=%s\n", - DPxPTR(NewEntry.HstPtrBase), DPxPTR(NewEntry.HstPtrBegin), - DPxPTR(NewEntry.HstPtrEnd), DPxPTR(NewEntry.TgtPtrBegin), - NewEntry.dynRefCountToStr().c_str(), NewEntry.holdRefCountToStr().c_str()); + DPIF(MAP, + "Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD + ", HstEnd=" DPxMOD ", TgtBegin=" DPxMOD ", DynRefCount=%s, " + "HoldRefCount=%s\n", + DPxPTR(NewEntry.HstPtrBase), DPxPTR(NewEntry.HstPtrBegin), + DPxPTR(NewEntry.HstPtrEnd), DPxPTR(NewEntry.TgtPtrBegin), + NewEntry.dynRefCountToStr().c_str(), + NewEntry.holdRefCountToStr().c_str()); (void)NewEntry; // Notify the plugin about the new mapping. @@ -114,7 +117,7 @@ int MappingInfoTy::disassociatePtr(void *HstPtrBegin) { } if (HDTT.isDynRefCountInf()) { - DP("Association found, removing it\n"); + DPIF(MAP, "Association found, removing it\n"); void *Event = HDTT.getEvent(); delete &HDTT; if (Event) @@ -135,8 +138,8 @@ LookupResult MappingInfoTy::lookupMapping(HDTTMapAccessorTy &HDTTMap, uintptr_t HP = (uintptr_t)HstPtrBegin; LookupResult LR; - DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%" PRId64 ")...\n", - DPxPTR(HP), Size); + DPIF(MAP, "Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%" PRId64 ")...\n", + DPxPTR(HP), Size); if (HDTTMap->empty()) return LR; @@ -185,12 +188,14 @@ LookupResult MappingInfoTy::lookupMapping(HDTTMapAccessorTy &HDTTMap, } if (LR.Flags.ExtendsBefore) { - DP("WARNING: Pointer is not mapped but section extends into already " - "mapped data\n"); + DPIF(MAP, + "WARNING: Pointer is not mapped but section extends into already " + "mapped data\n"); } if (LR.Flags.ExtendsAfter) { - DP("WARNING: Pointer is already mapped but section extends beyond mapped " - "region\n"); + DPIF(MAP, "WARNING: Pointer is already mapped but section extends beyond " + "mapped " + "region\n"); } } @@ -269,17 +274,19 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer( "Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared " "memory\n", DPxPTR((uintptr_t)HstPtrBegin), Size); - DP("Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared " - "memory\n", - DPxPTR((uintptr_t)HstPtrBegin), Size); + DPIF(MAP, + "Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared " + "memory\n", + DPxPTR((uintptr_t)HstPtrBegin), Size); LR.TPR.Flags.IsPresent = false; LR.TPR.Flags.IsHostPointer = true; LR.TPR.TargetPointer = HstPtrBegin; } } else if (HasPresentModifier) { - DP("Mapping required by 'present' map type modifier does not exist for " - "HstPtrBegin=" DPxMOD ", Size=%" PRId64 "\n", - DPxPTR(HstPtrBegin), Size); + DPIF(MAP, + "Mapping required by 'present' map type modifier does not exist for " + "HstPtrBegin=" DPxMOD ", Size=%" PRId64 "\n", + DPxPTR(HstPtrBegin), Size); MESSAGE("device mapping required by 'present' map type modifier does not " "exist for host address " DPxMOD " (%" PRId64 " bytes)", DPxPTR(HstPtrBegin), Size); @@ -342,14 +349,15 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer( }; if (LR.TPR.getEntry()->foreachShadowPointerInfo(FailOnPtrFound) == OFFLOAD_FAIL) { - DP("Multiple new mappings of %" PRId64 " bytes detected (hst:" DPxMOD - ") -> (tgt:" DPxMOD ")\n", - Size, DPxPTR(HstPtrBegin), DPxPTR(LR.TPR.TargetPointer)); + DPIF(MAP, + "Multiple new mappings of %" PRId64 " bytes detected (hst:" DPxMOD + ") -> (tgt:" DPxMOD ")\n", + Size, DPxPTR(HstPtrBegin), DPxPTR(LR.TPR.TargetPointer)); return std::move(LR.TPR); } - DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size, - DPxPTR(HstPtrBegin), DPxPTR(LR.TPR.TargetPointer)); + DPIF(MAP, "Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", + Size, DPxPTR(HstPtrBegin), DPxPTR(LR.TPR.TargetPointer)); int Ret = Device.submitData(LR.TPR.TargetPointer, HstPtrBegin, Size, AsyncInfo, LR.TPR.getEntry()); @@ -444,9 +452,10 @@ TargetPointerResultTy MappingInfoTy::getTgtPtrBegin( // If the value isn't found in the mapping and unified shared memory // is on then it means we have stumbled upon a value which we need to // use directly from the host. - DP("Get HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared " - "memory\n", - DPxPTR((uintptr_t)HstPtrBegin), Size); + DPIF(MAP, + "Get HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared " + "memory\n", + DPxPTR((uintptr_t)HstPtrBegin), Size); LR.TPR.Flags.IsPresent = false; LR.TPR.Flags.IsHostPointer = true; LR.TPR.TargetPointer = HstPtrBegin; @@ -501,9 +510,10 @@ int MappingInfoTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) { assert(Entry && "Trying to deallocate a null entry."); - DP("Deleting tgt data " DPxMOD " of size %" PRId64 " by freeing allocation " - "starting at " DPxMOD "\n", - DPxPTR(Entry->TgtPtrBegin), Size, DPxPTR(Entry->TgtAllocBegin)); + DPIF(MAP, + "Deleting tgt data " DPxMOD " of size %" PRId64 " by freeing allocation " + "starting at " DPxMOD "\n", + DPxPTR(Entry->TgtPtrBegin), Size, DPxPTR(Entry->TgtAllocBegin)); void *Event = Entry->getEvent(); if (Event && Device.destroyEvent(Event) != OFFLOAD_SUCCESS) { diff --git a/offload/libomptarget/OpenMP/OMPT/Callback.cpp b/offload/libomptarget/OpenMP/OMPT/Callback.cpp index ab0942ed4fd3f..449b3236eadac 100644 --- a/offload/libomptarget/OpenMP/OMPT/Callback.cpp +++ b/offload/libomptarget/OpenMP/OMPT/Callback.cpp @@ -410,11 +410,13 @@ void Interface::endTarget(int64_t DeviceId, void *Code) { } void Interface::beginTargetDataOperation() { - DP("in ompt_target_region_begin (TargetRegionId = %lu)\n", TargetData.value); + DPIF(TOOL, "in ompt_target_region_begin (TargetRegionId = %lu)\n", + TargetData.value); } void Interface::endTargetDataOperation() { - DP("in ompt_target_region_end (TargetRegionId = %lu)\n", TargetData.value); + DPIF(TOOL, "in ompt_target_region_end (TargetRegionId = %lu)\n", + TargetData.value); } void Interface::beginTargetRegion() { @@ -462,12 +464,12 @@ class LibomptargetRtlFinalizer { int llvm::omp::target::ompt::initializeLibrary(ompt_function_lookup_t lookup, int initial_device_num, ompt_data_t *tool_data) { - DP("Executing initializeLibrary\n"); + DPIF(TOOL, "Executing initializeLibrary\n"); #define bindOmptFunctionName(OmptFunction, DestinationFunction) \ if (lookup) \ DestinationFunction = (OmptFunction##_t)lookup(#OmptFunction); \ - DP("initializeLibrary bound %s=%p\n", #DestinationFunction, \ - ((void *)(uint64_t)DestinationFunction)); + DPIF(TOOL, "initializeLibrary bound %s=%p\n", #DestinationFunction, \ + ((void *)(uint64_t)DestinationFunction)); bindOmptFunctionName(ompt_get_callback, lookupCallbackByCode); bindOmptFunctionName(ompt_get_task_data, ompt_get_task_data_fn); @@ -493,7 +495,7 @@ int llvm::omp::target::ompt::initializeLibrary(ompt_function_lookup_t lookup, } void llvm::omp::target::ompt::finalizeLibrary(ompt_data_t *data) { - DP("Executing finalizeLibrary\n"); + DPIF(TOOL, "Executing finalizeLibrary\n"); // Before disabling OMPT, call the (plugin) finalizations that were registered // with this library LibraryFinalizer->finalize(); @@ -502,7 +504,7 @@ void llvm::omp::target::ompt::finalizeLibrary(ompt_data_t *data) { } void llvm::omp::target::ompt::connectLibrary() { - DP("Entering connectLibrary\n"); + DPIF(TOOL, "Entering connectLibrary\n"); // Connect with libomp static OmptLibraryConnectorTy LibompConnector("libomp"); static ompt_start_tool_result_t OmptResult; @@ -525,7 +527,7 @@ void llvm::omp::target::ompt::connectLibrary() { FOREACH_OMPT_EMI_EVENT(bindOmptCallback) #undef bindOmptCallback - DP("Exiting connectLibrary\n"); + DPIF(TOOL, "Exiting connectLibrary\n"); } #endif // OMPT_SUPPORT diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index c8d6b42114d0f..ba998bfdad8ea 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -32,11 +32,11 @@ PluginManager *PM = nullptr; void PluginManager::init() { TIMESCOPE(); if (OffloadPolicy::isOffloadDisabled()) { - DP("Offload is disabled. Skipping plugin initialization\n"); + DPIF(RTL, "Offload is disabled. Skipping plugin initialization\n"); return; } - DP("Loading RTLs...\n"); + DPIF(RTL, "Loading RTLs...\n"); // Attempt to create an instance of each supported plugin. #define PLUGIN_TARGET(Name) \ @@ -46,12 +46,12 @@ void PluginManager::init() { } while (false); #include "Shared/Targets.def" - DP("RTLs loaded!\n"); + DPIF(RTL, "RTLs loaded!\n"); } void PluginManager::deinit() { TIMESCOPE(); - DP("Unloading RTLs...\n"); + DPIF(RTL, "Unloading RTLs...\n"); for (auto &Plugin : Plugins) { if (!Plugin->is_initialized()) @@ -59,12 +59,12 @@ void PluginManager::deinit() { if (auto Err = Plugin->deinit()) { [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); - DP("Failed to deinit plugin: %s\n", InfoMsg.c_str()); + DPIF(RTL, "Failed to deinit plugin: %s\n", InfoMsg.c_str()); } Plugin.release(); } - DP("RTLs unloaded!\n"); + DPIF(RTL, "RTLs unloaded!\n"); } bool PluginManager::initializePlugin(GenericPluginTy &Plugin) { @@ -73,12 +73,12 @@ bool PluginManager::initializePlugin(GenericPluginTy &Plugin) { if (auto Err = Plugin.init()) { [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); - DP("Failed to init plugin: %s\n", InfoMsg.c_str()); + DPIF(RTL, "Failed to init plugin: %s\n", InfoMsg.c_str()); return false; } - DP("Registered plugin %s with %d visible device(s)\n", Plugin.getName(), - Plugin.number_of_devices()); + DPIF(RTL, "Registered plugin %s with %d visible device(s)\n", + Plugin.getName(), Plugin.number_of_devices()); return true; } @@ -105,7 +105,7 @@ bool PluginManager::initializeDevice(GenericPluginTy &Plugin, auto Device = std::make_unique(&Plugin, UserId, DeviceId); if (auto Err = Device->init()) { [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); - DP("Failed to init device %d: %s\n", DeviceId, InfoMsg.c_str()); + DPIF(RTL, "Failed to init device %d: %s\n", DeviceId, InfoMsg.c_str()); return false; } @@ -229,7 +229,7 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { continue; if (!R.number_of_devices()) { - DP("Skipping plugin %s with no visible devices\n", R.getName()); + DPIF(RTL, "Skipping plugin %s with no visible devices\n", R.getName()); continue; } @@ -239,17 +239,18 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { // registered for the same device in the case that they are mutually // compatible, such as sm_80 and sm_89. if (UsedDevices[&R].contains(DeviceId)) { - DP("Image " DPxMOD - " is a duplicate, not loaded on RTL %s device %d!\n", - DPxPTR(Img->ImageStart), R.getName(), DeviceId); + DPIF(RTL, + "Image " DPxMOD + " is a duplicate, not loaded on RTL %s device %d!\n", + DPxPTR(Img->ImageStart), R.getName(), DeviceId); continue; } if (!R.isDeviceCompatible(DeviceId, Buffer)) continue; - DP("Image " DPxMOD " is compatible with RTL %s device %d!\n", - DPxPTR(Img->ImageStart), R.getName(), DeviceId); + DPIF(RTL, "Image " DPxMOD " is compatible with RTL %s device %d!\n", + DPxPTR(Img->ImageStart), R.getName(), DeviceId); if (!initializeDevice(R, DeviceId)) continue; @@ -269,8 +270,8 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { TranslationTable &TT = (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin]; - DP("Registering image " DPxMOD " with RTL %s!\n", - DPxPTR(Img->ImageStart), R.getName()); + DPIF(RTL, "Registering image " DPxMOD " with RTL %s!\n", + DPxPTR(Img->ImageStart), R.getName()); auto UserId = PM->DeviceIds[std::make_pair(&R, DeviceId)]; if (TT.TargetsTable.size() < static_cast(UserId + 1)) { @@ -292,7 +293,8 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { } } if (!FoundRTL) - DP("No RTL found for image " DPxMOD "!\n", DPxPTR(Img->ImageStart)); + DPIF(RTL, "No RTL found for image " DPxMOD "!\n", + DPxPTR(Img->ImageStart)); } PM->RTLsMtx.unlock(); @@ -309,7 +311,7 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { if (UseAutoZeroCopy) addRequirements(OMPX_REQ_AUTO_ZERO_COPY); - DP("Done registering entries!\n"); + DPIF(RTL, "Done registering entries!\n"); } // Temporary forward declaration, old style CTor/DTor handling is going away. @@ -317,7 +319,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo); void PluginManager::unregisterLib(__tgt_bin_desc *Desc) { - DP("Unloading target library!\n"); + DPIF(RTL, "Unloading target library!\n"); Desc = upgradeLegacyEntries(Desc); @@ -341,19 +343,20 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) { FoundRTL = &R; - DP("Unregistered image " DPxMOD " from RTL\n", DPxPTR(Img->ImageStart)); + DPIF(RTL, "Unregistered image " DPxMOD " from RTL\n", + DPxPTR(Img->ImageStart)); break; } // if no RTL was found proceed to unregister the next image if (!FoundRTL) { - DP("No RTLs in use support the image " DPxMOD "!\n", - DPxPTR(Img->ImageStart)); + DPIF(RTL, "No RTLs in use support the image " DPxMOD "!\n", + DPxPTR(Img->ImageStart)); } } PM->RTLsMtx.unlock(); - DP("Done unregistering images!\n"); + DPIF(RTL, "Done unregistering images!\n"); // Remove entries from PM->HostPtrToTableMap PM->TblMapMtx.lock(); @@ -367,18 +370,20 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) { auto TransTable = PM->HostEntriesBeginToTransTable.find(Desc->HostEntriesBegin); if (TransTable != PM->HostEntriesBeginToTransTable.end()) { - DP("Removing translation table for descriptor " DPxMOD "\n", - DPxPTR(Desc->HostEntriesBegin)); + DPIF(RTL, "Removing translation table for descriptor " DPxMOD "\n", + DPxPTR(Desc->HostEntriesBegin)); PM->HostEntriesBeginToTransTable.erase(TransTable); } else { - DP("Translation table for descriptor " DPxMOD " cannot be found, probably " - "it has been already removed.\n", - DPxPTR(Desc->HostEntriesBegin)); + DPIF(RTL, + "Translation table for descriptor " DPxMOD + " cannot be found, probably " + "it has been already removed.\n", + DPxPTR(Desc->HostEntriesBegin)); } PM->TblMapMtx.unlock(); - DP("Done unregistering library!\n"); + DPIF(RTL, "Done unregistering library!\n"); } /// Map global data and execute pending ctors @@ -393,8 +398,8 @@ static int loadImagesOntoDevice(DeviceTy &Device) { for (auto *HostEntriesBegin : PM->HostEntriesBeginRegistrationOrder) { TranslationTable *TransTable = &PM->HostEntriesBeginToTransTable[HostEntriesBegin]; - DP("Trans table %p : %p\n", TransTable->HostTable.EntriesBegin, - TransTable->HostTable.EntriesEnd); + DPIF(RTL, "Trans table %p : %p\n", TransTable->HostTable.EntriesBegin, + TransTable->HostTable.EntriesEnd); if (TransTable->HostTable.EntriesBegin == TransTable->HostTable.EntriesEnd) { // No host entry so no need to proceed @@ -456,9 +461,9 @@ static int loadImagesOntoDevice(DeviceTy &Device) { &DeviceEntry.Address) != OFFLOAD_SUCCESS) REPORT("Failed to load kernel %s\n", Entry.SymbolName); } - DP("Entry point " DPxMOD " maps to%s %s (" DPxMOD ")\n", - DPxPTR(Entry.Address), (Entry.Size) ? " global" : "", - Entry.SymbolName, DPxPTR(DeviceEntry.Address)); + DPIF(MAP, "Entry point " DPxMOD " maps to%s %s (" DPxMOD ")\n", + DPxPTR(Entry.Address), (Entry.Size) ? " global" : "", + Entry.SymbolName, DPxPTR(DeviceEntry.Address)); DeviceEntries.emplace_back(DeviceEntry); } @@ -509,10 +514,12 @@ static int loadImagesOntoDevice(DeviceTy &Device) { CurrDeviceEntryAddr = DevPtr; } - DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu" - ", name \"%s\"\n", - DPxPTR(CurrHostEntry->Address), DPxPTR(CurrDeviceEntry->Address), - CurrDeviceEntry->Size, CurrDeviceEntry->SymbolName); + DPIF(MAP, + "Add mapping from host " DPxMOD " to device " DPxMOD + " with size %zu" + ", name \"%s\"\n", + DPxPTR(CurrHostEntry->Address), DPxPTR(CurrDeviceEntry->Address), + CurrDeviceEntry->Size, CurrDeviceEntry->SymbolName); HDTTMap->emplace(new HostDataToTargetTy( (uintptr_t)CurrHostEntry->Address /*HstPtrBase*/, (uintptr_t)CurrHostEntry->Address /*HstPtrBegin*/, diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp index 71423ae0c94d9..b3fbbf8e7eed1 100644 --- a/offload/libomptarget/device.cpp +++ b/offload/libomptarget/device.cpp @@ -278,8 +278,9 @@ int32_t DeviceTy::dataFence(AsyncInfoTy &AsyncInfo) { } int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) { - DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n", - DPxPTR(HstPtr), Size); + DPIF(MAP, + "Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n", + DPxPTR(HstPtr), Size); if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) { REPORT("Notifying about data mapping failed.\n"); @@ -289,7 +290,8 @@ int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) { } int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) { - DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n", DPxPTR(HstPtr)); + DPIF(MAP, "Notifying about an unmapping: HstPtr=" DPxMOD "\n", + DPxPTR(HstPtr)); if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) { REPORT("Notifying about data unmapping failed.\n"); diff --git a/offload/libomptarget/interface.cpp b/offload/libomptarget/interface.cpp index fe18289765906..9446b743e149b 100644 --- a/offload/libomptarget/interface.cpp +++ b/offload/libomptarget/interface.cpp @@ -49,25 +49,26 @@ using namespace llvm::omp::target::ompt; // This step might be skipped if offload is disabled. bool checkDevice(int64_t &DeviceID, ident_t *Loc) { if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) { - DP("Offload is disabled\n"); + DPIF(DEVICE, "Offload is disabled\n"); return true; } if (DeviceID == OFFLOAD_DEVICE_DEFAULT) { DeviceID = omp_get_default_device(); - DP("Use default device id %" PRId64 "\n", DeviceID); + DPIF(DEVICE, "Use default device id %" PRId64 "\n", DeviceID); } // Proposed behavior for OpenMP 5.2 in OpenMP spec github issue 2669. if (omp_get_num_devices() == 0) { - DP("omp_get_num_devices() == 0 but offload is manadatory\n"); + DPIF(DEVICE, "omp_get_num_devices() == 0 but offload is manadatory\n"); handleTargetOutcome(false, Loc); return true; } if (DeviceID == omp_get_initial_device()) { - DP("Device is host (%" PRId64 "), returning as if offload is disabled\n", - DeviceID); + DPIF(DEVICE, + "Device is host (%" PRId64 "), returning as if offload is disabled\n", + DeviceID); return true; } return false; @@ -123,11 +124,11 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy", "NumArgs=" + std::to_string(ArgNum), Loc); - DP("Entering data %s region for device %" PRId64 " with %d mappings\n", - RegionName, DeviceId, ArgNum); + DPIF(MAP, "Entering data %s region for device %" PRId64 " with %d mappings\n", + RegionName, DeviceId, ArgNum); if (checkDevice(DeviceId, Loc)) { - DP("Not offloading to device %" PRId64 "\n", DeviceId); + DPIF(MAP, "Not offloading to device %" PRId64 "\n", DeviceId); return; } @@ -136,10 +137,11 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, RegionTypeMsg); #ifdef OMPTARGET_DEBUG for (int I = 0; I < ArgNum; ++I) { - DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 - ", Type=0x%" PRIx64 ", Name=%s\n", - I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], - (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); + DPIF(MAP, + "Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 + ", Type=0x%" PRIx64 ", Name=%s\n", + I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], + (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); } #endif @@ -274,7 +276,7 @@ static KernelArgsTy *upgradeKernelArgs(KernelArgsTy *KernelArgs, KernelArgsTy &LocalKernelArgs, int32_t NumTeams, int32_t ThreadLimit) { if (KernelArgs->Version > OMP_KERNEL_ARG_VERSION) - DP("Unexpected ABI version: %u\n", KernelArgs->Version); + DPIF(KERNEL, "Unexpected ABI version: %u\n", KernelArgs->Version); uint32_t UpgradedVersion = KernelArgs->Version; if (KernelArgs->Version < OMP_KERNEL_ARG_VERSION) { @@ -326,12 +328,13 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, assert(PM && "Runtime not initialized"); static_assert(std::is_convertible_v, "Target AsyncInfoTy must be convertible to AsyncInfoTy."); - DP("Entering target region for device %" PRId64 " with entry point " DPxMOD - "\n", - DeviceId, DPxPTR(HostPtr)); + DPIF(KERNEL, + "Entering target region for device %" PRId64 " with entry point " DPxMOD + "\n", + DeviceId, DPxPTR(HostPtr)); if (checkDevice(DeviceId, Loc)) { - DP("Not offloading to device %" PRId64 "\n", DeviceId); + DPIF(KERNEL, "Not offloading to device %" PRId64 "\n", DeviceId); return OMP_TGT_FAIL; } @@ -356,13 +359,14 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, KernelArgs->ArgNames, "Entering OpenMP kernel"); #ifdef OMPTARGET_DEBUG for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) { - DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 - ", Type=0x%" PRIx64 ", Name=%s\n", - I, DPxPTR(KernelArgs->ArgBasePtrs[I]), DPxPTR(KernelArgs->ArgPtrs[I]), - KernelArgs->ArgSizes[I], KernelArgs->ArgTypes[I], - (KernelArgs->ArgNames) - ? getNameFromMapping(KernelArgs->ArgNames[I]).c_str() - : "unknown"); + DPIF(KERNEL, + "Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 + ", Type=0x%" PRIx64 ", Name=%s\n", + I, DPxPTR(KernelArgs->ArgBasePtrs[I]), DPxPTR(KernelArgs->ArgPtrs[I]), + KernelArgs->ArgSizes[I], KernelArgs->ArgTypes[I], + (KernelArgs->ArgNames) + ? getNameFromMapping(KernelArgs->ArgNames[I]).c_str() + : "unknown"); } #endif @@ -463,7 +467,7 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, assert(PM && "Runtime not initialized"); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); if (checkDevice(DeviceId, Loc)) { - DP("Not offloading to device %" PRId64 "\n", DeviceId); + DPIF(KERNEL, "Not offloading to device %" PRId64 "\n", DeviceId); return OMP_TGT_FAIL; } auto DeviceOrErr = PM->getDevice(DeviceId); @@ -491,8 +495,9 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; int64_t Size = MapperComponentsPtr->Components.size(); - DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", - DPxPTR(RtMapperHandle), Size); + DPIF(MAP, + "__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", + DPxPTR(RtMapperHandle), Size); return Size; } @@ -500,11 +505,12 @@ EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base, void *Begin, int64_t Size, int64_t Type, void *Name) { - DP("__tgt_push_mapper_component(Handle=" DPxMOD - ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 - ", Type=0x%" PRIx64 ", Name=%s).\n", - DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type, - (Name) ? getNameFromMapping(Name).c_str() : "unknown"); + DPIF(MAP, + "__tgt_push_mapper_component(Handle=" DPxMOD + ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 + ", Type=0x%" PRIx64 ", Name=%s).\n", + DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type, + (Name) ? getNameFromMapping(Name).c_str() : "unknown"); auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; MapperComponentsPtr->Components.push_back( MapComponentInfoTy(Base, Begin, Size, Type, Name)); diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index 69725e77bae00..bece9962b494e 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -200,10 +200,11 @@ static int32_t getParentIndex(int64_t Type) { void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, const char *Name) { - DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size); + DPIF(MEMORY, "Call to %s for device %d requesting %zu bytes\n", Name, + DeviceNum, Size); if (Size <= 0) { - DP("Call to %s with non-positive length\n", Name); + DPIF(MEMORY, "Call to %s with non-positive length\n", Name); return NULL; } @@ -211,7 +212,7 @@ void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, if (DeviceNum == omp_get_initial_device()) { Rc = malloc(Size); - DP("%s returns host ptr " DPxMOD "\n", Name, DPxPTR(Rc)); + DPIF(MEMORY, "%s returns host ptr " DPxMOD "\n", Name, DPxPTR(Rc)); return Rc; } @@ -220,23 +221,23 @@ void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); Rc = DeviceOrErr->allocData(Size, nullptr, Kind); - DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(Rc)); + DPIF(MEMORY, "%s returns device ptr " DPxMOD "\n", Name, DPxPTR(Rc)); return Rc; } void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind, const char *Name) { - DP("Call to %s for device %d and address " DPxMOD "\n", Name, DeviceNum, - DPxPTR(DevicePtr)); + DPIF(MEMORY, "Call to %s for device %d and address " DPxMOD "\n", Name, + DeviceNum, DPxPTR(DevicePtr)); if (!DevicePtr) { - DP("Call to %s with NULL ptr\n", Name); + DPIF(MEMORY, "Call to %s with NULL ptr\n", Name); return; } if (DeviceNum == omp_get_initial_device()) { free(DevicePtr); - DP("%s deallocated host ptr\n", Name); + DPIF(MEMORY, "%s deallocated host ptr\n", Name); return; } @@ -249,15 +250,16 @@ void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind, "Failed to deallocate device ptr. Set " "OFFLOAD_TRACK_ALLOCATION_TRACES=1 to track allocations."); - DP("omp_target_free deallocated device ptr\n"); + DPIF(MEMORY, "omp_target_free deallocated device ptr\n"); } void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum, const char *Name) { - DP("Call to %s for device %d locking %zu bytes\n", Name, DeviceNum, Size); + DPIF(MEMORY, "Call to %s for device %d locking %zu bytes\n", Name, DeviceNum, + Size); if (Size <= 0) { - DP("Call to %s with non-positive length\n", Name); + DPIF(MEMORY, "Call to %s with non-positive length\n", Name); return NULL; } @@ -270,22 +272,22 @@ void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum, int32_t Err = 0; Err = DeviceOrErr->RTL->data_lock(DeviceNum, HostPtr, Size, &RC); if (Err) { - DP("Could not lock ptr %p\n", HostPtr); + DPIF(MEMORY, "Could not lock ptr %p\n", HostPtr); return nullptr; } - DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(RC)); + DPIF(MEMORY, "%s returns device ptr " DPxMOD "\n", Name, DPxPTR(RC)); return RC; } void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name) { - DP("Call to %s for device %d unlocking\n", Name, DeviceNum); + DPIF(MEMORY, "Call to %s for device %d unlocking\n", Name, DeviceNum); auto DeviceOrErr = PM->getDevice(DeviceNum); if (!DeviceOrErr) FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); DeviceOrErr->RTL->data_unlock(DeviceNum, HostPtr); - DP("%s returns\n", Name); + DPIF(MEMORY, "%s returns\n", Name); } /// Call the user-defined mapper function followed by the appropriate @@ -295,7 +297,7 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg, void *ArgMapper, AsyncInfoTy &AsyncInfo, TargetDataFuncPtrTy TargetDataFunction, AttachInfoTy *AttachInfo = nullptr) { - DP("Calling the mapper function " DPxMOD "\n", DPxPTR(ArgMapper)); + DPIF(MAP, "Calling the mapper function " DPxMOD "\n", DPxPTR(ArgMapper)); // The mapper function fills up Components. MapperComponentsTy MapperComponents; @@ -368,12 +370,14 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin, void *TgtPteeBase = reinterpret_cast( reinterpret_cast(TgtPteeBegin) - Delta); - DP("HstPteeBase: " DPxMOD ", HstPteeBegin: " DPxMOD - ", Delta (HstPteeBegin - HstPteeBase): %" PRIu64 ".\n", - DPxPTR(HstPteeBase), DPxPTR(HstPteeBegin), Delta); - DP("TgtPteeBase (TgtPteeBegin - Delta): " DPxMOD ", TgtPteeBegin : " DPxMOD - "\n", - DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin)); + DPIF(MAP, + "HstPteeBase: " DPxMOD ", HstPteeBegin: " DPxMOD + ", Delta (HstPteeBegin - HstPteeBase): %" PRIu64 ".\n", + DPxPTR(HstPteeBase), DPxPTR(HstPteeBegin), Delta); + DPIF(MAP, + "TgtPteeBase (TgtPteeBegin - Delta): " DPxMOD ", TgtPteeBegin : " DPxMOD + "\n", + DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin)); return TgtPteeBase; } @@ -453,13 +457,13 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo, // Add shadow pointer tracking if (!PtrTPR.getEntry()->addShadowPointer( ShadowPtrInfoTy{HstPtrAddr, TgtPtrAddr, TgtPteeBase, HstPtrSize})) { - DP("Pointer " DPxMOD " is already attached to " DPxMOD "\n", - DPxPTR(TgtPtrAddr), DPxPTR(TgtPteeBase)); + DPIF(MAP, "Pointer " DPxMOD " is already attached to " DPxMOD "\n", + DPxPTR(TgtPtrAddr), DPxPTR(TgtPteeBase)); return OFFLOAD_SUCCESS; } - DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(TgtPtrAddr), - DPxPTR(TgtPteeBase)); + DPIF(MAP, "Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(TgtPtrAddr), + DPxPTR(TgtPteeBase)); // Lambda to handle submitData result and perform final steps. auto HandleSubmitResult = [&](int SubmitResult) -> int { @@ -491,11 +495,12 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo, std::memcpy(SrcBuffer + VoidPtrSize, HstDescriptorFieldsAddr, HstDescriptorFieldsSize); - DP("Updating %" PRId64 " bytes of descriptor (" DPxMOD - ") (pointer + %" PRId64 " additional bytes from host descriptor " DPxMOD - ")\n", - HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize, - DPxPTR(HstDescriptorFieldsAddr)); + DPIF(MAP, + "Updating %" PRId64 " bytes of descriptor (" DPxMOD + ") (pointer + %" PRId64 + " additional bytes from host descriptor " DPxMOD ")\n", + HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize, + DPxPTR(HstDescriptorFieldsAddr)); } // Submit the populated source buffer to device. @@ -524,7 +529,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // Instead of executing the regular path of targetDataBegin, call the // targetDataMapper variant which will call targetDataBegin again // with new arguments. - DP("Calling targetDataMapper for the %dth argument\n", I); + DPIF(MAP, "Calling targetDataMapper for the %dth argument\n", I); map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I]; int Rc = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I], @@ -561,7 +566,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, /*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I], /*PointeeName=*/HstPtrName); - DP("Deferring ATTACH map-type processing for argument %d\n", I); + DPIF(MAP, "Deferring ATTACH map-type processing for argument %d\n", I); continue; } @@ -575,9 +580,10 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase); TgtPadding = (int64_t)HstPtrBegin % Alignment; if (TgtPadding) { - DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD - "\n", - TgtPadding, DPxPTR(HstPtrBegin)); + DPIF(MAP, + "Using a padding of %" PRId64 " bytes for begin address " DPxMOD + "\n", + TgtPadding, DPxPTR(HstPtrBegin)); } } @@ -602,7 +608,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, MappingInfoTy::HDTTMapAccessorTy HDTTMap = Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor(); if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { - DP("Has a pointer entry: \n"); + DPIF(MAP, "Has a pointer entry: \n"); // Base is address of pointer. // // Usually, the pointer is already allocated by this time. For example: @@ -635,10 +641,12 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, if (PointerTpr.Flags.IsNewEntry && !IsHostPtr) AttachInfo->NewAllocations[HstPtrBase] = sizeof(void *); - DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new" - "\n", - sizeof(void *), DPxPTR(PointerTgtPtrBegin), - (PointerTpr.Flags.IsNewEntry ? "" : " not")); + DPIF(MAP, + "There are %zu bytes allocated at target address " DPxMOD + " - is%s new" + "\n", + sizeof(void *), DPxPTR(PointerTgtPtrBegin), + (PointerTpr.Flags.IsNewEntry ? "" : " not")); PointerHstPtrBegin = HstPtrBase; // modify current entry. HstPtrBase = *reinterpret_cast(HstPtrBase); @@ -670,14 +678,15 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, if (TPR.Flags.IsNewEntry && !IsHostPtr && TgtPtrBegin) AttachInfo->NewAllocations[HstPtrBegin] = DataSize; - DP("There are %" PRId64 " bytes allocated at target address " DPxMOD - " - is%s new\n", - DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not")); + DPIF(MAP, + "There are %" PRId64 " bytes allocated at target address " DPxMOD + " - is%s new\n", + DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not")); if (ArgTypes[I] & OMP_TGT_MAPTYPE_RETURN_PARAM) { uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase; void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta); - DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase)); + DPIF(MAP, "Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase)); ArgsBase[I] = TgtPtrBase; } @@ -755,19 +764,19 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, AsyncInfoTy &AsyncInfo) { // Report all tracked allocations from both main loop and ATTACH processing if (!AttachInfo.NewAllocations.empty()) { - DP("Tracked %u total new allocations:\n", - (unsigned)AttachInfo.NewAllocations.size()); + DPIF(MAP, "Tracked %u total new allocations:\n", + (unsigned)AttachInfo.NewAllocations.size()); for ([[maybe_unused]] const auto &Alloc : AttachInfo.NewAllocations) { - DP(" Host ptr: " DPxMOD ", Size: %" PRId64 " bytes\n", - DPxPTR(Alloc.first), Alloc.second); + DPIF(MAP, " Host ptr: " DPxMOD ", Size: %" PRId64 " bytes\n", + DPxPTR(Alloc.first), Alloc.second); } } if (AttachInfo.AttachEntries.empty()) return OFFLOAD_SUCCESS; - DP("Processing %zu deferred ATTACH map entries\n", - AttachInfo.AttachEntries.size()); + DPIF(MAP, "Processing %zu deferred ATTACH map entries\n", + AttachInfo.AttachEntries.size()); int Ret = OFFLOAD_SUCCESS; bool IsFirstPointerAttachment = true; @@ -783,9 +792,10 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, int64_t PtrSize = AttachEntry.PointerSize; int64_t MapType = AttachEntry.MapType; - DP("Processing ATTACH entry %zu: HstPtr=" DPxMOD ", HstPteeBegin=" DPxMOD - ", Size=%" PRId64 ", Type=0x%" PRIx64 "\n", - EntryIdx, DPxPTR(HstPtr), DPxPTR(HstPteeBegin), PtrSize, MapType); + DPIF(MAP, + "Processing ATTACH entry %zu: HstPtr=" DPxMOD ", HstPteeBegin=" DPxMOD + ", Size=%" PRId64 ", Type=0x%" PRIx64 "\n", + EntryIdx, DPxPTR(HstPtr), DPxPTR(HstPteeBegin), PtrSize, MapType); const bool IsAttachAlways = MapType & OMP_TGT_MAPTYPE_ALWAYS; @@ -799,8 +809,8 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, Ptr < reinterpret_cast( reinterpret_cast(AllocPtr) + AllocSize); }); - DP("Attach %s " DPxMOD " was newly allocated: %s\n", PtrName, DPxPTR(Ptr), - IsNewlyAllocated ? "yes" : "no"); + DPIF(MAP, "Attach %s " DPxMOD " was newly allocated: %s\n", PtrName, + DPxPTR(Ptr), IsNewlyAllocated ? "yes" : "no"); return IsNewlyAllocated; }; @@ -808,9 +818,10 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, // allocated, or the ALWAYS flag is set. if (!IsAttachAlways && !WasNewlyAllocated(HstPteeBegin, "pointee") && !WasNewlyAllocated(HstPtr, "pointer")) { - DP("Skipping ATTACH entry %zu: neither pointer nor pointee was newly " - "allocated and no ALWAYS flag\n", - EntryIdx); + DPIF(MAP, + "Skipping ATTACH entry %zu: neither pointer nor pointee was newly " + "allocated and no ALWAYS flag\n", + EntryIdx); continue; } @@ -824,19 +835,20 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, Ptr, Size, /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false, /*MustContain=*/true); - DP("Attach %s lookup - IsPresent=%s, IsHostPtr=%s\n", PtrType, - TPR.isPresent() ? "yes" : "no", - TPR.Flags.IsHostPointer ? "yes" : "no"); + DPIF(MAP, "Attach %s lookup - IsPresent=%s, IsHostPtr=%s\n", PtrType, + TPR.isPresent() ? "yes" : "no", + TPR.Flags.IsHostPointer ? "yes" : "no"); if (!TPR.isPresent()) { - DP("Skipping ATTACH entry %zu: %s not present on device\n", EntryIdx, - PtrType); + DPIF(MAP, "Skipping ATTACH entry %zu: %s not present on device\n", + EntryIdx, PtrType); return std::nullopt; } if (TPR.Flags.IsHostPointer) { - DP("Skipping ATTACH entry %zu: device version of the %s is a host " - "pointer.\n", - EntryIdx, PtrType); + DPIF(MAP, + "Skipping ATTACH entry %zu: device version of the %s is a host " + "pointer.\n", + EntryIdx, PtrType); return std::nullopt; } @@ -865,7 +877,8 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, // Insert a data-fence before the first pointer-attachment. if (IsFirstPointerAttachment) { IsFirstPointerAttachment = false; - DP("Inserting a data fence before the first pointer attachment.\n"); + DPIF(MAP, + "Inserting a data fence before the first pointer attachment.\n"); Ret = Device.dataFence(AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { REPORT("Failed to insert data fence.\n"); @@ -881,7 +894,7 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, if (Ret != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - DP("ATTACH entry %zu processed successfully\n", EntryIdx); + DPIF(MAP, "ATTACH entry %zu processed successfully\n", EntryIdx); } return OFFLOAD_SUCCESS; @@ -966,16 +979,18 @@ postProcessingTargetDataEnd(DeviceTy *Device, Entry->foreachShadowPointerInfo([&](const ShadowPtrInfoTy &ShadowPtr) { constexpr int64_t VoidPtrSize = sizeof(void *); if (ShadowPtr.PtrSize > VoidPtrSize) { - DP("Restoring host descriptor " DPxMOD - " to its original content (%" PRId64 - " bytes), containing pointee address " DPxMOD "\n", - DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize, - DPxPTR(ShadowPtr.HstPtrContent.data())); + DPIF(MAP, + "Restoring host descriptor " DPxMOD + " to its original content (%" PRId64 + " bytes), containing pointee address " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize, + DPxPTR(ShadowPtr.HstPtrContent.data())); } else { - DP("Restoring host pointer " DPxMOD " to its original value " DPxMOD - "\n", - DPxPTR(ShadowPtr.HstPtrAddr), - DPxPTR(ShadowPtr.HstPtrContent.data())); + DPIF(MAP, + "Restoring host pointer " DPxMOD " to its original value " DPxMOD + "\n", + DPxPTR(ShadowPtr.HstPtrAddr), + DPxPTR(ShadowPtr.HstPtrContent.data())); } std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(), ShadowPtr.PtrSize); @@ -1024,7 +1039,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // directives. They may be encountered here while handling the "end" part of // "#pragma omp target". if (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH) { - DP("Ignoring ATTACH entry %d in targetDataEnd\n", I); + DPIF(MAP, "Ignoring ATTACH entry %d in targetDataEnd\n", I); continue; } @@ -1032,7 +1047,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // Instead of executing the regular path of targetDataEnd, call the // targetDataMapper variant which will call targetDataEnd again // with new arguments. - DP("Calling targetDataMapper for the %dth argument\n", I); + DPIF(MAP, "Calling targetDataMapper for the %dth argument\n", I); map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I]; Ret = targetDataMapper(Loc, Device, ArgBases[I], Args[I], ArgSizes[I], @@ -1066,8 +1081,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void *TgtPtrBegin = TPR.TargetPointer; if (!TPR.isPresent() && !TPR.isHostPointer() && (DataSize || HasPresentModifier)) { - DP("Mapping does not exist (%s)\n", - (HasPresentModifier ? "'present' map type modifier" : "ignored")); + DPIF(MAP, "Mapping does not exist (%s)\n", + (HasPresentModifier ? "'present' map type modifier" : "ignored")); if (HasPresentModifier) { // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 350 L10-13: // "If a map clause appears on a target, target data, target enter data @@ -1090,9 +1105,10 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, return OFFLOAD_FAIL; } } else { - DP("There are %" PRId64 " bytes allocated at target address " DPxMOD - " - is%s last\n", - DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsLast ? "" : " not")); + DPIF(MAP, + "There are %" PRId64 " bytes allocated at target address " DPxMOD + " - is%s last\n", + DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsLast ? "" : " not")); } // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 351 L14-16: @@ -1108,8 +1124,9 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM; if (HasFrom && (HasAlways || TPR.Flags.IsLast) && !TPR.Flags.IsHostPointer && DataSize != 0) { - DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", - DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); + DPIF(MAP, + "Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", + DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); TIMESCOPE_WITH_DETAILS_AND_IDENT( "DevToHost", "Size=" + std::to_string(DataSize) + "B", Loc); // Wait for any previous transfer if an event is present. @@ -1163,7 +1180,8 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, /*UseHoldRefCount=*/false, /*MustContain=*/true); void *TgtPtrBegin = TPR.TargetPointer; if (!TPR.isPresent()) { - DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)); + DPIF(MAP, "hst data:" DPxMOD " not found, becomes a noop\n", + DPxPTR(HstPtrBegin)); if (ArgType & OMP_TGT_MAPTYPE_PRESENT) { MESSAGE("device mapping required by 'present' motion modifier does not " "exist for host address " DPxMOD " (%" PRId64 " bytes)", @@ -1174,14 +1192,14 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, } if (TPR.Flags.IsHostPointer) { - DP("hst data:" DPxMOD " unified and shared, becomes a noop\n", - DPxPTR(HstPtrBegin)); + DPIF(MAP, "hst data:" DPxMOD " unified and shared, becomes a noop\n", + DPxPTR(HstPtrBegin)); return OFFLOAD_SUCCESS; } if (ArgType & OMP_TGT_MAPTYPE_TO) { - DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", - ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); + DPIF(MAP, "Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", + ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo, TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { @@ -1193,16 +1211,18 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, [&](ShadowPtrInfoTy &ShadowPtr) { constexpr int64_t VoidPtrSize = sizeof(void *); if (ShadowPtr.PtrSize > VoidPtrSize) { - DP("Restoring target descriptor " DPxMOD - " to its original content (%" PRId64 - " bytes), containing pointee address " DPxMOD "\n", - DPxPTR(ShadowPtr.TgtPtrAddr), ShadowPtr.PtrSize, - DPxPTR(ShadowPtr.TgtPtrContent.data())); + DPIF(MAP, + "Restoring target descriptor " DPxMOD + " to its original content (%" PRId64 + " bytes), containing pointee address " DPxMOD "\n", + DPxPTR(ShadowPtr.TgtPtrAddr), ShadowPtr.PtrSize, + DPxPTR(ShadowPtr.TgtPtrContent.data())); } else { - DP("Restoring target pointer " DPxMOD - " to its original value " DPxMOD "\n", - DPxPTR(ShadowPtr.TgtPtrAddr), - DPxPTR(ShadowPtr.TgtPtrContent.data())); + DPIF(MAP, + "Restoring target pointer " DPxMOD + " to its original value " DPxMOD "\n", + DPxPTR(ShadowPtr.TgtPtrAddr), + DPxPTR(ShadowPtr.TgtPtrContent.data())); } Ret = Device.submitData(ShadowPtr.TgtPtrAddr, ShadowPtr.TgtPtrContent.data(), @@ -1214,15 +1234,15 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, return OFFLOAD_SUCCESS; }); if (Ret != OFFLOAD_SUCCESS) { - DP("Updating shadow map failed\n"); + DPIF(MAP, "Updating shadow map failed\n"); return Ret; } } } if (ArgType & OMP_TGT_MAPTYPE_FROM) { - DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", - ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); + DPIF(MAP, "Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", + ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo, TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { @@ -1238,16 +1258,18 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, [&](const ShadowPtrInfoTy &ShadowPtr) { constexpr int64_t VoidPtrSize = sizeof(void *); if (ShadowPtr.PtrSize > VoidPtrSize) { - DP("Restoring host descriptor " DPxMOD - " to its original content (%" PRId64 - " bytes), containing pointee address " DPxMOD "\n", - DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize, - DPxPTR(ShadowPtr.HstPtrContent.data())); + DPIF(MAP, + "Restoring host descriptor " DPxMOD + " to its original content (%" PRId64 + " bytes), containing pointee address " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize, + DPxPTR(ShadowPtr.HstPtrContent.data())); } else { - DP("Restoring host pointer " DPxMOD - " to its original value " DPxMOD "\n", - DPxPTR(ShadowPtr.HstPtrAddr), - DPxPTR(ShadowPtr.HstPtrContent.data())); + DPIF(MAP, + "Restoring host pointer " DPxMOD + " to its original value " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrAddr), + DPxPTR(ShadowPtr.HstPtrContent.data())); } std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(), ShadowPtr.PtrSize); @@ -1255,7 +1277,7 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, }); Entry->unlock(); if (Ret != OFFLOAD_SUCCESS) { - DP("Updating shadow map failed\n"); + DPIF(MAP, "Updating shadow map failed\n"); return Ret; } return OFFLOAD_SUCCESS; @@ -1291,9 +1313,10 @@ static int targetDataNonContiguous(ident_t *Loc, DeviceTy &Device, } } else { char *Ptr = (char *)ArgsBase + Offset; - DP("Transfer of non-contiguous : host ptr " DPxMOD " offset %" PRIu64 - " len %" PRIu64 "\n", - DPxPTR(Ptr), Offset, Size); + DPIF(MAP, + "Transfer of non-contiguous : host ptr " DPxMOD " offset %" PRIu64 + " len %" PRIu64 "\n", + DPxPTR(Ptr), Offset, Size); Ret = targetDataContiguous(Loc, Device, ArgsBase, Ptr, Size, ArgType, AsyncInfo); } @@ -1326,7 +1349,7 @@ int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // Instead of executing the regular path of targetDataUpdate, call the // targetDataMapper variant which will call targetDataUpdate again // with new arguments. - DP("Calling targetDataMapper for the %dth argument\n", I); + DPIF(MAP, "Calling targetDataMapper for the %dth argument\n", I); map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I]; int Ret = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I], @@ -1470,8 +1493,9 @@ class PrivateArgumentManagerTy { // See if the pointee's begin address has corresponding storage on device. void *TgtPteeBegin = [&]() -> void * { if (!HstPteeBegin) { - DP("Corresponding-pointer-initialization: pointee begin address is " - "null\n"); + DPIF(MAP, + "Corresponding-pointer-initialization: pointee begin address is " + "null\n"); return nullptr; } @@ -1582,9 +1606,10 @@ class PrivateArgumentManagerTy { HstPteeBegin); // Store the target pointee base address to the first VoidPtrSize bytes - DP("Initializing corresponding-pointer-initialization source buffer " - "for " DPxMOD ", with pointee base " DPxMOD "\n", - DPxPTR(HstPtr), DPxPTR(TgtPteeBase)); + DPIF(MAP, + "Initializing corresponding-pointer-initialization source buffer " + "for " DPxMOD ", with pointee base " DPxMOD "\n", + DPxPTR(HstPtr), DPxPTR(TgtPteeBase)); std::memcpy(Buffer, &TgtPteeBase, VoidPtrSize); if (HstPtrSize <= VoidPtrSize) return; @@ -1592,10 +1617,12 @@ class PrivateArgumentManagerTy { // For Fortran descriptors, copy the remaining descriptor fields from host uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize; void *HstDescriptorFieldsAddr = static_cast(HstPtr) + VoidPtrSize; - DP("Copying %" PRId64 - " bytes of descriptor fields into corresponding-pointer-initialization " - "buffer at offset %" PRId64 ", from " DPxMOD "\n", - HstDescriptorFieldsSize, VoidPtrSize, DPxPTR(HstDescriptorFieldsAddr)); + DPIF( + MAP, + "Copying %" PRId64 + " bytes of descriptor fields into corresponding-pointer-initialization " + "buffer at offset %" PRId64 ", from " DPxMOD "\n", + HstDescriptorFieldsSize, VoidPtrSize, DPxPTR(HstDescriptorFieldsAddr)); std::memcpy(Buffer + VoidPtrSize, HstDescriptorFieldsAddr, HstDescriptorFieldsSize); } @@ -1634,21 +1661,22 @@ class PrivateArgumentManagerTy { AllocImmediately) { TgtPtr = Device.allocData(ArgSize, HstPtr); if (!TgtPtr) { - DP("Data allocation for %sprivate array " DPxMOD " failed.\n", - (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtr)); + DPIF(MAP, "Data allocation for %sprivate array " DPxMOD " failed.\n", + (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtr)); return OFFLOAD_FAIL; } #ifdef OMPTARGET_DEBUG void *TgtPtrBase = (void *)((intptr_t)TgtPtr + ArgOffset); - DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD - " for %sprivate array " DPxMOD " - pushing target argument " DPxMOD - "\n", - ArgSize, DPxPTR(TgtPtr), (IsFirstPrivate ? "first-" : ""), - DPxPTR(HstPtr), DPxPTR(TgtPtrBase)); + DPIF(MAP, + "Allocated %" PRId64 " bytes of target memory at " DPxMOD + " for %sprivate array " DPxMOD " - pushing target argument " DPxMOD + "\n", + ArgSize, DPxPTR(TgtPtr), (IsFirstPrivate ? "first-" : ""), + DPxPTR(HstPtr), DPxPTR(TgtPtrBase)); #endif // If first-private, copy data from host if (IsFirstPrivate) { - DP("Submitting firstprivate data to the device.\n"); + DPIF(MAP, "Submitting firstprivate data to the device.\n"); // The source value used for corresponding-pointer-initialization // is different vs regular firstprivates. @@ -1659,16 +1687,18 @@ class PrivateArgumentManagerTy { : HstPtr; int Ret = Device.submitData(TgtPtr, DataSource, ArgSize, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { - DP("Copying %s data to device failed.\n", - IsCorrespondingPointerInit ? "corresponding-pointer-initialization" - : "firstprivate"); + DPIF(MAP, "Copying %s data to device failed.\n", + IsCorrespondingPointerInit + ? "corresponding-pointer-initialization" + : "firstprivate"); return OFFLOAD_FAIL; } } TgtPtrs.push_back(TgtPtr); } else { - DP("Firstprivate array " DPxMOD " of size %" PRId64 " will be packed\n", - DPxPTR(HstPtr), ArgSize); + DPIF(MAP, + "Firstprivate array " DPxMOD " of size %" PRId64 " will be packed\n", + DPxPTR(HstPtr), ArgSize); // When reach this point, the argument must meet all following // requirements: // 1. Its size does not exceed the threshold (see the comment for @@ -1742,17 +1772,17 @@ class PrivateArgumentManagerTy { void *TgtPtr = Device.allocData(FirstPrivateArgSize, FirstPrivateArgBuffer.data()); if (TgtPtr == nullptr) { - DP("Failed to allocate target memory for private arguments.\n"); + DPIF(MAP, "Failed to allocate target memory for private arguments.\n"); return OFFLOAD_FAIL; } TgtPtrs.push_back(TgtPtr); - DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD "\n", - FirstPrivateArgSize, DPxPTR(TgtPtr)); + DPIF(MAP, "Allocated %" PRId64 " bytes of target memory at " DPxMOD "\n", + FirstPrivateArgSize, DPxPTR(TgtPtr)); // Transfer data to target device int Ret = Device.submitData(TgtPtr, FirstPrivateArgBuffer.data(), FirstPrivateArgSize, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { - DP("Failed to submit data of private arguments.\n"); + DPIF(MAP, "Failed to submit data of private arguments.\n"); return OFFLOAD_FAIL; } // Fill in all placeholder pointers @@ -1764,10 +1794,11 @@ class PrivateArgumentManagerTy { TP += Info.Padding; Ptr = reinterpret_cast(TP); TP += Info.Size; - DP("Firstprivate array " DPxMOD " of size %" PRId64 " mapped to " DPxMOD - "\n", - DPxPTR(Info.HstPtrBegin), Info.HstPtrEnd - Info.HstPtrBegin, - DPxPTR(Ptr)); + DPIF(MAP, + "Firstprivate array " DPxMOD " of size %" PRId64 + " mapped to " DPxMOD "\n", + DPxPTR(Info.HstPtrBegin), Info.HstPtrEnd - Info.HstPtrBegin, + DPxPTR(Ptr)); } } @@ -1779,7 +1810,7 @@ class PrivateArgumentManagerTy { for (void *P : TgtPtrs) { int Ret = Device.deleteData(P); if (Ret != OFFLOAD_SUCCESS) { - DP("Deallocation of (first-)private arrays failed.\n"); + DPIF(MAP, "Deallocation of (first-)private arrays failed.\n"); return OFFLOAD_FAIL; } } @@ -1847,7 +1878,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, void *HstPtrBase = Args[Idx]; void *TgtPtrBase = (void *)((intptr_t)TgtArgs[TgtIdx] + TgtOffsets[TgtIdx]); - DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase)); + DPIF(MAP, "Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase)); uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta); void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation(); @@ -1857,18 +1888,20 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, /*UseHoldRefCount=*/false); PointerTgtPtrBegin = TPR.TargetPointer; if (!TPR.isPresent()) { - DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n", - DPxPTR(HstPtrVal)); + DPIF(MAP, + "No lambda captured variable mapped (" DPxMOD ") - ignored\n", + DPxPTR(HstPtrVal)); continue; } if (TPR.Flags.IsHostPointer) { - DP("Unified memory is active, no need to map lambda captured" - "variable (" DPxMOD ")\n", - DPxPTR(HstPtrVal)); + DPIF(MAP, + "Unified memory is active, no need to map lambda captured" + "variable (" DPxMOD ")\n", + DPxPTR(HstPtrVal)); continue; } - DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", - DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); + DPIF(MAP, "Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", + DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); Ret = DeviceOrErr->submitData(TgtPtrBegin, &PointerTgtPtrBegin, sizeof(void *), AsyncInfo, TPR.getEntry()); @@ -1886,8 +1919,10 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, ptrdiff_t TgtBaseOffset; TargetPointerResultTy TPR; if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) { - DP("Forwarding first-private value " DPxMOD " to the target construct\n", - DPxPTR(HstPtrBase)); + DPIF(MAP, + "Forwarding first-private value " DPxMOD + " to the target construct\n", + DPxPTR(HstPtrBase)); TgtPtrBegin = HstPtrBase; TgtBaseOffset = 0; } else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) { @@ -1952,8 +1987,9 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; #ifdef OMPTARGET_DEBUG void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset); - DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n", - DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin)); + DPIF(MAP, + "Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n", + DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin)); #endif } TgtArgsPositions[I] = TgtArgs.size(); @@ -1967,7 +2003,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, // Pack and transfer first-private arguments Ret = PrivateArgumentManager.packAndTransfer(TgtArgs); if (Ret != OFFLOAD_SUCCESS) { - DP("Failed to pack and transfer first private arguments\n"); + DPIF(MAP, "Failed to pack and transfer first private arguments\n"); return OFFLOAD_FAIL; } @@ -2040,7 +2076,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, } assert(TargetTable && "Global data has not been mapped\n"); - DP("loop trip count is %" PRIu64 ".\n", KernelArgs.Tripcount); + DPIF(KERNEL, "loop trip count is %" PRIu64 ".\n", KernelArgs.Tripcount); // We need to keep bases and offsets separate. Sometimes (e.g. in OpenCL) we // need to manifest base pointers prior to launching a kernel. Even if we have @@ -2079,9 +2115,10 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, // Launch device execution. void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].Address; - DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", - TargetTable->EntriesBegin[TM->Index].SymbolName, DPxPTR(TgtEntryPtr), - TM->Index); + DPIF(KERNEL, + "Launching target execution %s with pointer " DPxMOD " (index=%d).\n", + TargetTable->EntriesBegin[TM->Index].SymbolName, DPxPTR(TgtEntryPtr), + TM->Index); { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); @@ -2168,9 +2205,10 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr, // Retrieve the target kernel pointer, allocate and store the recorded device // memory data, and launch device execution. void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].Address; - DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", - TargetTable->EntriesBegin[TM->Index].SymbolName, DPxPTR(TgtEntryPtr), - TM->Index); + DPIF(KERNEL, + "Launching target execution %s with pointer " DPxMOD " (index=%d).\n", + TargetTable->EntriesBegin[TM->Index].SymbolName, DPxPTR(TgtEntryPtr), + TM->Index); void *TgtPtr = Device.allocData(DeviceMemorySize, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT); diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp index bc92f4a46a5c0..7951dd64b2c7f 100644 --- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp +++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp @@ -93,7 +93,7 @@ static bool checkForHSA() { auto DynlibHandle = std::make_unique( llvm::sys::DynamicLibrary::getPermanentLibrary(HsaLib, &ErrMsg)); if (!DynlibHandle->isValid()) { - DP("Unable to load library '%s': %s!\n", HsaLib, ErrMsg.c_str()); + DPIF(RTL, "Unable to load library '%s': %s!\n", HsaLib, ErrMsg.c_str()); return false; } @@ -102,10 +102,10 @@ static bool checkForHSA() { void *P = DynlibHandle->getAddressOfSymbol(Sym); if (P == nullptr) { - DP("Unable to find '%s' in '%s'!\n", Sym, HsaLib); + DPIF(RTL, "Unable to find '%s' in '%s'!\n", Sym, HsaLib); return false; } - DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P); + DPIF(RTL, "Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P); *dlwrap::pointer(I) = P; } diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index a7723b8598815..9d79e58fcaa89 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -558,7 +558,7 @@ struct AMDGPUKernelTy : public GenericKernelTy { ImplicitArgsSize = hsa_utils::getImplicitArgsSize(AMDImage.getELFABIVersion()); - DP("ELFABIVersion: %d\n", AMDImage.getELFABIVersion()); + DPIF(MODULE, "ELFABIVersion: %d\n", AMDImage.getELFABIVersion()); // Get additional kernel info read from image KernelInfo = AMDImage.getKernelInfo(getName()); @@ -3437,7 +3437,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy { hsa_status_t Status = hsa_init(); if (Status != HSA_STATUS_SUCCESS) { // Cannot call hsa_success_string. - DP("Failed to initialize AMDGPU's HSA library\n"); + DPIF(RTL, "Failed to initialize AMDGPU's HSA library\n"); return 0; } @@ -3482,7 +3482,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy { int32_t NumDevices = KernelAgents.size(); if (NumDevices == 0) { // Do not initialize if there are no devices. - DP("There are no devices supporting AMDGPU.\n"); + DPIF(RTL, "There are no devices supporting AMDGPU.\n"); return 0; } diff --git a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h index 77c756e006029..26e9bc4b12cc4 100644 --- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h +++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h @@ -57,7 +57,7 @@ inline Error readAMDGPUMetaDataFromImage( MemBuffer, KernelInfoMap, ELFABIVersion); if (!Err) return Err; - DP("ELFABIVERSION Version: %u\n", ELFABIVersion); + DPIF(MODULE, "ELFABIVERSION Version: %u\n", ELFABIVersion); return Err; } diff --git a/offload/plugins-nextgen/common/include/MemoryManager.h b/offload/plugins-nextgen/common/include/MemoryManager.h index 8f6c1adcdaa58..66126cdb89181 100644 --- a/offload/plugins-nextgen/common/include/MemoryManager.h +++ b/offload/plugins-nextgen/common/include/MemoryManager.h @@ -79,7 +79,7 @@ class MemoryManagerTy { static int findBucket(size_t Size) { const size_t F = floorToPowerOfTwo(Size); - DP("findBucket: Size %zu is floored to %zu.\n", Size, F); + DPIF(MEMORY, "findBucket: Size %zu is floored to %zu.\n", Size, F); int L = 0, H = NumBuckets - 1; while (H - L > 1) { @@ -94,7 +94,7 @@ class MemoryManagerTy { assert(L >= 0 && L < NumBuckets && "L is out of range"); - DP("findBucket: Size %zu goes to bucket %d\n", Size, L); + DPIF(MEMORY, "findBucket: Size %zu goes to bucket %d\n", Size, L); return L; } @@ -192,8 +192,9 @@ class MemoryManagerTy { // We cannot get memory from the device. It might be due to OOM. Let's // free all memory in FreeLists and try again. if (TgtPtr == nullptr) { - DP("Failed to get memory on device. Free all memory in FreeLists and " - "try again.\n"); + DPIF(MEMORY, + "Failed to get memory on device. Free all memory in FreeLists and " + "try again.\n"); TgtPtrOrErr = freeAndAllocate(Size, HstPtr); if (!TgtPtrOrErr) return TgtPtrOrErr.takeError(); @@ -201,8 +202,9 @@ class MemoryManagerTy { } if (TgtPtr == nullptr) - DP("Still cannot get memory on device probably because the device is " - "OOM.\n"); + DPIF(MEMORY, + "Still cannot get memory on device probably because the device is " + "OOM.\n"); return TgtPtr; } @@ -235,21 +237,23 @@ class MemoryManagerTy { if (Size == 0) return nullptr; - DP("MemoryManagerTy::allocate: size %zu with host pointer " DPxMOD ".\n", - Size, DPxPTR(HstPtr)); + DPIF(MEMORY, + "MemoryManagerTy::allocate: size %zu with host pointer " DPxMOD ".\n", + Size, DPxPTR(HstPtr)); // If the size is greater than the threshold, allocate it directly from // device. if (Size > SizeThreshold) { - DP("%zu is greater than the threshold %zu. Allocate it directly from " - "device\n", - Size, SizeThreshold); + DPIF(MEMORY, + "%zu is greater than the threshold %zu. Allocate it directly from " + "device\n", + Size, SizeThreshold); auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr); if (!TgtPtrOrErr) return TgtPtrOrErr.takeError(); - DP("Got target pointer " DPxMOD ". Return directly.\n", - DPxPTR(*TgtPtrOrErr)); + DPIF(MEMORY, "Got target pointer " DPxMOD ". Return directly.\n", + DPxPTR(*TgtPtrOrErr)); return *TgtPtrOrErr; } @@ -272,12 +276,14 @@ class MemoryManagerTy { } if (NodePtr != nullptr) - DP("Find one node " DPxMOD " in the bucket.\n", DPxPTR(NodePtr)); + DPIF(MEMORY, "Find one node " DPxMOD " in the bucket.\n", + DPxPTR(NodePtr)); // We cannot find a valid node in FreeLists. Let's allocate on device and // create a node for it. if (NodePtr == nullptr) { - DP("Cannot find a node in the FreeLists. Allocate on device.\n"); + DPIF(MEMORY, + "Cannot find a node in the FreeLists. Allocate on device.\n"); // Allocate one on device auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr); if (!TgtPtrOrErr) @@ -294,8 +300,9 @@ class MemoryManagerTy { NodePtr = &Itr.first->second; } - DP("Node address " DPxMOD ", target pointer " DPxMOD ", size %zu\n", - DPxPTR(NodePtr), DPxPTR(TgtPtr), Size); + DPIF(MEMORY, + "Node address " DPxMOD ", target pointer " DPxMOD ", size %zu\n", + DPxPTR(NodePtr), DPxPTR(TgtPtr), Size); } assert(NodePtr && "NodePtr should not be nullptr at this point"); @@ -305,7 +312,8 @@ class MemoryManagerTy { /// Deallocate memory pointed by \p TgtPtr Error free(void *TgtPtr) { - DP("MemoryManagerTy::free: target memory " DPxMOD ".\n", DPxPTR(TgtPtr)); + DPIF(MEMORY, "MemoryManagerTy::free: target memory " DPxMOD ".\n", + DPxPTR(TgtPtr)); NodeTy *P = nullptr; @@ -322,14 +330,15 @@ class MemoryManagerTy { // The memory is not managed by the manager if (P == nullptr) { - DP("Cannot find its node. Delete it on device directly.\n"); + DPIF(MEMORY, "Cannot find its node. Delete it on device directly.\n"); return deleteOnDevice(TgtPtr); } // Insert the node to the free list const int B = findBucket(P->Size); - DP("Found its node " DPxMOD ". Insert it to bucket %d.\n", DPxPTR(P), B); + DPIF(MEMORY, "Found its node " DPxMOD ". Insert it to bucket %d.\n", + DPxPTR(P), B); { std::lock_guard G(FreeListLocks[B]); @@ -352,8 +361,8 @@ class MemoryManagerTy { size_t Threshold = MemoryManagerThreshold.get(); if (MemoryManagerThreshold.isPresent() && Threshold == 0) { - DP("Disabled memory manager as user set " - "LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD=0.\n"); + DPIF(MEMORY, "Disabled memory manager as user set " + "LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD=0.\n"); return std::make_pair(0, false); } diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 8c530bba3882c..f8277b2bfd88e 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -712,8 +712,8 @@ class PinnedAllocationMapTy { IgnoreLockMappedFailures = false; } else { // Disable by default. - DP("Invalid value LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS=%s\n", - OMPX_LockMappedBuffers.get().data()); + DPIF(MEMORY, "Invalid value LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS=%s\n", + OMPX_LockMappedBuffers.get().data()); LockMappedBuffers = false; } } @@ -1608,7 +1608,7 @@ template class GenericDeviceResourceManagerTy { /// must be called before the destructor. virtual Error deinit() { if (NextAvailable) - DP("Missing %d resources to be returned\n", NextAvailable); + DPIF(RTL, "Missing %d resources to be returned\n", NextAvailable); // TODO: This prevents a bug on libomptarget to make the plugins fail. There // may be some resources not returned. Do not destroy these ones. diff --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp index 5464c197dba78..436a62f2ba330 100644 --- a/offload/plugins-nextgen/common/src/GlobalHandler.cpp +++ b/offload/plugins-nextgen/common/src/GlobalHandler.cpp @@ -75,12 +75,13 @@ Error GenericGlobalHandlerTy::moveGlobalBetweenDeviceAndHost( return Err; } - DP("Successfully %s %u bytes associated with global symbol '%s' %s the " - "device " - "(%p -> %p).\n", - Device2Host ? "read" : "write", HostGlobal.getSize(), - HostGlobal.getName().data(), Device2Host ? "from" : "to", - DeviceGlobal.getPtr(), HostGlobal.getPtr()); + DPIF(MAP, + "Successfully %s %u bytes associated with global symbol '%s' %s the " + "device " + "(%p -> %p).\n", + Device2Host ? "read" : "write", HostGlobal.getSize(), + HostGlobal.getName().data(), Device2Host ? "from" : "to", + DeviceGlobal.getPtr(), HostGlobal.getPtr()); return Plugin::success(); } @@ -157,10 +158,11 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device, HostGlobal.getName().data(), ImageGlobal.getSize(), HostGlobal.getSize()); - DP("Global symbol '%s' was found in the ELF image and %u bytes will copied " - "from %p to %p.\n", - HostGlobal.getName().data(), HostGlobal.getSize(), ImageGlobal.getPtr(), - HostGlobal.getPtr()); + DPIF(MAP, + "Global symbol '%s' was found in the ELF image and %u bytes will copied " + "from %p to %p.\n", + HostGlobal.getName().data(), HostGlobal.getSize(), ImageGlobal.getPtr(), + HostGlobal.getPtr()); assert(Image.getStart() <= ImageGlobal.getPtr() && utils::advancePtr(ImageGlobal.getPtr(), ImageGlobal.getSize()) < diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index db43cbe49cc2b..bb2d7cb80afd9 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -99,7 +99,8 @@ struct RecordReplayTy { VAddr = *VAddrOrErr; } - DP("Request %ld bytes allocated at %p\n", MaxMemoryAllocation, VAddr); + DPIF(MEMORY, "Request %ld bytes allocated at %p\n", MaxMemoryAllocation, + VAddr); if (auto Err = Device->memoryVAMap(&MemoryStart, VAddr, &ASize)) return Err; @@ -339,7 +340,7 @@ struct RecordReplayTy { Alloc = MemoryPtr; MemoryPtr = (char *)MemoryPtr + AlignedSize; MemorySize += AlignedSize; - DP("Memory Allocator return " DPxMOD "\n", DPxPTR(Alloc)); + DPIF(MEMORY, "Memory Allocator return " DPxMOD "\n", DPxPTR(Alloc)); return Alloc; } @@ -413,9 +414,10 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice, return Err; } else { KernelEnvironment = KernelEnvironmentTy{}; - DP("Failed to read kernel environment for '%s' Using default Bare (0) " - "execution mode\n", - getName()); + DPIF(MODULE, + "Failed to read kernel environment for '%s' Using default Bare (0) " + "execution mode\n", + getName()); } // Max = Config.Max > 0 ? min(Config.Max, Device.Max) : Device.Max; @@ -722,7 +724,8 @@ GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, if (ompt::Initialized && ompt::lookupCallbackByCode) { \ ompt::lookupCallbackByCode((ompt_callbacks_t)(Code), \ ((ompt_callback_t *)&(Name##_fn))); \ - DP("OMPT: class bound %s=%p\n", #Name, ((void *)(uint64_t)Name##_fn)); \ + DPIF(TOOL, "OMPT: class bound %s=%p\n", #Name, \ + ((void *)(uint64_t)Name##_fn)); \ } FOREACH_OMPT_DEVICE_EVENT(bindOmptCallback); @@ -872,7 +875,8 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { } Expected GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, StringRef InputTgtImage) { - DP("Load data from image " DPxMOD "\n", DPxPTR(InputTgtImage.bytes_begin())); + DPIF(MODULE, "Load data from image " DPxMOD "\n", + DPxPTR(InputTgtImage.bytes_begin())); std::unique_ptr Buffer; if (identify_magic(InputTgtImage) == file_magic::bitcode) { @@ -959,7 +963,8 @@ Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin, GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler(); if (!GHandler.isSymbolInImage(*this, Image, "__omp_rtl_device_memory_pool_tracker")) { - DP("Skip the memory pool as there is no tracker symbol in the image."); + DPIF(MEMORY, + "Skip the memory pool as there is no tracker symbol in the image."); return Error::success(); } @@ -1000,7 +1005,7 @@ Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin, return Err; RPCServer = &Server; - DP("Running an RPC server on device %d\n", getDeviceId()); + DPIF(RTL, "Running an RPC server on device %d\n", getDeviceId()); return Plugin::success(); } @@ -1722,8 +1727,8 @@ int32_t GenericPluginTy::is_initialized() const { return Initialized; } int32_t GenericPluginTy::isPluginCompatible(StringRef Image) { auto HandleError = [&](Error Err) -> bool { [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); - DP("Failure to check validity of image %p: %s", Image.data(), - ErrStr.c_str()); + DPIF(MODULE, "Failure to check validity of image %p: %s", Image.data(), + ErrStr.c_str()); return false; }; switch (identify_magic(Image)) { @@ -1751,8 +1756,8 @@ int32_t GenericPluginTy::isPluginCompatible(StringRef Image) { int32_t GenericPluginTy::isDeviceCompatible(int32_t DeviceId, StringRef Image) { auto HandleError = [&](Error Err) -> bool { [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); - DP("Failure to check validity of image %p: %s", Image.data(), - ErrStr.c_str()); + DPIF(MODULE, "Failure to check validity of image %p: %s", Image.data(), + ErrStr.c_str()); return false; }; switch (identify_magic(Image)) { diff --git a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp index f5b2d074a47e7..73e551e15681c 100644 --- a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp +++ b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp @@ -141,7 +141,7 @@ static bool checkForCUDA() { auto DynlibHandle = std::make_unique( llvm::sys::DynamicLibrary::getPermanentLibrary(CudaLib, &ErrMsg)); if (!DynlibHandle->isValid()) { - DP("Unable to load library '%s': %s!\n", CudaLib, ErrMsg.c_str()); + DPIF(RTL, "Unable to load library '%s': %s!\n", CudaLib, ErrMsg.c_str()); return false; } @@ -153,7 +153,7 @@ static bool checkForCUDA() { const char *First = It->second; void *P = DynlibHandle->getAddressOfSymbol(First); if (P) { - DP("Implementing %s with dlsym(%s) -> %p\n", Sym, First, P); + DPIF(RTL, "Implementing %s with dlsym(%s) -> %p\n", Sym, First, P); *dlwrap::pointer(I) = P; continue; } @@ -161,10 +161,10 @@ static bool checkForCUDA() { void *P = DynlibHandle->getAddressOfSymbol(Sym); if (P == nullptr) { - DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib); + DPIF(RTL, "Unable to find '%s' in '%s'!\n", Sym, CudaLib); return false; } - DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P); + DPIF(RTL, "Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P); *dlwrap::pointer(I) = P; } diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index db94f7f2dd995..731f2c83234bb 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -1516,13 +1516,13 @@ struct CUDAPluginTy final : public GenericPluginTy { CUresult Res = cuInit(0); if (Res == CUDA_ERROR_INVALID_HANDLE) { // Cannot call cuGetErrorString if dlsym failed. - DP("Failed to load CUDA shared library\n"); + DPIF(RTL, "Failed to load CUDA shared library\n"); return 0; } if (Res == CUDA_ERROR_NO_DEVICE) { // Do not initialize if there are no devices. - DP("There are no devices supporting CUDA.\n"); + DPIF(RTL, "There are no devices supporting CUDA.\n"); return 0; } @@ -1537,7 +1537,7 @@ struct CUDAPluginTy final : public GenericPluginTy { // Do not initialize if there are no devices. if (NumDevices == 0) - DP("There are no devices supporting CUDA.\n"); + DPIF(RTL, "There are no devices supporting CUDA.\n"); return NumDevices; } @@ -1645,7 +1645,7 @@ Error CUDADeviceTy::dataExchangeImpl(const void *SrcPtr, if (Res == CUDA_ERROR_TOO_MANY_PEERS) { // Resources may be exhausted due to many P2P links. CanAccessPeer = 0; - DP("Too many P2P so fall back to D2D memcpy"); + DPIF(MEMORY, "Too many P2P so fall back to D2D memcpy"); } else if (auto Err = Plugin::check(Res, "error in cuCtxEnablePeerAccess: %s")) return Err; diff --git a/offload/plugins-nextgen/host/dynamic_ffi/ffi.cpp b/offload/plugins-nextgen/host/dynamic_ffi/ffi.cpp index c586ad1c1969b..7e18f99252b1a 100644 --- a/offload/plugins-nextgen/host/dynamic_ffi/ffi.cpp +++ b/offload/plugins-nextgen/host/dynamic_ffi/ffi.cpp @@ -41,7 +41,7 @@ uint32_t ffi_init() { llvm::sys::DynamicLibrary::getPermanentLibrary(FFI_PATH, &ErrMsg)); if (!DynlibHandle->isValid()) { - DP("Unable to load library '%s': %s!\n", FFI_PATH, ErrMsg.c_str()); + DPIF(RTL, "Unable to load library '%s': %s!\n", FFI_PATH, ErrMsg.c_str()); return DYNAMIC_FFI_FAIL; } @@ -50,10 +50,10 @@ uint32_t ffi_init() { void *P = DynlibHandle->getAddressOfSymbol(Sym); if (P == nullptr) { - DP("Unable to find '%s' in '%s'!\n", Sym, FFI_PATH); + DPIF(RTL, "Unable to find '%s' in '%s'!\n", Sym, FFI_PATH); return DYNAMIC_FFI_FAIL; } - DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P); + DPIF(RTL, "Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P); *dlwrap::pointer(I) = P; } @@ -62,7 +62,7 @@ uint32_t ffi_init() { { \ void *SymbolPtr = DynlibHandle->getAddressOfSymbol(#SYMBOL); \ if (!SymbolPtr) { \ - DP("Unable to find '%s' in '%s'!\n", #SYMBOL, FFI_PATH); \ + DPIF(RTL, "Unable to find '%s' in '%s'!\n", #SYMBOL, FFI_PATH); \ return DYNAMIC_FFI_FAIL; \ } \ SYMBOL = *reinterpret_cast(SymbolPtr); \