Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6387,11 +6387,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions,
options::OPT_fno_convergent_functions);

// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
// for sampling, overhead of call arc collection is way too high and there's
// no way to collect the output.
if (!Triple.isNVPTX() && !Triple.isAMDGCN())
addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);
addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);

Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);

Expand Down
33 changes: 0 additions & 33 deletions clang/test/Driver/cuda-no-pgo-or-coverage.cu

This file was deleted.

2 changes: 1 addition & 1 deletion compiler-rt/include/profile/InstrProfData.inc
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
#define INSTR_PROF_DATA_DEFINED
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did this change?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This ensures that a GPU profile is written with the version pulled from the GPU. Previously, if the host used LLVM-level instrumentation and the device has clang-level instrumentation, the GPU profile would use the host's format. This change ensures that is not the case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file is a part of public API and is used by a number of users that have their own runtime implementation (for example baremetal users, OS kernels) and this change is going to break all of them. Is there another way we could handle this case that wouldn't require this change and avoid all that churn?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the best solution would be to forbid the user from using different instrumentation levels (LLVM IR vs clang) on the host and device. I can replace the version replacement with a check that ensures the device version matches the host version.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I fixed the version detection and made sure to set the Version property the custom value instead of changing the default value.

INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/lib/profile/InstrProfiling.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,8 @@ int __llvm_write_custom_profile(const char *Target,
const __llvm_profile_data *DataEnd,
const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin,
const char *NamesEnd);
const char *NamesEnd,
const uint64_t *VersionOverride);

/*!
* This variable is defined in InstrProfilingRuntime.cpp as a hidden
Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/lib/profile/InstrProfilingBuffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,5 +252,6 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
&BufferWriter, DataBegin, DataEnd, CountersBegin, CountersEnd,
BitmapBegin, BitmapEnd, /*VPDataReader=*/0, NamesBegin, NamesEnd,
/*VTableBegin=*/NULL, /*VTableEnd=*/NULL, /*VNamesBegin=*/NULL,
/*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0);
/*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0,
__llvm_profile_get_version());
}
22 changes: 15 additions & 7 deletions compiler-rt/lib/profile/InstrProfilingFile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1273,10 +1273,13 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File,
return 0;
}

COMPILER_RT_USED int __llvm_write_custom_profile(
const char *Target, const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd, const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) {
int __llvm_write_custom_profile(const char *Target,
const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd,
const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin,
const char *NamesEnd,
const uint64_t *VersionOverride) {
int ReturnValue = 0, FilenameLength, TargetLength;
char *FilenameBuf, *TargetFilename;
const char *Filename;
Expand Down Expand Up @@ -1358,10 +1361,15 @@ COMPILER_RT_USED int __llvm_write_custom_profile(
ProfDataWriter fileWriter;
initFileWriter(&fileWriter, OutputFile);

uint64_t Version = __llvm_profile_get_version();
if (VersionOverride)
Version = *VersionOverride;

/* Write custom data to the file */
ReturnValue = lprofWriteDataImpl(
&fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
lprofGetVPDataReader(), NULL, NULL, NULL, NULL, NamesBegin, NamesEnd, 0);
ReturnValue =
lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin,
CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL,
NULL, NULL, NULL, NamesBegin, NamesEnd, 0, Version);
closeFileObject(OutputFile);

// Restore SIGKILL.
Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/lib/profile/InstrProfilingInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ int lprofWriteDataImpl(ProfDataWriter *Writer,
VPDataReaderType *VPDataReader, const char *NamesBegin,
const char *NamesEnd, const VTableProfData *VTableBegin,
const VTableProfData *VTableEnd, const char *VNamesBegin,
const char *VNamesEnd, int SkipNameDataWrite);
const char *VNamesEnd, int SkipNameDataWrite,
uint64_t Version);

/* Merge value profile data pointed to by SrcValueProfData into
* in-memory profile counters pointed by to DstData. */
Expand Down
20 changes: 10 additions & 10 deletions compiler-rt/lib/profile/InstrProfilingWriter.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,21 +254,21 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
const char *VNamesBegin = __llvm_profile_begin_vtabnames();
const char *VNamesEnd = __llvm_profile_end_vtabnames();
uint64_t Version = __llvm_profile_get_version();
return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
CountersEnd, BitmapBegin, BitmapEnd, VPDataReader,
NamesBegin, NamesEnd, VTableBegin, VTableEnd,
VNamesBegin, VNamesEnd, SkipNameDataWrite);
VNamesBegin, VNamesEnd, SkipNameDataWrite, Version);
}

COMPILER_RT_VISIBILITY int
lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd,
const char *CountersBegin, const char *CountersEnd,
const char *BitmapBegin, const char *BitmapEnd,
VPDataReaderType *VPDataReader, const char *NamesBegin,
const char *NamesEnd, const VTableProfData *VTableBegin,
const VTableProfData *VTableEnd, const char *VNamesBegin,
const char *VNamesEnd, int SkipNameDataWrite) {
COMPILER_RT_VISIBILITY int lprofWriteDataImpl(
ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd, const char *CountersBegin,
const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd,
VPDataReaderType *VPDataReader, const char *NamesBegin,
const char *NamesEnd, const VTableProfData *VTableBegin,
const VTableProfData *VTableEnd, const char *VNamesBegin,
const char *VNamesEnd, int SkipNameDataWrite, uint64_t Version) {
/* Calculate size of sections. */
const uint64_t DataSectionSize =
__llvm_profile_get_data_size(DataBegin, DataEnd);
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/ProfileData/InstrProfData.inc
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
#define INSTR_PROF_DATA_DEFINED
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,10 @@ createIRLevelProfileFlagVar(Module &M,
auto IRLevelVersionVariable = new GlobalVariable(
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
if (isGPUProfTarget(M))
IRLevelVersionVariable->setVisibility(GlobalValue::ProtectedVisibility);
else
IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
Triple TT(M.getTargetTriple());
if (TT.supportsCOMDAT()) {
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/tools/llvm-profdata/binary-ids-padding.test
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
Expand Down
6 changes: 4 additions & 2 deletions offload/plugins-nextgen/common/include/GlobalHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H

#include <optional>
#include <type_traits>

#include "llvm/ADT/DenseMap.h"
Expand Down Expand Up @@ -67,15 +68,16 @@ extern "C" {
extern int __attribute__((weak)) __llvm_write_custom_profile(
const char *Target, const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd, const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd);
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd,
const uint64_t *VersionOverride);
}

/// PGO profiling data extracted from a GPU device
struct GPUProfGlobals {
SmallVector<int64_t> Counts;
SmallVector<__llvm_profile_data> Data;
SmallVector<uint8_t> NamesData;
Triple TargetTriple;
std::optional<uint64_t> Version;

void dump() const;
Error write() const;
Expand Down
18 changes: 14 additions & 4 deletions offload/plugins-nextgen/common/src/GlobalHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "Shared/Utils.h"

#include "llvm/ProfileData/InstrProfData.inc"
#include "llvm/Support/Error.h"

#include <cstring>
Expand Down Expand Up @@ -214,6 +215,13 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
return Err;
DeviceProfileData.Data.push_back(std::move(Data));
} else if (*NameOrErr == INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)) {
uint64_t RawVersionData;
GlobalTy RawVersionGlobal(NameOrErr->str(), Sym.getSize(),
&RawVersionData);
if (auto Err = readGlobalFromDevice(Device, Image, RawVersionGlobal))
return Err;
DeviceProfileData.Version = RawVersionData;
}
}
return DeviceProfileData;
Expand Down Expand Up @@ -265,7 +273,7 @@ void GPUProfGlobals::dump() const {
}

Error GPUProfGlobals::write() const {
if (!__llvm_write_custom_profile)
if (__llvm_write_custom_profile == nullptr)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (__llvm_write_custom_profile == nullptr)
if (!__llvm_write_custom_profile)

nit. this is idiomatic and shouldn't have been changed.

return Plugin::error("Could not find symbol __llvm_write_custom_profile. "
"The compiler-rt profiling library must be linked for "
"GPU PGO to work.");
Expand All @@ -274,6 +282,8 @@ Error GPUProfGlobals::write() const {
CountsSize = Counts.size() * sizeof(int64_t);
__llvm_profile_data *DataBegin, *DataEnd;
char *CountersBegin, *CountersEnd, *NamesBegin, *NamesEnd;
const uint64_t *VersionOverride =
Version.has_value() ? &Version.value() : nullptr;

// Initialize array of contiguous data. We need to make sure each section is
// contiguous so that the PGO library can compute deltas properly
Expand All @@ -295,9 +305,9 @@ Error GPUProfGlobals::write() const {
memcpy(NamesBegin, NamesData.data(), NamesData.size());

// Invoke compiler-rt entrypoint
int result = __llvm_write_custom_profile(TargetTriple.str().c_str(),
DataBegin, DataEnd, CountersBegin,
CountersEnd, NamesBegin, NamesEnd);
int result = __llvm_write_custom_profile(
TargetTriple.str().c_str(), DataBegin, DataEnd, CountersBegin,
CountersEnd, NamesBegin, NamesEnd, VersionOverride);
if (result != 0)
return Plugin::error("Error writing GPU PGO data to file");

Expand Down
84 changes: 84 additions & 0 deletions offload/test/offloading/gpupgo/pgo1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// RUN: %libomptarget-compile-generic -fcreate-profile \
// RUN: -Xarch_device -fprofile-generate
// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
// RUN: %target_triple.%basename_t.llvm.profraw | \
// RUN: %fcheck-generic --check-prefix="LLVM-PGO"

// RUN: %libomptarget-compile-generic -fcreate-profile \
// RUN: -Xarch_device -fprofile-instr-generate
// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
// RUN: %target_triple.%basename_t.clang.profraw | \
// RUN: %fcheck-generic --check-prefix="CLANG-PGO"

// REQUIRES: gpu
// REQUIRES: pgo

int test1(int a) { return a / 2; }
int test2(int a) { return a * 2; }

int main() {
int m = 2;
#pragma omp target
for (int i = 0; i < 10; i++) {
m = test1(m);
for (int j = 0; j < 2; j++) {
m = test2(m);
}
}
}

// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-PGO: Counters: 4
// LLVM-PGO: Block counts: [20, 10, 2, 1]

// LLVM-PGO-LABEL: test1:
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-PGO: Counters: 1
// LLVM-PGO: Block counts: [10]

// LLVM-PGO-LABEL: test2:
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-PGO: Counters: 1
// LLVM-PGO: Block counts: [20]

// LLVM-PGO-LABEL: Instrumentation level:
// LLVM-PGO-SAME: IR
// LLVM-PGO-SAME: entry_first = 0
// LLVM-PGO-LABEL: Functions shown:
// LLVM-PGO-SAME: 3
// LLVM-PGO-LABEL: Maximum function count:
// LLVM-PGO-SAME: 20
// LLVM-PGO-LABEL: Maximum internal block count:
// LLVM-PGO-SAME: 10

// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// CLANG-PGO: Counters: 3
// CLANG-PGO: Function count: 0
// CLANG-PGO: Block counts: [11, 20]

// CLANG-PGO-LABEL: test1:
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// CLANG-PGO: Counters: 1
// CLANG-PGO: Function count: 10
// CLANG-PGO: Block counts: []

// CLANG-PGO-LABEL: test2:
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// CLANG-PGO: Counters: 1
// CLANG-PGO: Function count: 20
// CLANG-PGO: Block counts: []

// CLANG-PGO-LABEL: Instrumentation level:
// CLANG-PGO-SAME: Front-end
// CLANG-PGO-LABEL: Functions shown:
// CLANG-PGO-SAME: 3
// CLANG-PGO-LABEL: Maximum function count:
// CLANG-PGO-SAME: 20
// CLANG-PGO-LABEL: Maximum internal block count:
// CLANG-PGO-SAME: 20
Loading
Loading