Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CodeGenPGO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1357,6 +1357,9 @@ void CodeGenPGO::setProfileVersion(llvm::Module &M) {

IRLevelVersionVariable->setVisibility(llvm::GlobalValue::HiddenVisibility);
llvm::Triple TT(M.getTargetTriple());
if (TT.isAMDGPU() || TT.isNVPTX())
IRLevelVersionVariable->setVisibility(
llvm::GlobalValue::ProtectedVisibility);
if (TT.supportsCOMDAT()) {
IRLevelVersionVariable->setLinkage(llvm::GlobalValue::ExternalLinkage);
IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
Expand Down
6 changes: 1 addition & 5 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6387,11 +6387,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions,
options::OPT_fno_convergent_functions);

// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
// for sampling, overhead of call arc collection is way too high and there's
// no way to collect the output.
if (!Triple.isNVPTX() && !Triple.isAMDGCN())
addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);
addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);

Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);

Expand Down
33 changes: 0 additions & 33 deletions clang/test/Driver/cuda-no-pgo-or-coverage.cu

This file was deleted.

3 changes: 2 additions & 1 deletion compiler-rt/lib/profile/InstrProfiling.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,8 @@ int __llvm_write_custom_profile(const char *Target,
const __llvm_profile_data *DataEnd,
const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin,
const char *NamesEnd);
const char *NamesEnd,
const uint64_t *VersionOverride);

/*!
* This variable is defined in InstrProfilingRuntime.cpp as a hidden
Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/lib/profile/InstrProfilingBuffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,5 +252,6 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
&BufferWriter, DataBegin, DataEnd, CountersBegin, CountersEnd,
BitmapBegin, BitmapEnd, /*VPDataReader=*/0, NamesBegin, NamesEnd,
/*VTableBegin=*/NULL, /*VTableEnd=*/NULL, /*VNamesBegin=*/NULL,
/*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0);
/*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0,
__llvm_profile_get_version());
}
22 changes: 15 additions & 7 deletions compiler-rt/lib/profile/InstrProfilingFile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1273,10 +1273,13 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File,
return 0;
}

COMPILER_RT_USED int __llvm_write_custom_profile(
const char *Target, const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd, const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) {
int __llvm_write_custom_profile(const char *Target,
const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd,
const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin,
const char *NamesEnd,
const uint64_t *VersionOverride) {
int ReturnValue = 0, FilenameLength, TargetLength;
char *FilenameBuf, *TargetFilename;
const char *Filename;
Expand Down Expand Up @@ -1358,10 +1361,15 @@ COMPILER_RT_USED int __llvm_write_custom_profile(
ProfDataWriter fileWriter;
initFileWriter(&fileWriter, OutputFile);

uint64_t Version = __llvm_profile_get_version();
if (VersionOverride)
Version = *VersionOverride;

/* Write custom data to the file */
ReturnValue = lprofWriteDataImpl(
&fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
lprofGetVPDataReader(), NULL, NULL, NULL, NULL, NamesBegin, NamesEnd, 0);
ReturnValue =
lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin,
CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL,
NULL, NULL, NULL, NamesBegin, NamesEnd, 0, Version);
closeFileObject(OutputFile);

// Restore SIGKILL.
Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/lib/profile/InstrProfilingInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ int lprofWriteDataImpl(ProfDataWriter *Writer,
VPDataReaderType *VPDataReader, const char *NamesBegin,
const char *NamesEnd, const VTableProfData *VTableBegin,
const VTableProfData *VTableEnd, const char *VNamesBegin,
const char *VNamesEnd, int SkipNameDataWrite);
const char *VNamesEnd, int SkipNameDataWrite,
uint64_t Version);

/* Merge value profile data pointed to by SrcValueProfData into
* in-memory profile counters pointed by to DstData. */
Expand Down
21 changes: 11 additions & 10 deletions compiler-rt/lib/profile/InstrProfilingWriter.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,21 +254,21 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
const char *VNamesBegin = __llvm_profile_begin_vtabnames();
const char *VNamesEnd = __llvm_profile_end_vtabnames();
uint64_t Version = __llvm_profile_get_version();
return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
CountersEnd, BitmapBegin, BitmapEnd, VPDataReader,
NamesBegin, NamesEnd, VTableBegin, VTableEnd,
VNamesBegin, VNamesEnd, SkipNameDataWrite);
VNamesBegin, VNamesEnd, SkipNameDataWrite, Version);
}

COMPILER_RT_VISIBILITY int
lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd,
const char *CountersBegin, const char *CountersEnd,
const char *BitmapBegin, const char *BitmapEnd,
VPDataReaderType *VPDataReader, const char *NamesBegin,
const char *NamesEnd, const VTableProfData *VTableBegin,
const VTableProfData *VTableEnd, const char *VNamesBegin,
const char *VNamesEnd, int SkipNameDataWrite) {
COMPILER_RT_VISIBILITY int lprofWriteDataImpl(
ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd, const char *CountersBegin,
const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd,
VPDataReaderType *VPDataReader, const char *NamesBegin,
const char *NamesEnd, const VTableProfData *VTableBegin,
const VTableProfData *VTableEnd, const char *VNamesBegin,
const char *VNamesEnd, int SkipNameDataWrite, uint64_t Version) {
/* Calculate size of sections. */
const uint64_t DataSectionSize =
__llvm_profile_get_data_size(DataBegin, DataEnd);
Expand Down Expand Up @@ -308,6 +308,7 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) Header.Name = Init;
#include "profile/InstrProfData.inc"
}
Header.Version = Version;

/* On WIN64, label differences are truncated 32-bit values. Truncate
* CountersDelta to match. */
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,10 @@ createIRLevelProfileFlagVar(Module &M,
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
if (isGPUProfTarget(M))
IRLevelVersionVariable->setVisibility(
llvm::GlobalValue::ProtectedVisibility);

Triple TT(M.getTargetTriple());
if (TT.supportsCOMDAT()) {
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
Expand Down
5 changes: 3 additions & 2 deletions offload/plugins-nextgen/common/include/GlobalHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,16 @@ extern "C" {
extern int __attribute__((weak)) __llvm_write_custom_profile(
const char *Target, const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd, const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd);
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd,
const uint64_t *VersionOverride);
}

/// PGO profiling data extracted from a GPU device
struct GPUProfGlobals {
SmallVector<int64_t> Counts;
SmallVector<__llvm_profile_data> Data;
SmallVector<uint8_t> NamesData;
Triple TargetTriple;
uint64_t Version = INSTR_PROF_RAW_VERSION;

void dump() const;
Error write() const;
Expand Down
14 changes: 11 additions & 3 deletions offload/plugins-nextgen/common/src/GlobalHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "Shared/Utils.h"

#include "llvm/ProfileData/InstrProfData.inc"
#include "llvm/Support/Error.h"

#include <cstring>
Expand Down Expand Up @@ -214,6 +215,13 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
return Err;
DeviceProfileData.Data.push_back(std::move(Data));
} else if (*NameOrErr == INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)) {
uint64_t RawVersionData;
GlobalTy RawVersionGlobal(NameOrErr->str(), Sym.getSize(),
&RawVersionData);
if (auto Err = readGlobalFromDevice(Device, Image, RawVersionGlobal))
return Err;
DeviceProfileData.Version = RawVersionData;
}
}
return DeviceProfileData;
Expand Down Expand Up @@ -295,9 +303,9 @@ Error GPUProfGlobals::write() const {
memcpy(NamesBegin, NamesData.data(), NamesData.size());

// Invoke compiler-rt entrypoint
int result = __llvm_write_custom_profile(TargetTriple.str().c_str(),
DataBegin, DataEnd, CountersBegin,
CountersEnd, NamesBegin, NamesEnd);
int result = __llvm_write_custom_profile(
TargetTriple.str().c_str(), DataBegin, DataEnd, CountersBegin,
CountersEnd, NamesBegin, NamesEnd, &Version);
if (result != 0)
return Plugin::error("Error writing GPU PGO data to file");

Expand Down
84 changes: 84 additions & 0 deletions offload/test/offloading/gpupgo/pgo1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// RUN: %libomptarget-compile-generic -fcreate-profile \
// RUN: -Xarch_device -fprofile-generate
// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
// RUN: %target_triple.%basename_t.llvm.profraw | \
// RUN: %fcheck-generic --check-prefix="LLVM-PGO"

// RUN: %libomptarget-compile-generic -fcreate-profile \
// RUN: -Xarch_device -fprofile-instr-generate
// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
// RUN: %target_triple.%basename_t.clang.profraw | \
// RUN: %fcheck-generic --check-prefix="CLANG-PGO"

// REQUIRES: gpu
// REQUIRES: pgo

int test1(int a) { return a / 2; }
int test2(int a) { return a * 2; }

int main() {
int m = 2;
#pragma omp target
for (int i = 0; i < 10; i++) {
m = test1(m);
for (int j = 0; j < 2; j++) {
m = test2(m);
}
}
}

// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-PGO: Counters: 4
// LLVM-PGO: Block counts: [20, 10, 2, 1]

// LLVM-PGO-LABEL: test1:
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-PGO: Counters: 1
// LLVM-PGO: Block counts: [10]

// LLVM-PGO-LABEL: test2:
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-PGO: Counters: 1
// LLVM-PGO: Block counts: [20]

// LLVM-PGO-LABEL: Instrumentation level:
// LLVM-PGO-SAME: IR
// LLVM-PGO-SAME: entry_first = 0
// LLVM-PGO-LABEL: Functions shown:
// LLVM-PGO-SAME: 3
// LLVM-PGO-LABEL: Maximum function count:
// LLVM-PGO-SAME: 20
// LLVM-PGO-LABEL: Maximum internal block count:
// LLVM-PGO-SAME: 10

// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// CLANG-PGO: Counters: 3
// CLANG-PGO: Function count: 0
// CLANG-PGO: Block counts: [11, 20]

// CLANG-PGO-LABEL: test1:
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// CLANG-PGO: Counters: 1
// CLANG-PGO: Function count: 10
// CLANG-PGO: Block counts: []

// CLANG-PGO-LABEL: test2:
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// CLANG-PGO: Counters: 1
// CLANG-PGO: Function count: 20
// CLANG-PGO: Block counts: []

// CLANG-PGO-LABEL: Instrumentation level:
// CLANG-PGO-SAME: Front-end
// CLANG-PGO-LABEL: Functions shown:
// CLANG-PGO-SAME: 3
// CLANG-PGO-LABEL: Maximum function count:
// CLANG-PGO-SAME: 20
// CLANG-PGO-LABEL: Maximum internal block count:
// CLANG-PGO-SAME: 20
Loading