Skip to content

Commit c2d3d17

Browse files
committed
merge main into amd-staging
2 parents e9b13a0 + 23302a2 commit c2d3d17

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

52 files changed

+3417
-7588
lines changed

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,12 @@ static cl::opt<DynoStatsSortOrder> DynoStatsSortOrderOpt(
6060
"print-sorted-by-order",
6161
cl::desc("use ascending or descending order when printing functions "
6262
"ordered by dyno stats"),
63-
cl::init(DynoStatsSortOrder::Descending), cl::cat(BoltOptCategory));
63+
cl::init(DynoStatsSortOrder::Descending),
64+
cl::values(clEnumValN(DynoStatsSortOrder::Ascending, "ascending",
65+
"Ascending order"),
66+
clEnumValN(DynoStatsSortOrder::Descending, "descending",
67+
"Descending order")),
68+
cl::cat(BoltOptCategory));
6469

6570
cl::list<std::string>
6671
HotTextMoveSections("hot-text-move-sections",
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Check that the --print-sorted-by-order=<ascending|descending> option works properly in llvm-bolt.
2+
#
3+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
4+
# RUN: %clang %cflags -fPIC -pie %t.o -o %t.exe -nostdlib -Wl,-q
5+
# RUN: link_fdata %s %t.o %t.fdata
6+
# RUN: llvm-bolt %t.exe -o %t.bolt --print-sorted-by=all --print-sorted-by-order=ascending \
7+
# RUN: --data %t.fdata | FileCheck %s -check-prefix=CHECK-ASCEND
8+
# RUN: llvm-bolt %t.exe -o %t.bolt --print-sorted-by=all --print-sorted-by-order=descending \
9+
# RUN: --data %t.fdata | FileCheck %s -check-prefix=CHECK-DESCEND
10+
11+
# CHECK-ASCEND: BOLT-INFO: top functions sorted by dyno stats are:
12+
# CHECK-ASCEND-NEXT: bar
13+
# CHECK-ASCEND-NEXT: foo
14+
# CHECK-DESCEND: BOLT-INFO: top functions sorted by dyno stats are:
15+
# CHECK-DESCEND-NEXT: foo
16+
# CHECK-DESCEND-NEXT: bar
17+
18+
.text
19+
.align 4
20+
.global bar
21+
.type bar, %function
22+
bar:
23+
mov w0, wzr
24+
ret
25+
26+
.global foo
27+
.type foo, %function
28+
foo:
29+
# FDATA: 1 foo 0 1 bar 0 0 1
30+
bl bar
31+
ret

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 17 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1429,9 +1429,9 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
14291429
if (!CGF.HaveInsertPoint())
14301430
return;
14311431

1432-
auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond, NumThreads,
1433-
NumThreadsModifier, Severity, Message](
1434-
CodeGenFunction &CGF, PrePostActionTy &Action) {
1432+
auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond,
1433+
NumThreads](CodeGenFunction &CGF,
1434+
PrePostActionTy &Action) {
14351435
CGBuilderTy &Bld = CGF.Builder;
14361436
llvm::Value *NumThreadsVal = NumThreads;
14371437
llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
@@ -1479,34 +1479,21 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
14791479
NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty);
14801480

14811481
assert(IfCondVal && "Expected a value");
1482-
RuntimeFunction FnID = OMPRTL___kmpc_parallel_51;
14831482
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1484-
if (CGM.getLangOpts().OpenMPNoNestedParallelism &&
1485-
CGM.IsSPMDExecutionMode()) {
1486-
llvm::Value *Args[] = {
1487-
RTLoc, NumThreadsVal, FnPtr,
1488-
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
1489-
CGF.VoidPtrPtrTy),
1490-
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
1491-
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1492-
CGM.getModule(), OMPRTL___kmpc_parallel_spmd),
1493-
Args);
1494-
} else {
1495-
llvm::SmallVector<llvm::Value *, 10> Args(
1496-
{RTLoc, getThreadID(CGF, Loc), IfCondVal, NumThreadsVal,
1497-
llvm::ConstantInt::get(CGF.Int32Ty, -1), FnPtr, ID,
1498-
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
1499-
CGF.VoidPtrPtrTy),
1500-
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())});
1501-
if (NumThreadsModifier == OMPC_NUMTHREADS_strict) {
1502-
FnID = OMPRTL___kmpc_parallel_60;
1503-
Args.append({llvm::ConstantInt::get(CGM.Int32Ty, true),
1504-
emitSeverityClause(Severity),
1505-
emitMessageClause(CGF, Message)});
1506-
}
1507-
CGF.EmitRuntimeCall(
1508-
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
1509-
}
1483+
llvm::Value *Args[] = {
1484+
RTLoc,
1485+
getThreadID(CGF, Loc),
1486+
IfCondVal,
1487+
NumThreadsVal,
1488+
llvm::ConstantInt::get(CGF.Int32Ty, -1),
1489+
FnPtr,
1490+
ID,
1491+
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
1492+
CGF.VoidPtrPtrTy),
1493+
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
1494+
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1495+
CGM.getModule(), OMPRTL___kmpc_parallel_51),
1496+
Args);
15101497
};
15111498

15121499
RegionCodeGenTy RCG(ParallelGen);

clang/lib/CodeGen/CGOpenMPRuntimeGPU.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -224,11 +224,6 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
224224
/// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
225225
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
226226
/// clause.
227-
/// If the modifier 'strict' is given:
228-
/// Emits call to void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32
229-
/// global_tid, kmp_int32 num_threads, int severity, const char *message) to
230-
/// generate code for 'num_threads' clause with 'strict' modifier.
231-
/// \param NumThreads An integer value of threads.
232227
void emitNumThreadsClause(
233228
CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
234229
OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown,
@@ -297,11 +292,11 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
297292
/// \param NumThreads The value corresponding to the num_threads clause, if
298293
/// any, or nullptr.
299294
/// \param NumThreadsModifier The modifier of the num_threads clause, if
300-
/// any, ignored otherwise.
295+
/// any, ignored otherwise. Currently unused on the device.
301296
/// \param Severity The severity corresponding to the num_threads clause, if
302-
/// any, ignored otherwise.
297+
/// any, ignored otherwise. Currently unused on the device.
303298
/// \param Message The message string corresponding to the num_threads clause,
304-
/// if any, or nullptr.
299+
/// if any, or nullptr. Currently unused on the device.
305300
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
306301
llvm::Function *OutlinedFn,
307302
ArrayRef<llvm::Value *> CapturedVars,

clang/test/AST/ByteCode/openmp.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ extern int omp_get_thread_num(void);
1717

1818
int test2() {
1919
int x = 0;
20-
int device_result[N] = {0};
20+
int result[N] = {0};
2121

22-
#pragma omp target parallel loop num_threads(strict: N) severity(warning) message("msg")
22+
#pragma omp parallel loop num_threads(strict: N) severity(warning) message("msg")
2323
for (int i = 0; i < N; i++) {
2424
x = omp_get_thread_num();
25-
device_result[i] = i + x;
25+
result[i] = i + x;
2626
}
2727
}
2828

0 commit comments

Comments
 (0)