Skip to content

Commit aa3cea2

Browse files
authored
Merge branch 'main' into x86-fixup-sse-blend-mov
2 parents 8abe0d4 + 43be31e commit aa3cea2

File tree

144 files changed

+17611
-1820
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

144 files changed

+17611
-1820
lines changed

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ FilterMemProfile("filter-mem-profile",
6161
cl::init(true),
6262
cl::cat(AggregatorCategory));
6363

64+
static cl::opt<bool> ParseMemProfile(
65+
"parse-mem-profile",
66+
cl::desc("enable memory profile parsing if it's present in the input data, "
67+
"on by default unless `--itrace` is set."),
68+
cl::init(true), cl::cat(AggregatorCategory));
69+
6470
static cl::opt<unsigned long long>
6571
FilterPID("pid",
6672
cl::desc("only use samples from process with specified PID"),
@@ -181,6 +187,10 @@ void DataAggregator::start() {
181187
"script -F pid,event,ip",
182188
/*Wait = */false);
183189
} else if (!opts::ITraceAggregation.empty()) {
190+
// Disable parsing memory profile from trace data, unless requested by user.
191+
if (!opts::ParseMemProfile.getNumOccurrences())
192+
opts::ParseMemProfile = false;
193+
184194
std::string ItracePerfScriptArgs = llvm::formatv(
185195
"script -F pid,brstack --itrace={0}", opts::ITraceAggregation);
186196
launchPerfProcess("branch events with itrace", MainEventsPPI,
@@ -191,12 +201,9 @@ void DataAggregator::start() {
191201
/*Wait = */ false);
192202
}
193203

194-
// Note: we launch script for mem events regardless of the option, as the
195-
// command fails fairly fast if mem events were not collected.
196-
launchPerfProcess("mem events",
197-
MemEventsPPI,
198-
"script -F pid,event,addr,ip",
199-
/*Wait = */false);
204+
if (opts::ParseMemProfile)
205+
launchPerfProcess("mem events", MemEventsPPI, "script -F pid,event,addr,ip",
206+
/*Wait = */ false);
200207

201208
launchPerfProcess("process events", MMapEventsPPI,
202209
"script --show-mmap-events --no-itrace",
@@ -217,7 +224,8 @@ void DataAggregator::abort() {
217224
sys::Wait(TaskEventsPPI.PI, 1, &Error);
218225
sys::Wait(MMapEventsPPI.PI, 1, &Error);
219226
sys::Wait(MainEventsPPI.PI, 1, &Error);
220-
sys::Wait(MemEventsPPI.PI, 1, &Error);
227+
if (opts::ParseMemProfile)
228+
sys::Wait(MemEventsPPI.PI, 1, &Error);
221229

222230
deleteTempFiles();
223231

@@ -506,7 +514,8 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
506514
errs() << "PERF2BOLT: failed to parse samples\n";
507515

508516
// Special handling for memory events
509-
if (!prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
517+
if (opts::ParseMemProfile &&
518+
!prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
510519
if (const std::error_code EC = parseMemEvents())
511520
errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
512521
<< '\n';

clang/docs/HIPSupport.rst

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
HIP Support
1818
=============
1919

20-
HIP (Heterogeneous-Compute Interface for Portability) `<https://github.com/ROCm-Developer-Tools/HIP>`_ is
20+
HIP (Heterogeneous-Compute Interface for Portability) `<https://github.com/ROCm/HIP>`_ is
2121
a C++ Runtime API and Kernel Language. It enables developers to create portable applications for
2222
offloading computation to different hardware platforms from a single source code.
2323

@@ -41,9 +41,9 @@ backend or the out-of-tree LLVM-SPIRV translator. The SPIR-V is then bundled and
4141
.. note::
4242
While Clang does not directly provide HIP support for NVIDIA GPUs and CPUs, these platforms are supported via other means:
4343

44-
- NVIDIA GPUs: HIP support is offered through the HIP project `<https://github.com/ROCm-Developer-Tools/HIP>`_, which provides a header-only library for translating HIP runtime APIs into CUDA runtime APIs. The code is subsequently compiled using NVIDIA's `nvcc`.
44+
- NVIDIA GPUs: HIP support is offered through the HIP project `<https://github.com/ROCm/HIP>`_, which provides a header-only library for translating HIP runtime APIs into CUDA runtime APIs. The code is subsequently compiled using NVIDIA's `nvcc`.
4545

46-
- CPUs: HIP support is available through the HIP-CPU runtime library `<https://github.com/ROCm-Developer-Tools/HIP-CPU>`_. This header-only library enables CPUs to execute unmodified HIP code.
46+
- CPUs: HIP support is available through the HIP-CPU runtime library `<https://github.com/ROCm/HIP-CPU>`_. This header-only library enables CPUs to execute unmodified HIP code.
4747

4848

4949
Example Usage
@@ -328,7 +328,7 @@ The `parallel_unsequenced_policy <https://en.cppreference.com/w/cpp/algorithm/ex
328328
maps relatively well to the execution model of AMD GPUs. This, coupled with the
329329
availability and maturity of GPU accelerated algorithm libraries that
330330
implement most / all corresponding algorithms in the standard library
331-
(e.g. `rocThrust <https://github.com/ROCmSoftwarePlatform/rocThrust>`__), makes
331+
(e.g. `rocThrust <https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocthrust>`__), makes
332332
it feasible to provide seamless accelerator offload for supported algorithms,
333333
when an accelerated version exists. Thus, it becomes possible to easily access
334334
the computational resources of an AMD accelerator, via a well specified,
@@ -483,7 +483,7 @@ such as GPUs, work.
483483
allocation / deallocation functions with accelerator-aware equivalents,
484484
based on a pre-established table; the list of functions that can be
485485
interposed is available
486-
`here <https://github.com/ROCmSoftwarePlatform/roc-stdpar#allocation--deallocation-interposition-status>`__;
486+
`here <https://github.com/ROCm/roc-stdpar#allocation--deallocation-interposition-status>`__;
487487
- This is only run when compiling for the host.
488488

489489
The second pass is optional.
@@ -627,7 +627,7 @@ Linux operating system. Support is synthesised in the following table:
627627
The minimum Linux kernel version for running in HMM mode is 6.4.
628628

629629
The forwarding header can be obtained from
630-
`its GitHub repository <https://github.com/ROCmSoftwarePlatform/roc-stdpar>`_.
630+
`its GitHub repository <https://github.com/ROCm/roc-stdpar>`_.
631631
It will be packaged with a future `ROCm <https://rocm.docs.amd.com/en/latest/>`_
632632
release. Because accelerated algorithms are provided via
633633
`rocThrust <https://rocm.docs.amd.com/projects/rocThrust/en/latest/>`_, a
@@ -636,7 +636,7 @@ transitive dependency on
636636
can be obtained either by installing their associated components of the
637637
`ROCm <https://rocm.docs.amd.com/en/latest/>`_ stack, or from their respective
638638
repositories. The list of algorithms that can be offloaded is available
639-
`here <https://github.com/ROCmSoftwarePlatform/roc-stdpar#algorithm-support-status>`_.
639+
`here <https://github.com/ROCm/roc-stdpar#algorithm-support-status>`_.
640640

641641
HIP Specific Elements
642642
---------------------

clang/include/clang/CIR/MissingFeatures.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ struct MissingFeatures {
172172
static bool astVarDeclInterface() { return false; }
173173
static bool stackSaveOp() { return false; }
174174
static bool aggValueSlot() { return false; }
175+
static bool aggValueSlotMayOverlap() { return false; }
175176
static bool generateDebugInfo() { return false; }
176177
static bool pointerOverflowSanitizer() { return false; }
177178
static bool fpConstraints() { return false; }
@@ -227,7 +228,6 @@ struct MissingFeatures {
227228
static bool implicitConstructorArgs() { return false; }
228229
static bool intrinsics() { return false; }
229230
static bool attributeNoBuiltin() { return false; }
230-
static bool emitCtorPrologue() { return false; }
231231
static bool thunks() { return false; }
232232
static bool runCleanupsScope() { return false; }
233233

clang/lib/CIR/CodeGen/CIRGenCall.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,7 @@ void CIRGenFunction::emitDelegateCallArg(CallArgList &args,
189189
// For the most part, we just need to load the alloca, except that aggregate
190190
// r-values are actually pointers to temporaries.
191191
} else {
192-
cgm.errorNYI(param->getSourceRange(),
193-
"emitDelegateCallArg: convertTempToRValue");
192+
args.add(convertTempToRValue(local, type, loc), type);
194193
}
195194

196195
// Deactivate the cleanup for the callee-destructed param that was pushed.

clang/lib/CIR/CodeGen/CIRGenClass.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,21 @@ bool CIRGenFunction::isConstructorDelegationValid(
5353
return true;
5454
}
5555

56+
/// This routine generates necessary code to initialize base classes and
57+
/// non-static data members belonging to this constructor.
58+
void CIRGenFunction::emitCtorPrologue(const CXXConstructorDecl *cd,
59+
CXXCtorType ctorType,
60+
FunctionArgList &args) {
61+
if (cd->isDelegatingConstructor())
62+
return emitDelegatingCXXConstructorCall(cd, args);
63+
64+
if (cd->getNumCtorInitializers() != 0) {
65+
// There's much more to do here.
66+
cgm.errorNYI(cd->getSourceRange(), "emitCtorPrologue: any initializer");
67+
return;
68+
}
69+
}
70+
5671
Address CIRGenFunction::loadCXXThisAddress() {
5772
assert(curFuncDecl && "loading 'this' without a func declaration?");
5873
assert(isa<CXXMethodDecl>(curFuncDecl));
@@ -102,6 +117,29 @@ void CIRGenFunction::emitDelegateCXXConstructorCall(
102117
/*Delegating=*/true, thisAddr, delegateArgs, loc);
103118
}
104119

120+
void CIRGenFunction::emitDelegatingCXXConstructorCall(
121+
const CXXConstructorDecl *ctor, const FunctionArgList &args) {
122+
assert(ctor->isDelegatingConstructor());
123+
124+
Address thisPtr = loadCXXThisAddress();
125+
126+
assert(!cir::MissingFeatures::objCGC());
127+
assert(!cir::MissingFeatures::sanitizers());
128+
AggValueSlot aggSlot = AggValueSlot::forAddr(
129+
thisPtr, Qualifiers(), AggValueSlot::IsDestructed,
130+
AggValueSlot::IsNotAliased, AggValueSlot::MayOverlap,
131+
AggValueSlot::IsNotZeroed);
132+
133+
emitAggExpr(ctor->init_begin()[0]->getInit(), aggSlot);
134+
135+
const CXXRecordDecl *classDecl = ctor->getParent();
136+
if (cgm.getLangOpts().Exceptions && !classDecl->hasTrivialDestructor()) {
137+
cgm.errorNYI(ctor->getSourceRange(),
138+
"emitDelegatingCXXConstructorCall: exception");
139+
return;
140+
}
141+
}
142+
105143
Address CIRGenFunction::getAddressOfBaseClass(
106144
Address value, const CXXRecordDecl *derived,
107145
llvm::iterator_range<CastExpr::path_const_iterator> path,

clang/lib/CIR/CodeGen/CIRGenDecl.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,12 @@ void CIRGenFunction::emitExprAsInit(const Expr *init, const ValueDecl *d,
259259
return;
260260
}
261261
case cir::TEK_Aggregate:
262-
emitAggExpr(init, AggValueSlot::forLValue(lvalue));
262+
// The overlap flag here should be calculated.
263+
assert(!cir::MissingFeatures::aggValueSlotMayOverlap());
264+
emitAggExpr(init,
265+
AggValueSlot::forLValue(lvalue, AggValueSlot::IsDestructed,
266+
AggValueSlot::IsNotAliased,
267+
AggValueSlot::MayOverlap));
263268
return;
264269
}
265270
llvm_unreachable("bad evaluation kind");

clang/lib/CIR/CodeGen/CIRGenExpr.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1261,6 +1261,23 @@ Address CIRGenFunction::emitArrayToPointerDecay(const Expr *e) {
12611261
return Address(ptr, addr.getAlignment());
12621262
}
12631263

1264+
/// Given the address of a temporary variable, produce an r-value of its type.
1265+
RValue CIRGenFunction::convertTempToRValue(Address addr, clang::QualType type,
1266+
clang::SourceLocation loc) {
1267+
LValue lvalue = makeAddrLValue(addr, type, AlignmentSource::Decl);
1268+
switch (getEvaluationKind(type)) {
1269+
case cir::TEK_Complex:
1270+
cgm.errorNYI(loc, "convertTempToRValue: complex type");
1271+
return RValue::get(nullptr);
1272+
case cir::TEK_Aggregate:
1273+
cgm.errorNYI(loc, "convertTempToRValue: aggregate type");
1274+
return RValue::get(nullptr);
1275+
case cir::TEK_Scalar:
1276+
return RValue::get(emitLoadOfScalar(lvalue, loc));
1277+
}
1278+
llvm_unreachable("bad evaluation kind");
1279+
}
1280+
12641281
/// Emit an `if` on a boolean condition, filling `then` and `else` into
12651282
/// appropriate regions.
12661283
mlir::LogicalResult CIRGenFunction::emitIfOnBoolExpr(const Expr *cond,
@@ -1473,6 +1490,10 @@ void CIRGenFunction::emitCXXConstructExpr(const CXXConstructExpr *e,
14731490
type = Ctor_Complete;
14741491
break;
14751492
case CXXConstructionKind::Delegating:
1493+
// We should be emitting a constructor; GlobalDecl will assert this
1494+
type = curGD.getCtorType();
1495+
delegating = true;
1496+
break;
14761497
case CXXConstructionKind::VirtualBase:
14771498
case CXXConstructionKind::NonVirtualBase:
14781499
cgm.errorNYI(e->getSourceRange(),

clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,11 @@ void AggExprEmitter::emitInitializationToLValue(Expr *e, LValue lv) {
203203
cgf.cgm.errorNYI("emitInitializationToLValue TEK_Complex");
204204
break;
205205
case cir::TEK_Aggregate:
206-
cgf.emitAggExpr(e, AggValueSlot::forLValue(lv));
206+
cgf.emitAggExpr(e, AggValueSlot::forLValue(lv, AggValueSlot::IsDestructed,
207+
AggValueSlot::IsNotAliased,
208+
AggValueSlot::MayOverlap,
209+
dest.isZeroed()));
210+
207211
return;
208212
case cir::TEK_Scalar:
209213
if (lv.isSimple())
@@ -284,6 +288,8 @@ LValue CIRGenFunction::emitAggExprToLValue(const Expr *e) {
284288
assert(hasAggregateEvaluationKind(e->getType()) && "Invalid argument!");
285289
Address temp = createMemTemp(e->getType(), getLoc(e->getSourceRange()));
286290
LValue lv = makeAddrLValue(temp, e->getType());
287-
emitAggExpr(e, AggValueSlot::forLValue(lv));
291+
emitAggExpr(e, AggValueSlot::forLValue(lv, AggValueSlot::IsNotDestructed,
292+
AggValueSlot::IsNotAliased,
293+
AggValueSlot::DoesNotOverlap));
288294
return lv;
289295
}

clang/lib/CIR/CodeGen/CIRGenFunction.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -526,14 +526,8 @@ void CIRGenFunction::emitConstructorBody(FunctionArgList &args) {
526526
// TODO: in restricted cases, we can emit the vbase initializers of a
527527
// complete ctor and then delegate to the base ctor.
528528

529-
assert(!cir::MissingFeatures::emitCtorPrologue());
530-
if (ctor->isDelegatingConstructor()) {
531-
// This will be handled in emitCtorPrologue, but we should emit a diagnostic
532-
// rather than silently fail to delegate.
533-
cgm.errorNYI(ctor->getSourceRange(),
534-
"emitConstructorBody: delegating ctor");
535-
return;
536-
}
529+
// Emit the constructor prologue, i.e. the base and member initializers.
530+
emitCtorPrologue(ctor, ctorType, args);
537531

538532
// TODO(cir): propagate this result via mlir::logical result. Just unreachable
539533
// now just to have it handled.

clang/lib/CIR/CodeGen/CIRGenFunction.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,9 @@ class CIRGenFunction : public CIRGenTypeCache {
474474

475475
bool shouldNullCheckClassCastValue(const CastExpr *ce);
476476

477+
RValue convertTempToRValue(Address addr, clang::QualType type,
478+
clang::SourceLocation loc);
479+
477480
static bool
478481
isConstructorDelegationValid(const clang::CXXConstructorDecl *ctor);
479482

@@ -797,6 +800,16 @@ class CIRGenFunction : public CIRGenTypeCache {
797800
const CXXMethodDecl *md,
798801
ReturnValueSlot returnValue);
799802

803+
void emitCtorPrologue(const clang::CXXConstructorDecl *ctor,
804+
clang::CXXCtorType ctorType, FunctionArgList &args);
805+
806+
// It's important not to confuse this and emitDelegateCXXConstructorCall.
807+
// Delegating constructors are the C++11 feature. The constructor delegate
808+
// optimization is used to reduce duplication in the base and complete
809+
// constructors where they are substantially the same.
810+
void emitDelegatingCXXConstructorCall(const CXXConstructorDecl *ctor,
811+
const FunctionArgList &args);
812+
800813
mlir::LogicalResult emitDoStmt(const clang::DoStmt &s);
801814

802815
/// Emit an expression as an initializer for an object (variable, field, etc.)

0 commit comments

Comments
 (0)