Skip to content

Commit bd150c8

Browse files
authored
Merge branch 'main' into blocking-scatter-ops
2 parents adf6358 + 51aa6a4 commit bd150c8

File tree

145 files changed

+3251
-2178
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

145 files changed

+3251
-2178
lines changed

clang/include/clang/AST/StmtOpenMP.h

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5787,10 +5787,13 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
57875787
TransformedStmtOffset,
57885788
};
57895789

5790-
explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc)
5790+
explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc,
5791+
unsigned NumLoops)
57915792
: OMPLoopTransformationDirective(OMPReverseDirectiveClass,
57925793
llvm::omp::OMPD_reverse, StartLoc,
5793-
EndLoc, 1) {}
5794+
EndLoc, NumLoops) {
5795+
setNumGeneratedLoops(NumLoops);
5796+
}
57945797

57955798
void setPreInits(Stmt *PreInits) {
57965799
Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5806,19 +5809,23 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
58065809
/// \param C Context of the AST.
58075810
/// \param StartLoc Location of the introducer (e.g. the 'omp' token).
58085811
/// \param EndLoc Location of the directive's end (e.g. the tok::eod).
5812+
/// \param NumLoops Number of affected loops
58095813
/// \param AssociatedStmt The outermost associated loop.
58105814
/// \param TransformedStmt The loop nest after reversal, or nullptr in
58115815
/// dependent contexts.
58125816
/// \param PreInits Helper preinits statements for the loop nest.
5813-
static OMPReverseDirective *
5814-
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
5815-
Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits);
5817+
static OMPReverseDirective *Create(const ASTContext &C,
5818+
SourceLocation StartLoc,
5819+
SourceLocation EndLoc,
5820+
Stmt *AssociatedStmt, unsigned NumLoops,
5821+
Stmt *TransformedStmt, Stmt *PreInits);
58165822

58175823
/// Build an empty '#pragma omp reverse' AST node for deserialization.
58185824
///
58195825
/// \param C Context of the AST.
5820-
/// \param NumClauses Number of clauses to allocate.
5821-
static OMPReverseDirective *CreateEmpty(const ASTContext &C);
5826+
/// \param NumLoops Number of associated loops to allocate
5827+
static OMPReverseDirective *CreateEmpty(const ASTContext &C,
5828+
unsigned NumLoops);
58225829

58235830
/// Gets/sets the associated loops after the transformation, i.e. after
58245831
/// de-sugaring.
@@ -5857,7 +5864,7 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
58575864
: OMPLoopTransformationDirective(OMPInterchangeDirectiveClass,
58585865
llvm::omp::OMPD_interchange, StartLoc,
58595866
EndLoc, NumLoops) {
5860-
setNumGeneratedLoops(3 * NumLoops);
5867+
setNumGeneratedLoops(NumLoops);
58615868
}
58625869

58635870
void setPreInits(Stmt *PreInits) {

clang/lib/AST/StmtOpenMP.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -471,18 +471,21 @@ OMPUnrollDirective *OMPUnrollDirective::CreateEmpty(const ASTContext &C,
471471
OMPReverseDirective *
472472
OMPReverseDirective::Create(const ASTContext &C, SourceLocation StartLoc,
473473
SourceLocation EndLoc, Stmt *AssociatedStmt,
474-
Stmt *TransformedStmt, Stmt *PreInits) {
474+
unsigned NumLoops, Stmt *TransformedStmt,
475+
Stmt *PreInits) {
475476
OMPReverseDirective *Dir = createDirective<OMPReverseDirective>(
476-
C, {}, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
477+
C, {}, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc,
478+
NumLoops);
477479
Dir->setTransformedStmt(TransformedStmt);
478480
Dir->setPreInits(PreInits);
479481
return Dir;
480482
}
481483

482-
OMPReverseDirective *OMPReverseDirective::CreateEmpty(const ASTContext &C) {
484+
OMPReverseDirective *OMPReverseDirective::CreateEmpty(const ASTContext &C,
485+
unsigned NumLoops) {
483486
return createEmptyDirective<OMPReverseDirective>(
484487
C, /*NumClauses=*/0, /*HasAssociatedStmt=*/true,
485-
TransformedStmtOffset + 1, SourceLocation(), SourceLocation());
488+
TransformedStmtOffset + 1, SourceLocation(), SourceLocation(), NumLoops);
486489
}
487490

488491
OMPInterchangeDirective *OMPInterchangeDirective::Create(

clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
7878
assert(!cir::MissingFeatures::builtinCallMathErrno());
7979
assert(!cir::MissingFeatures::builtinCall());
8080

81+
mlir::Location loc = getLoc(e->getExprLoc());
82+
8183
switch (builtinIDIfNoAsmLabel) {
8284
default:
8385
break;
@@ -88,9 +90,16 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
8890
return RValue::get(nullptr);
8991

9092
mlir::Value argValue = emitCheckedArgForAssume(e->getArg(0));
91-
builder.create<cir::AssumeOp>(getLoc(e->getExprLoc()), argValue);
93+
builder.create<cir::AssumeOp>(loc, argValue);
9294
return RValue::get(nullptr);
9395
}
96+
97+
case Builtin::BI__builtin_complex: {
98+
mlir::Value real = emitScalarExpr(e->getArg(0));
99+
mlir::Value imag = emitScalarExpr(e->getArg(1));
100+
mlir::Value complex = builder.createComplexCreate(loc, real, imag);
101+
return RValue::get(complex);
102+
}
94103
}
95104

96105
cgm.errorNYI(e->getSourceRange(), "unimplemented builtin call");

clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,25 @@ class ComplexExprEmitter : public StmtVisitor<ComplexExprEmitter, mlir::Value> {
1515
explicit ComplexExprEmitter(CIRGenFunction &cgf)
1616
: cgf(cgf), builder(cgf.getBuilder()) {}
1717

18+
//===--------------------------------------------------------------------===//
19+
// Utilities
20+
//===--------------------------------------------------------------------===//
21+
22+
/// Given an expression with complex type that represents an l-value,
23+
/// this method emits the address of the l-value, then loads and returns the
24+
/// result.
25+
mlir::Value emitLoadOfLValue(const Expr *e) {
26+
return emitLoadOfLValue(cgf.emitLValue(e), e->getExprLoc());
27+
}
28+
29+
mlir::Value emitLoadOfLValue(LValue lv, SourceLocation loc);
30+
1831
/// Store the specified real/imag parts into the
1932
/// specified value pointer.
2033
void emitStoreOfComplex(mlir::Location loc, mlir::Value val, LValue lv,
2134
bool isInit);
2235

36+
mlir::Value VisitCallExpr(const CallExpr *e);
2337
mlir::Value VisitInitListExpr(InitListExpr *e);
2438

2539
mlir::Value VisitImaginaryLiteral(const ImaginaryLiteral *il);
@@ -34,18 +48,35 @@ static const ComplexType *getComplexType(QualType type) {
3448
return cast<ComplexType>(cast<AtomicType>(type)->getValueType());
3549
}
3650

51+
mlir::Value ComplexExprEmitter::emitLoadOfLValue(LValue lv,
52+
SourceLocation loc) {
53+
assert(lv.isSimple() && "non-simple complex l-value?");
54+
if (lv.getType()->isAtomicType())
55+
cgf.cgm.errorNYI(loc, "emitLoadOfLValue with Atomic LV");
56+
57+
const Address srcAddr = lv.getAddress();
58+
return builder.createLoad(cgf.getLoc(loc), srcAddr);
59+
}
60+
3761
void ComplexExprEmitter::emitStoreOfComplex(mlir::Location loc, mlir::Value val,
3862
LValue lv, bool isInit) {
3963
if (lv.getType()->isAtomicType() ||
4064
(!isInit && cgf.isLValueSuitableForInlineAtomic(lv))) {
41-
cgf.cgm.errorNYI("StoreOfComplex with Atomic LV");
65+
cgf.cgm.errorNYI(loc, "StoreOfComplex with Atomic LV");
4266
return;
4367
}
4468

4569
const Address destAddr = lv.getAddress();
4670
builder.createStore(loc, val, destAddr);
4771
}
4872

73+
mlir::Value ComplexExprEmitter::VisitCallExpr(const CallExpr *e) {
74+
if (e->getCallReturnType(cgf.getContext())->isReferenceType())
75+
return emitLoadOfLValue(e);
76+
77+
return cgf.emitCallExpr(e).getValue();
78+
}
79+
4980
mlir::Value ComplexExprEmitter::VisitInitListExpr(InitListExpr *e) {
5081
mlir::Location loc = cgf.getLoc(e->getExprLoc());
5182
if (e->getNumInits() == 2) {

clang/lib/CIR/CodeGen/CIRGenTypes.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,22 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
416416
break;
417417
}
418418

419+
case Type::IncompleteArray: {
420+
const IncompleteArrayType *arrTy = cast<IncompleteArrayType>(ty);
421+
if (arrTy->getIndexTypeCVRQualifiers() != 0)
422+
cgm.errorNYI(SourceLocation(), "non trivial array types", type);
423+
424+
mlir::Type elemTy = convertTypeForMem(arrTy->getElementType());
425+
// int X[] -> [0 x int], unless the element type is not sized. If it is
426+
// unsized (e.g. an incomplete record) just use [0 x i8].
427+
if (!builder.isSized(elemTy)) {
428+
elemTy = cgm.SInt8Ty;
429+
}
430+
431+
resultType = cir::ArrayType::get(elemTy, 0);
432+
break;
433+
}
434+
419435
case Type::ConstantArray: {
420436
const ConstantArrayType *arrTy = cast<ConstantArrayType>(ty);
421437
mlir::Type elemTy = convertTypeForMem(arrTy->getElementType());

clang/lib/CIR/CodeGen/CIRGenValue.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ class RValue {
8888
return er;
8989
}
9090

91-
// FIXME: Aggregate rvalues need to retain information about whether they are
9291
// volatile or not. Remove default to find all places that probably get this
9392
// wrong.
9493

clang/lib/CodeGen/CGCall.cpp

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -83,17 +83,8 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
8383
return llvm::CallingConv::AArch64_SVE_VectorCall;
8484
case CC_SpirFunction:
8585
return llvm::CallingConv::SPIR_FUNC;
86-
case CC_DeviceKernel: {
87-
if (CGM.getLangOpts().OpenCL)
88-
return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
89-
if (CGM.getTriple().isSPIROrSPIRV())
90-
return llvm::CallingConv::SPIR_KERNEL;
91-
if (CGM.getTriple().isAMDGPU())
92-
return llvm::CallingConv::AMDGPU_KERNEL;
93-
if (CGM.getTriple().isNVPTX())
94-
return llvm::CallingConv::PTX_Kernel;
95-
llvm_unreachable("Unknown kernel calling convention");
96-
}
86+
case CC_DeviceKernel:
87+
return CGM.getTargetCodeGenInfo().getDeviceKernelCallingConv();
9788
case CC_PreserveMost:
9889
return llvm::CallingConv::PreserveMost;
9990
case CC_PreserveAll:

clang/lib/CodeGen/TargetInfo.cpp

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -103,18 +103,21 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib,
103103
Opt += Lib;
104104
}
105105

106-
unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
107-
// OpenCL kernels are called via an explicit runtime API with arguments
108-
// set with clSetKernelArg(), not as normal sub-functions.
109-
// Return SPIR_KERNEL by default as the kernel calling convention to
110-
// ensure the fingerprint is fixed such way that each OpenCL argument
111-
// gets one matching argument in the produced kernel function argument
112-
// list to enable feasible implementation of clSetKernelArg() with
113-
// aggregates etc. In case we would use the default C calling conv here,
114-
// clSetKernelArg() might break depending on the target-specific
115-
// conventions; different targets might split structs passed as values
116-
// to multiple function arguments etc.
117-
return llvm::CallingConv::SPIR_KERNEL;
106+
unsigned TargetCodeGenInfo::getDeviceKernelCallingConv() const {
107+
if (getABIInfo().getContext().getLangOpts().OpenCL) {
108+
// Device kernels are called via an explicit runtime API, with arguments
108+
// set e.g. via clSetKernelArg() for OpenCL, not as normal
110+
// sub-functions. Return SPIR_KERNEL by default as the kernel calling
111+
// convention to ensure the fingerprint is fixed in such a way that each kernel
112+
// argument gets one matching argument in the produced kernel function
113+
// argument list to enable feasible implementation of clSetKernelArg() with
114+
// aggregates etc. In case we would use the default C calling conv here,
115+
// clSetKernelArg() might break depending on the target-specific
116+
// conventions; different targets might split structs passed as values
117+
// to multiple function arguments etc.
118+
return llvm::CallingConv::SPIR_KERNEL;
119+
}
120+
llvm_unreachable("Unknown kernel calling convention");
118121
}
119122

120123
void TargetCodeGenInfo::setOCLKernelStubCallingConvention(

clang/lib/CodeGen/TargetInfo.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,8 +298,8 @@ class TargetCodeGenInfo {
298298
llvm::StringRef Value,
299299
llvm::SmallString<32> &Opt) const {}
300300

301-
/// Get LLVM calling convention for OpenCL kernel.
302-
virtual unsigned getOpenCLKernelCallingConv() const;
301+
/// Get LLVM calling convention for device kernels.
302+
virtual unsigned getDeviceKernelCallingConv() const;
303303

304304
/// Get target specific null pointer.
305305
/// \param T is the LLVM type of the null pointer.

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
304304

305305
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
306306
CodeGen::CodeGenModule &M) const override;
307-
unsigned getOpenCLKernelCallingConv() const override;
307+
unsigned getDeviceKernelCallingConv() const override;
308308

309309
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
310310
llvm::PointerType *T, QualType QT) const override;
@@ -431,7 +431,7 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
431431
F->addFnAttr("amdgpu-ieee", "false");
432432
}
433433

434-
unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
434+
unsigned AMDGPUTargetCodeGenInfo::getDeviceKernelCallingConv() const {
435435
return llvm::CallingConv::AMDGPU_KERNEL;
436436
}
437437

0 commit comments

Comments
 (0)