Skip to content

Commit e499464

Browse files
authored
Merge branch 'main' into chinmaydd/gisel-buffer-load
2 parents d8d8fd0 + d23f781 commit e499464

File tree

85 files changed

+3976
-281
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+3976
-281
lines changed

clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,22 @@ getNewFieldsOrder(const RecordDecl *Definition,
164164
return NewFieldsOrder;
165165
}
166166

167+
/// Decides whether \p FieldOrder may be applied to the record \p RD.
///
/// Returns false when \p FieldOrder is empty. Also returns false, after
/// printing a diagnostic to llvm::errs(), when \p RD has a flexible array
/// member and the last entry of \p FieldOrder is not FieldOrder.size() - 1.
/// Returns true otherwise.
///
/// NOTE(review): entries of \p FieldOrder are presumably original field
/// indices as produced by getNewFieldsOrder -- confirm against that helper.
static bool isOrderValid(const RecordDecl *RD, ArrayRef<unsigned> FieldOrder) {
168+
if (FieldOrder.empty())
169+
return false;
170+
171+
// If there is a flexible array member in the struct, it must remain the last
172+
// field.
173+
if (RD->hasFlexibleArrayMember() &&
174+
FieldOrder.back() != FieldOrder.size() - 1) {
175+
llvm::errs()
176+
<< "Flexible array member must remain the last field in the struct\n";
177+
return false;
178+
}
179+
180+
return true;
181+
}
182+
167183
struct ReorderedStruct {
168184
public:
169185
ReorderedStruct(const RecordDecl *Decl, ArrayRef<unsigned> NewFieldsOrder)
@@ -662,7 +678,7 @@ class ReorderingConsumer : public ASTConsumer {
662678
return;
663679
SmallVector<unsigned, 4> NewFieldsOrder =
664680
getNewFieldsOrder(RD, DesiredFieldsOrder);
665-
if (NewFieldsOrder.empty())
681+
if (!isOrderValid(RD, NewFieldsOrder))
666682
return;
667683
ReorderedStruct RS{RD, NewFieldsOrder};
668684

@@ -699,7 +715,7 @@ class ReorderingConsumer : public ASTConsumer {
699715

700716
std::unique_ptr<ASTConsumer> ReorderFieldsAction::newASTConsumer() {
701717
return std::make_unique<ReorderingConsumer>(RecordName, DesiredFieldsOrder,
702-
Replacements);
718+
Replacements);
703719
}
704720

705721
} // namespace reorder_fields
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: clang-reorder-fields -record-name Foo -fields-order z,y,x %s -- 2>&1 | FileCheck --check-prefix=CHECK-BAD %s
2+
// RUN: clang-reorder-fields -record-name Foo -fields-order y,x,z %s -- | FileCheck --check-prefix=CHECK-GOOD %s
3+
4+
// CHECK-BAD: {{^Flexible array member must remain the last field in the struct}}
5+
6+
// Test record: two int fields followed by z, an array declared with no bound.
struct Foo {
7+
int x; // CHECK-GOOD: {{^ int y;}}
8+
int y; // CHECK-GOOD-NEXT: {{^ int x;}}
9+
int z[]; // CHECK-GOOD-NEXT: {{^ int z\[\];}}
10+
};

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ Bug Fixes to C++ Support
433433
object type. (#GH151531)
434434
- Suppress ``-Wdouble-promotion`` when explicitly asked for with C++ list initialization (#GH33409).
435435
- Fix the result of `__builtin_is_implicit_lifetime` for types with a user-provided constructor. (#GH160610)
436+
- Correctly deduce return types in ``decltype`` expressions. (#GH160497) (#GH56652) (#GH116319) (#GH161196)
436437

437438
Bug Fixes to AST Handling
438439
^^^^^^^^^^^^^^^^^^^^^^^^^

clang/lib/CIR/CodeGen/CIRGenClass.cpp

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -951,39 +951,55 @@ Address CIRGenFunction::getAddressOfBaseClass(
951951
bool nullCheckValue, SourceLocation loc) {
952952
assert(!path.empty() && "Base path should not be empty!");
953953

954+
CastExpr::path_const_iterator start = path.begin();
955+
const CXXRecordDecl *vBase = nullptr;
956+
954957
if ((*path.begin())->isVirtual()) {
955-
// The implementation here is actually complete, but let's flag this
956-
// as an error until the rest of the virtual base class support is in place.
957-
cgm.errorNYI(loc, "getAddrOfBaseClass: virtual base");
958-
return Address::invalid();
958+
vBase = (*start)->getType()->castAsCXXRecordDecl();
959+
++start;
959960
}
960961

961962
// Compute the static offset of the ultimate destination within its
962963
// allocating subobject (the virtual base, if there is one, or else
963964
// the "complete" object that we see).
964-
CharUnits nonVirtualOffset =
965-
cgm.computeNonVirtualBaseClassOffset(derived, path);
965+
CharUnits nonVirtualOffset = cgm.computeNonVirtualBaseClassOffset(
966+
vBase ? vBase : derived, {start, path.end()});
967+
968+
// If there's a virtual step, we can sometimes "devirtualize" it.
969+
// For now, that's limited to when the derived type is final.
970+
// TODO: "devirtualize" this for accesses to known-complete objects.
971+
if (vBase && derived->hasAttr<FinalAttr>()) {
972+
const ASTRecordLayout &layout = getContext().getASTRecordLayout(derived);
973+
CharUnits vBaseOffset = layout.getVBaseClassOffset(vBase);
974+
nonVirtualOffset += vBaseOffset;
975+
vBase = nullptr; // we no longer have a virtual step
976+
}
966977

967978
// Get the base pointer type.
968979
mlir::Type baseValueTy = convertType((path.end()[-1])->getType());
969980
assert(!cir::MissingFeatures::addressSpace());
970981

971-
// The if statement here is redundant now, but it will be needed when we add
972-
// support for virtual base classes.
973982
// If there is no virtual base, use cir.base_class_addr. It takes care of
974983
// the adjustment and the null pointer check.
975-
if (nonVirtualOffset.isZero()) {
984+
if (nonVirtualOffset.isZero() && !vBase) {
976985
assert(!cir::MissingFeatures::sanitizers());
977986
return builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, 0,
978987
/*assumeNotNull=*/true);
979988
}
980989

981990
assert(!cir::MissingFeatures::sanitizers());
982991

983-
// Apply the offset
984-
value = builder.createBaseClassAddr(getLoc(loc), value, baseValueTy,
985-
nonVirtualOffset.getQuantity(),
986-
/*assumeNotNull=*/true);
992+
// Compute the virtual offset.
993+
mlir::Value virtualOffset = nullptr;
994+
if (vBase) {
995+
virtualOffset = cgm.getCXXABI().getVirtualBaseClassOffset(
996+
getLoc(loc), *this, value, derived, vBase);
997+
}
998+
999+
// Apply both offsets.
1000+
value = applyNonVirtualAndVirtualOffset(
1001+
getLoc(loc), *this, value, nonVirtualOffset, virtualOffset, derived,
1002+
vBase, baseValueTy, not nullCheckValue);
9871003

9881004
// Cast to the destination type.
9891005
value = value.withElementType(builder, baseValueTy);

clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,10 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
676676
mlir::Value VisitRealImag(const UnaryOperator *e,
677677
QualType promotionType = QualType());
678678

679+
// Emits the operand of the unary operator \p e by forwarding to Visit on its
// sub-expression; nothing is emitted for the operator itself.
// NOTE(review): presumably this handles the GNU `__extension__` marker --
// confirm against the UnaryOperator opcode that dispatches here.
mlir::Value VisitUnaryExtension(const UnaryOperator *e) {
680+
return Visit(e->getSubExpr());
681+
}
682+
679683
mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *die) {
680684
CIRGenFunction::CXXDefaultInitExprScope scope(cgf, die);
681685
return Visit(die->getExpr());
@@ -1278,9 +1282,6 @@ mlir::Value ScalarExprEmitter::emitPromoted(const Expr *e,
12781282
} else if (const auto *uo = dyn_cast<UnaryOperator>(e)) {
12791283
switch (uo->getOpcode()) {
12801284
case UO_Imag:
1281-
cgf.cgm.errorNYI(e->getSourceRange(),
1282-
"ScalarExprEmitter::emitPromoted unary imag");
1283-
return {};
12841285
case UO_Real:
12851286
return VisitRealImag(uo, promotionType);
12861287
case UO_Minus:

clang/lib/CIR/CodeGen/CIRGenStmt.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s,
216216
case Stmt::OMPSimdDirectiveClass:
217217
case Stmt::OMPTileDirectiveClass:
218218
case Stmt::OMPUnrollDirectiveClass:
219+
case Stmt::OMPFuseDirectiveClass:
219220
case Stmt::OMPForDirectiveClass:
220221
case Stmt::OMPForSimdDirectiveClass:
221222
case Stmt::OMPSectionsDirectiveClass:

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,8 +1090,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
10901090
if (std::optional<GCOVOptions> Options =
10911091
getGCOVOptions(CodeGenOpts, LangOpts))
10921092
PB.registerPipelineStartEPCallback(
1093-
[Options](ModulePassManager &MPM, OptimizationLevel Level) {
1094-
MPM.addPass(GCOVProfilerPass(*Options));
1093+
[this, Options](ModulePassManager &MPM, OptimizationLevel Level) {
1094+
MPM.addPass(
1095+
GCOVProfilerPass(*Options, CI.getVirtualFileSystemPtr()));
10951096
});
10961097
if (std::optional<InstrProfOptions> Options =
10971098
getInstrProfOptions(CodeGenOpts, LangOpts))

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,17 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
192192
return CGF.Builder.CreateCall(F, {Src0, Src1});
193193
}
194194

195+
// Maps an AMDGCN scope name to the name used when targeting SPIR-V:
// "agent" becomes "device" and "wavefront" becomes "subgroup". Any other
// name is returned unchanged.
static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) {
196+
if (AMDGCNScope == "agent")
197+
return "device";
198+
if (AMDGCNScope == "wavefront")
199+
return "subgroup";
200+
return AMDGCNScope;
201+
}
202+
195203
// For processing memory ordering and memory scope arguments of various
196204
// amdgcn builtins.
197-
// \p Order takes a C++11 comptabile memory-ordering specifier and converts
205+
// \p Order takes a C++11 compatible memory-ordering specifier and converts
198206
// it into LLVM's memory ordering specifier using atomic C ABI, and writes
199207
// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
200208
// specific SyncScopeID and writes it to \p SSID.
@@ -227,6 +235,8 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
227235
// Some of the atomic builtins take the scope as a string name.
228236
StringRef scp;
229237
if (llvm::getConstantStringInfo(Scope, scp)) {
238+
if (getTarget().getTriple().isSPIRV())
239+
scp = mapScopeToSPIRV(scp);
230240
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
231241
return;
232242
}
@@ -238,13 +248,19 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
238248
SSID = llvm::SyncScope::System;
239249
break;
240250
case 1: // __MEMORY_SCOPE_DEVICE
241-
SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
251+
if (getTarget().getTriple().isSPIRV())
252+
SSID = getLLVMContext().getOrInsertSyncScopeID("device");
253+
else
254+
SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
242255
break;
243256
case 2: // __MEMORY_SCOPE_WRKGRP
244257
SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
245258
break;
246259
case 3: // __MEMORY_SCOPE_WVFRNT
247-
SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
260+
if (getTarget().getTriple().isSPIRV())
261+
SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup");
262+
else
263+
SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
248264
break;
249265
case 4: // __MEMORY_SCOPE_SINGLE
250266
SSID = llvm::SyncScope::SingleThread;
@@ -1510,7 +1526,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
15101526
//
15111527
// The global/flat cases need to use agent scope to consistently produce
15121528
// the native instruction instead of a cmpxchg expansion.
1513-
SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
1529+
if (getTarget().getTriple().isSPIRV())
1530+
SSID = getLLVMContext().getOrInsertSyncScopeID("device");
1531+
else
1532+
SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
15141533
AO = AtomicOrdering::Monotonic;
15151534

15161535
// The v2bf16 builtin uses i16 instead of a natural bfloat type.

clang/lib/Sema/SemaExpr.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20108,8 +20108,9 @@ static void DoMarkVarDeclReferenced(
2010820108
bool NeededForConstantEvaluation =
2010920109
isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr;
2011020110

20111-
bool NeedDefinition =
20112-
OdrUse == OdrUseContext::Used || NeededForConstantEvaluation;
20111+
bool NeedDefinition = OdrUse == OdrUseContext::Used ||
20112+
NeededForConstantEvaluation ||
20113+
Var->getType()->isUndeducedType();
2011320114

2011420115
assert(!isa<VarTemplatePartialSpecializationDecl>(Var) &&
2011520116
"Can't instantiate a partial template specialization.");

clang/test/CIR/CodeGen/complex.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,3 +1270,40 @@ void real_on_scalar_from_real_with_type_promotion() {
12701270
// OGCG: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
12711271
// OGCG: %[[A_REAL_F16:.*]] = fptrunc float %[[A_REAL_F32]] to half
12721272
// OGCG: store half %[[A_REAL_F16]], ptr %[[B_ADDR]], align 2
1273+
1274+
// Test input: applies __real__ to the scalar produced by __imag__ on a
// _Float16 _Complex value and initializes a _Float16 with the result.
void real_on_scalar_from_imag_with_type_promotion() {
1275+
_Float16 _Complex a;
1276+
_Float16 b = __real__(__imag__ a);
1277+
}
1278+
1279+
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"]
1280+
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
1281+
// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
1282+
// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
1283+
// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
1284+
// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
1285+
// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
1286+
// CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
1287+
// CIR: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
1288+
// CIR: %[[A_IMAG_F16:.*]] = cir.cast(floating, %[[A_IMAG_F32]] : !cir.float), !cir.f16
1289+
// CIR: cir.store{{.*}} %[[A_IMAG_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
1290+
1291+
// LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2
1292+
// LLVM: %[[B_ADDR:.*]] = alloca half, i64 1, align 2
1293+
// LLVM: %[[TMP_A:.*]] = load { half, half }, ptr %[[A_ADDR]], align 2
1294+
// LLVM: %[[A_REAL:.*]] = extractvalue { half, half } %[[TMP_A]], 0
1295+
// LLVM: %[[A_IMAG:.*]] = extractvalue { half, half } %[[TMP_A]], 1
1296+
// LLVM: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
1297+
// LLVM: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
1298+
// LLVM: %[[TMP_A_COMPLEX_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[A_REAL_F32]], 0
1299+
// LLVM: %[[A_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_A_COMPLEX_F32]], float %[[A_IMAG_F32]], 1
1300+
// LLVM: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half
1301+
// LLVM: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2
1302+
1303+
// OGCG: %[[A_ADDR:.*]] = alloca { half, half }, align 2
1304+
// OGCG: %[[B_ADDR:.*]] = alloca half, align 2
1305+
// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[A_ADDR]], i32 0, i32 1
1306+
// OGCG: %[[A_IMAG:.*]] = load half, ptr %[[A_IMAG_PTR]], align 2
1307+
// OGCG: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
1308+
// OGCG: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half
1309+
// OGCG: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2

0 commit comments

Comments
 (0)