Skip to content

Commit 0eee6f6

Browse files
Merge branch 'main' into main
2 parents cd4f55e + 51e222e commit 0eee6f6

File tree

76 files changed

+14674
-999
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+14674
-999
lines changed

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1783,10 +1783,22 @@ bool BinaryFunction::scanExternalRefs() {
17831783
// On AArch64, we use instruction patches for fixing references. We make an
17841784
// exception for branch instructions since they require optional
17851785
// relocations.
1786-
if (BC.isAArch64() && !BranchTargetSymbol) {
1787-
LLVM_DEBUG(BC.printInstruction(dbgs(), Instruction, AbsoluteInstrAddr));
1788-
InstructionPatches.push_back({AbsoluteInstrAddr, Instruction});
1789-
continue;
1786+
if (BC.isAArch64()) {
1787+
if (!BranchTargetSymbol) {
1788+
LLVM_DEBUG(BC.printInstruction(dbgs(), Instruction, AbsoluteInstrAddr));
1789+
InstructionPatches.push_back({AbsoluteInstrAddr, Instruction});
1790+
continue;
1791+
}
1792+
1793+
// Conditional tail calls require new relocation types that are currently
1794+
// not supported. https://github.com/llvm/llvm-project/issues/138264
1795+
if (BC.MIB->isConditionalBranch(Instruction)) {
1796+
if (BinaryFunction *TargetBF =
1797+
BC.getFunctionForSymbol(BranchTargetSymbol)) {
1798+
TargetBF->setNeedsPatch(true);
1799+
continue;
1800+
}
1801+
}
17901802
}
17911803

17921804
// Emit the instruction using temp emitter and generate relocations.

bolt/test/AArch64/lite-mode.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,15 @@ cold_function:
129129
# CHECK-INPUT-NEXT: b {{.*}} <_start>
130130
# CHECK-NEXT: b {{.*}} <_start.org.0>
131131

132+
## Quick test for conditional tail calls. A proper test is being added in:
133+
## https://github.com/llvm/llvm-project/pull/139565
134+
## For now check that llvm-bolt doesn't choke on CTCs.
135+
.ifndef COMPACT
136+
b.eq _start
137+
cbz x0, _start
138+
tbz x0, 42, _start
139+
.endif
140+
132141
.cfi_endproc
133142
.size cold_function, .-cold_function
134143

clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,20 @@ class OpenACCClauseCIREmitter final
164164
builder.setInsertionPoint(operation.computeOp);
165165
OpenACCClauseCIREmitter<typename OpTy::ComputeOpTy> computeEmitter{
166166
operation.computeOp, cgf, builder, dirKind, dirLoc};
167+
167168
computeEmitter.lastDeviceTypeValues = lastDeviceTypeValues;
169+
170+
// Async handler uses the first data operand to figure out where to insert
171+
// its information if it is present. This ensures that the new handler will
172+
// correctly set the insertion point for async.
173+
if (!dataOperands.empty())
174+
computeEmitter.dataOperands.push_back(dataOperands.front());
168175
computeEmitter.Visit(&c);
176+
177+
// Make sure all of the new data operands are kept track of here. The
178+
// combined constructs always apply 'async' to only the compute component,
179+
// so we need to collect these.
180+
dataOperands.append(computeEmitter.dataOperands);
169181
}
170182

171183
struct DataOperandInfo {
@@ -254,6 +266,8 @@ class OpenACCClauseCIREmitter final
254266
if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp,
255267
mlir::acc::KernelsOp, mlir::acc::DataOp>)
256268
return operation.getAsyncOnlyAttr();
269+
else if constexpr (isCombinedType<OpTy>)
270+
return operation.computeOp.getAsyncOnlyAttr();
257271

258272
// Note: 'wait' has async as well, but it cannot have data clauses, so we
259273
// don't have to handle them here.
@@ -267,6 +281,8 @@ class OpenACCClauseCIREmitter final
267281
if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp,
268282
mlir::acc::KernelsOp, mlir::acc::DataOp>)
269283
return operation.getAsyncOperandsDeviceTypeAttr();
284+
else if constexpr (isCombinedType<OpTy>)
285+
return operation.computeOp.getAsyncOperandsDeviceTypeAttr();
270286

271287
// Note: 'wait' has async as well, but it cannot have data clauses, so we
272288
// don't have to handle them here.
@@ -281,6 +297,8 @@ class OpenACCClauseCIREmitter final
281297
if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp,
282298
mlir::acc::KernelsOp, mlir::acc::DataOp>)
283299
return operation.getAsyncOperands();
300+
else if constexpr (isCombinedType<OpTy>)
301+
return operation.computeOp.getAsyncOperands();
284302

285303
// Note: 'wait' has async as well, but it cannot have data clauses, so we
286304
// don't have to handle them here.
@@ -296,8 +314,6 @@ class OpenACCClauseCIREmitter final
296314
if (!hasAsyncClause || dataOperands.empty())
297315
return;
298316

299-
// TODO: OpenACC: Handle this correctly for combined constructs.
300-
301317
for (mlir::Operation *dataOp : dataOperands) {
302318
llvm::TypeSwitch<mlir::Operation *, void>(dataOp)
303319
.Case<ACC_DATA_ENTRY_OPS, ACC_DATA_EXIT_OPS>([&](auto op) {
@@ -708,6 +724,8 @@ class OpenACCClauseCIREmitter final
708724
addDataOperand<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
709725
var, mlir::acc::DataClause::acc_copy, /*structured=*/true,
710726
/*implicit=*/false);
727+
} else if constexpr (isCombinedType<OpTy>) {
728+
applyToComputeOp(clause);
711729
} else {
712730
// TODO: When we've implemented this for everything, switch this to an
713731
// unreachable. data, declare, combined constructs remain.

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2694,14 +2694,20 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
26942694

26952695
void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
26962696
LValue Dst) {
2697+
llvm::Value *SrcVal = Src.getScalarVal();
2698+
Address DstAddr = Dst.getExtVectorAddress();
2699+
if (DstAddr.getElementType()->getScalarSizeInBits() >
2700+
SrcVal->getType()->getScalarSizeInBits())
2701+
SrcVal = Builder.CreateZExt(
2702+
SrcVal, convertTypeForLoadStore(Dst.getType(), SrcVal->getType()));
2703+
26972704
// HLSL allows storing to scalar values through ExtVector component LValues.
26982705
// To support this we need to handle the case where the destination address is
26992706
// a scalar.
2700-
Address DstAddr = Dst.getExtVectorAddress();
27012707
if (!DstAddr.getElementType()->isVectorTy()) {
27022708
assert(!Dst.getType()->isVectorType() &&
27032709
"this should only occur for non-vector l-values");
2704-
Builder.CreateStore(Src.getScalarVal(), DstAddr, Dst.isVolatileQualified());
2710+
Builder.CreateStore(SrcVal, DstAddr, Dst.isVolatileQualified());
27052711
return;
27062712
}
27072713

@@ -2722,11 +2728,6 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
27222728
for (unsigned i = 0; i != NumSrcElts; ++i)
27232729
Mask[getAccessedFieldNo(i, Elts)] = i;
27242730

2725-
llvm::Value *SrcVal = Src.getScalarVal();
2726-
if (VecTy->getScalarSizeInBits() >
2727-
SrcVal->getType()->getScalarSizeInBits())
2728-
SrcVal = Builder.CreateZExt(SrcVal, VecTy);
2729-
27302731
Vec = Builder.CreateShuffleVector(SrcVal, Mask);
27312732
} else if (NumDstElts > NumSrcElts) {
27322733
// Extend the source vector to the same length and then shuffle it
@@ -2737,8 +2738,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
27372738
for (unsigned i = 0; i != NumSrcElts; ++i)
27382739
ExtMask.push_back(i);
27392740
ExtMask.resize(NumDstElts, -1);
2740-
llvm::Value *ExtSrcVal =
2741-
Builder.CreateShuffleVector(Src.getScalarVal(), ExtMask);
2741+
llvm::Value *ExtSrcVal = Builder.CreateShuffleVector(SrcVal, ExtMask);
27422742
// build identity
27432743
SmallVector<int, 4> Mask;
27442744
for (unsigned i = 0; i != NumDstElts; ++i)
@@ -2764,10 +2764,6 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
27642764
unsigned InIdx = getAccessedFieldNo(0, Elts);
27652765
llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx);
27662766

2767-
llvm::Value *SrcVal = Src.getScalarVal();
2768-
if (VecTy->getScalarSizeInBits() > SrcVal->getType()->getScalarSizeInBits())
2769-
SrcVal = Builder.CreateZExt(SrcVal, VecTy->getScalarType());
2770-
27712767
Vec = Builder.CreateInsertElement(Vec, SrcVal, Elt);
27722768
}
27732769

clang/lib/Sema/SemaInit.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6602,8 +6602,10 @@ void InitializationSequence::InitializeFrom(Sema &S,
66026602
}
66036603
// If the record has any members which are const (recursively checked),
66046604
// then we want to diagnose those as being uninitialized if there is no
6605-
// initializer present.
6606-
if (!Initializer) {
6605+
// initializer present. However, we only do this for structure types, not
6606+
// union types, because an uninitialized field in a union is generally
6607+
// reasonable, especially in C where unions can be used for type punning.
6608+
if (!Initializer && !Rec->isUnion()) {
66076609
if (const FieldDecl *FD = getConstField(Rec)) {
66086610
unsigned DiagID = diag::warn_default_init_const_field_unsafe;
66096611
if (Var->getStorageDuration() == SD_Static ||

clang/lib/Sema/SemaOpenACC.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -757,11 +757,12 @@ ExprResult SemaOpenACC::ActOnArraySectionExpr(Expr *Base, SourceLocation LBLoc,
757757
!OriginalBaseTy->isConstantArrayType() &&
758758
!OriginalBaseTy->isDependentSizedArrayType()))) {
759759
bool IsArray = !OriginalBaseTy.isNull() && OriginalBaseTy->isArrayType();
760-
Diag(ColonLoc, diag::err_acc_subarray_no_length) << IsArray;
760+
SourceLocation DiagLoc = ColonLoc.isInvalid() ? LBLoc : ColonLoc;
761+
Diag(DiagLoc, diag::err_acc_subarray_no_length) << IsArray;
761762
// Fill in a dummy 'length' so that when we instantiate this we don't
762763
// double-diagnose here.
763764
ExprResult Recovery = SemaRef.CreateRecoveryExpr(
764-
ColonLoc, SourceLocation(), ArrayRef<Expr *>(), Context.IntTy);
765+
DiagLoc, SourceLocation(), ArrayRef<Expr *>(), Context.IntTy);
765766
Length = Recovery.isUsable() ? Recovery.get() : nullptr;
766767
}
767768

0 commit comments

Comments
 (0)