Skip to content

Commit f37c9af

Browse files
Merge branch 'main' into sqrttest
2 parents efaa8f7 + 6cd62ad commit f37c9af

File tree

10 files changed

+115
-33
lines changed

10 files changed

+115
-33
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2936,9 +2936,8 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
29362936
bool IsThunk) {
29372937

29382938
if (llvm::Intrinsic::ID IID = F->getIntrinsicID()) {
2939-
// If this is an intrinsic function, set the function's attributes
2940-
// to the intrinsic's attributes.
2941-
F->setAttributes(llvm::Intrinsic::getAttributes(getLLVMContext(), IID));
2939+
// If this is an intrinsic function, the attributes will have been set
2940+
// when the function was created.
29422941
return;
29432942
}
29442943

libclc/cmake/modules/AddLibclc.cmake

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,13 @@ function(link_bc)
107107
set( LINK_INPUT_ARG "@${RSP_FILE}" )
108108
endif()
109109

110+
if( ARG_INTERNALIZE )
111+
set( link_flags --internalize --only-needed )
112+
endif()
113+
110114
add_custom_command(
111115
OUTPUT ${ARG_TARGET}.bc
112-
COMMAND ${llvm-link_exe} $<$<BOOL:${ARG_INTERNALIZE}>:--internalize> -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG}
116+
COMMAND ${llvm-link_exe} ${link_flags} -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG}
113117
DEPENDS ${llvm-link_target} ${ARG_DEPENDENCIES} ${ARG_INPUTS} ${RSP_FILE}
114118
)
115119

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,6 +1167,7 @@ SmallVector<Instruction *, 4>
11671167
RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
11681168
SmallVector<Instruction *, 4> ReductionOperations;
11691169
unsigned RedOp = getOpcode();
1170+
const bool IsMinMax = isMinMaxRecurrenceKind(Kind);
11701171

11711172
// Search down from the Phi to the LoopExitInstr, looking for instructions
11721173
// with a single user of the correct type for the reduction.
@@ -1184,15 +1185,15 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
11841185
// more expensive than out-of-loop reductions, and need to be costed more
11851186
// carefully.
11861187
unsigned ExpectedUses = 1;
1187-
if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
1188+
if (IsMinMax)
11881189
ExpectedUses = 2;
11891190

11901191
auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
11911192
for (auto *User : Cur->users()) {
11921193
Instruction *UI = cast<Instruction>(User);
11931194
if (isa<PHINode>(UI))
11941195
continue;
1195-
if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
1196+
if (IsMinMax) {
11961197
// We are expecting an icmp/select pair, so we go to the next select
11971198
// instruction if we can. We already know that Cur has 2 uses.
11981199
if (isa<SelectInst>(UI))
@@ -1204,7 +1205,7 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
12041205
return nullptr;
12051206
};
12061207
auto isCorrectOpcode = [&](Instruction *Cur) {
1207-
if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
1208+
if (IsMinMax) {
12081209
Value *LHS, *RHS;
12091210
return SelectPatternResult::isMinOrMax(
12101211
matchSelectPattern(Cur, LHS, RHS).Flavor);

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,13 +1491,22 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
14911491
if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
14921492
return PredOpcode;
14931493

1494+
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1495+
1496+
// If the PTEST like instruction's general predicate is not `Mask`, attempt
1497+
// to look through a copy and try again. This is because some instructions
1498+
// take a predicate whose register class is a subset of its result class.
1499+
if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1500+
PTestLikeMask->getOperand(1).getReg().isVirtual())
1501+
PTestLikeMask =
1502+
MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1503+
14941504
// For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
14951505
// element size matches and either the PTEST_LIKE instruction uses
14961506
// the same all active mask or the condition is "any".
14971507
if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
14981508
getElementSizeForOpcode(MaskOpcode) ==
14991509
getElementSizeForOpcode(PredOpcode)) {
1500-
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
15011510
if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
15021511
return PredOpcode;
15031512
}
@@ -1524,7 +1533,6 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
15241533
// active flag, whereas the PTEST instruction with the same mask doesn't.
15251534
// For PTEST_ANY this doesn't apply as the flags in this case would be
15261535
// identical regardless of element size.
1527-
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
15281536
uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
15291537
if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
15301538
PTest->getOpcode() == AArch64::PTEST_PP_ANY))

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19724,10 +19724,18 @@ bool SLPVectorizerPass::vectorizeStores(
1972419724
Type *ValueTy = StoreTy;
1972519725
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
1972619726
ValueTy = Trunc->getSrcTy();
19727-
unsigned MinVF = std::max<unsigned>(
19728-
2, PowerOf2Ceil(TTI->getStoreMinimumVF(
19729-
R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy,
19730-
ValueTy)));
19727+
// When REVEC is enabled, StoreTy and ValueTy may be FixedVectorType. But
19728+
// getStoreMinimumVF only supports scalar types as arguments. As a result,
19729+
// we need to use the element type of StoreTy and ValueTy to retrieve the
19730+
// VF and then transform it back.
19731+
// Remember: VF is defined as the number we want to vectorize, not the
19732+
// number of elements in the final vector.
19733+
Type *StoreScalarTy = StoreTy->getScalarType();
19734+
unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF(
19735+
R.getMinVF(DL->getTypeStoreSizeInBits(StoreScalarTy)), StoreScalarTy,
19736+
ValueTy->getScalarType()));
19737+
MinVF /= getNumElements(StoreTy);
19738+
MinVF = std::max<unsigned>(2, MinVF);
1973119739

1973219740
if (MaxVF < MinVF) {
1973319741
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,3 +661,48 @@ body: |
661661
RET_ReallyLR implicit $w0
662662
663663
...
664+
---
665+
name: cmpeq_nxv16i8_ptest_with_register_class_mismatch
666+
alignment: 2
667+
tracksRegLiveness: true
668+
registers:
669+
- { id: 0, class: ppr }
670+
- { id: 1, class: zpr }
671+
- { id: 2, class: zpr }
672+
- { id: 3, class: ppr_3b }
673+
- { id: 4, class: ppr }
674+
- { id: 5, class: gpr32 }
675+
- { id: 6, class: gpr32 }
676+
liveins:
677+
- { reg: '$z0', virtual-reg: '%1' }
678+
- { reg: '$z1', virtual-reg: '%2' }
679+
frameInfo:
680+
maxCallFrameSize: 0
681+
body: |
682+
bb.0:
683+
liveins: $z0, $z1
684+
685+
; CHECK-LABEL: name: cmpeq_nxv16i8_ptest_with_register_class_mismatch
686+
; CHECK: liveins: $z0, $z1
687+
; CHECK-NEXT: {{ $}}
688+
; CHECK-NEXT: [[COPY:%[0-9]+]]:zpr = COPY $z0
689+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z1
690+
; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
691+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr_3b = COPY [[PTRUE_B]]
692+
; CHECK-NEXT: [[CMPEQ_PPzZZ_B:%[0-9]+]]:ppr = CMPEQ_PPzZZ_B [[COPY2]], [[COPY]], [[COPY1]], implicit-def $nzcv
693+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $wzr
694+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY3]], $wzr, 0, implicit $nzcv
695+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
696+
; CHECK-NEXT: RET_ReallyLR implicit $w0
697+
%1:zpr = COPY $z0
698+
%2:zpr = COPY $z1
699+
%0:ppr = PTRUE_B 31, implicit $vg
700+
%3:ppr_3b = COPY %0
701+
%4:ppr = CMPEQ_PPzZZ_B %3, %1, %2, implicit-def dead $nzcv
702+
PTEST_PP %0, killed %4, implicit-def $nzcv
703+
%5:gpr32 = COPY $wzr
704+
%6:gpr32 = CSINCWr %5, $wzr, 0, implicit $nzcv
705+
$w0 = COPY %6
706+
RET_ReallyLR implicit $w0
707+
708+
...
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: @test(
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> zeroinitializer, i64 0)
8+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP0]], <4 x i8> zeroinitializer, i64 4)
9+
; CHECK-NEXT: store <8 x i8> [[TMP1]], ptr null, align 1
10+
; CHECK-NEXT: ret void
11+
;
12+
entry:
13+
%0 = getelementptr i8, ptr null, i64 4
14+
store <4 x i8> zeroinitializer, ptr null, align 1
15+
store <4 x i8> zeroinitializer, ptr %0, align 1
16+
ret void
17+
}

mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES_H
33

44
#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
5+
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
56
#include "mlir/Dialect/MemRef/IR/MemRef.h"
67
#include "mlir/Pass/Pass.h"
78

mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,15 @@ def EmptyTensorToAllocTensorPass : Pass<"empty-tensor-to-alloc-tensor"> {
284284
let dependentDialects = ["tensor::TensorDialect"];
285285
}
286286

287+
def layoutMapClValues {
288+
string values = [{
289+
::llvm::cl::values(
290+
clEnumValN(LayoutMapOption::InferLayoutMap, "infer-layout-map", ""),
291+
clEnumValN(LayoutMapOption::IdentityLayoutMap, "identity-layout-map", ""),
292+
clEnumValN(LayoutMapOption::FullyDynamicLayoutMap, "fully-dynamic-layout-map", "")
293+
)}];
294+
}
295+
287296
def OneShotBufferizePass : Pass<"one-shot-bufferize", "ModuleOp"> {
288297
let summary = "One-Shot Bufferize";
289298
let description = [{
@@ -424,9 +433,10 @@ def OneShotBufferizePass : Pass<"one-shot-bufferize", "ModuleOp"> {
424433
"Skip analysis of functions with these symbol names."
425434
"Set copyBeforeWrite to true when bufferizing them.">,
426435
Option<"functionBoundaryTypeConversion",
427-
"function-boundary-type-conversion", "std::string",
428-
/*default=*/"\"infer-layout-map\"",
429-
"Controls layout maps when bufferizing function signatures.">,
436+
"function-boundary-type-conversion", "LayoutMapOption",
437+
/*default=*/"LayoutMapOption::InferLayoutMap",
438+
"Controls layout maps when bufferizing function signatures.",
439+
layoutMapClValues.values>,
430440
Option<"mustInferMemorySpace", "must-infer-memory-space", "bool",
431441
/*default=*/"false",
432442
"The memory space of an memref types must always be inferred. If "
@@ -444,9 +454,10 @@ def OneShotBufferizePass : Pass<"one-shot-bufferize", "ModuleOp"> {
444454
/*default=*/"false",
445455
"Test only: Annotate IR with RaW conflicts. Requires "
446456
"test-analysis-only.">,
447-
Option<"unknownTypeConversion", "unknown-type-conversion", "std::string",
448-
/*default=*/"\"fully-dynamic-layout-map\"",
449-
"Controls layout maps for non-inferrable memref types.">,
457+
Option<"unknownTypeConversion", "unknown-type-conversion", "LayoutMapOption",
458+
/*default=*/"LayoutMapOption::FullyDynamicLayoutMap",
459+
"Controls layout maps for non-inferrable memref types.",
460+
layoutMapClValues.values>,
450461
Option<"bufferAlignment", "buffer-alignment", "uint64_t",
451462
/*default=*/"64",
452463
"Sets the alignment of newly allocated buffers.">,

mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,6 @@ using namespace mlir::bufferization;
3838

3939
namespace {
4040

41-
static LayoutMapOption parseLayoutMapOption(const std::string &s) {
42-
if (s == "fully-dynamic-layout-map")
43-
return LayoutMapOption::FullyDynamicLayoutMap;
44-
if (s == "identity-layout-map")
45-
return LayoutMapOption::IdentityLayoutMap;
46-
if (s == "infer-layout-map")
47-
return LayoutMapOption::InferLayoutMap;
48-
llvm_unreachable("invalid layout map option");
49-
}
50-
5141
static OneShotBufferizationOptions::AnalysisHeuristic
5242
parseHeuristicOption(const std::string &s) {
5343
if (s == "bottom-up")
@@ -83,8 +73,7 @@ struct OneShotBufferizePass
8373
opt.analysisHeuristic = parseHeuristicOption(analysisHeuristic);
8474
opt.copyBeforeWrite = copyBeforeWrite;
8575
opt.dumpAliasSets = dumpAliasSets;
86-
opt.setFunctionBoundaryTypeConversion(
87-
parseLayoutMapOption(functionBoundaryTypeConversion));
76+
opt.setFunctionBoundaryTypeConversion(functionBoundaryTypeConversion);
8877

8978
if (mustInferMemorySpace && useEncodingForMemorySpace) {
9079
emitError(getOperation()->getLoc())
@@ -118,8 +107,7 @@ struct OneShotBufferizePass
118107
opt.noAnalysisFuncFilter = noAnalysisFuncFilter;
119108

120109
// Configure type converter.
121-
LayoutMapOption unknownTypeConversionOption =
122-
parseLayoutMapOption(unknownTypeConversion);
110+
LayoutMapOption unknownTypeConversionOption = unknownTypeConversion;
123111
if (unknownTypeConversionOption == LayoutMapOption::InferLayoutMap) {
124112
emitError(UnknownLoc::get(&getContext()),
125113
"Invalid option: 'infer-layout-map' is not a valid value for "

0 commit comments

Comments
 (0)