Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.

Commit a0b628d

Browse files
SC llvm teamSC llvm team
authored and committed
Merged main:39ac64c1c0fc61a476aa22c53e6977608ead03cf into amd-gfx:b8f516d9c3d8
Local branch amd-gfx b8f516d Merged main:fb33af08e4c105a05855f8beeb972d493410e72f into amd-gfx:1bf507427ecd Remote branch main 39ac64c [mlir][Arith] ValueBoundsInterface: speedup arith.select (llvm#113531)
2 parents b8f516d + 39ac64c commit a0b628d

File tree

8 files changed

+94
-39
lines changed

8 files changed

+94
-39
lines changed

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 516202
19+
#define LLVM_MAIN_REVISION 516206
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ enum AsmWriterFlavorTy {
2323
ATT = 0, Intel = 1
2424
};
2525

26-
static cl::opt<AsmWriterFlavorTy> AsmWriterFlavor(
26+
static cl::opt<AsmWriterFlavorTy> X86AsmSyntax(
2727
"x86-asm-syntax", cl::init(ATT), cl::Hidden,
28-
cl::desc("Choose style of code to emit from X86 backend:"),
28+
cl::desc("Select the assembly style for input"),
2929
cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"),
3030
clEnumValN(Intel, "intel", "Emit Intel-style assembly")));
3131

@@ -41,7 +41,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
4141
if (is64Bit)
4242
CodePointerSize = CalleeSaveStackSlotSize = 8;
4343

44-
AssemblerDialect = AsmWriterFlavor;
44+
AssemblerDialect = X86AsmSyntax;
4545

4646
if (!is64Bit)
4747
Data64bitsDirective = nullptr; // we can't emit a 64-bit unit
@@ -89,7 +89,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
8989
// OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
9090
CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
9191

92-
AssemblerDialect = AsmWriterFlavor;
92+
AssemblerDialect = X86AsmSyntax;
9393

9494
// Debug Information
9595
SupportsDebugInformation = true;
@@ -126,7 +126,7 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
126126

127127
ExceptionsType = ExceptionHandling::WinEH;
128128

129-
AssemblerDialect = AsmWriterFlavor;
129+
AssemblerDialect = X86AsmSyntax;
130130

131131
AllowAtInName = true;
132132
}
@@ -159,7 +159,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
159159
ExceptionsType = ExceptionHandling::DwarfCFI;
160160
}
161161

162-
AssemblerDialect = AsmWriterFlavor;
162+
AssemblerDialect = X86AsmSyntax;
163163

164164
AllowAtInName = true;
165165
}

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2406,7 +2406,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
24062406
addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
24072407
}
24082408

2409-
if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
2409+
if (!Subtarget.useSoftFloat() && Subtarget.hasBF16() &&
2410+
Subtarget.useAVX512Regs()) {
24102411
addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
24112412
setF16Action(MVT::v32bf16, Expand);
24122413
for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV})
@@ -2419,27 +2420,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
24192420
}
24202421

24212422
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) {
2422-
addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
2423-
addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
2424-
addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
2425-
2426-
setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
2427-
setOperationAction(ISD::FSUB, MVT::v32bf16, Legal);
2428-
setOperationAction(ISD::FMUL, MVT::v32bf16, Legal);
2429-
setOperationAction(ISD::FDIV, MVT::v32bf16, Legal);
2430-
setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
2431-
setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
2432-
setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
2433-
if (Subtarget.hasVLX()) {
2434-
for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
2435-
setOperationAction(ISD::FADD, VT, Legal);
2436-
setOperationAction(ISD::FSUB, VT, Legal);
2437-
setOperationAction(ISD::FMUL, VT, Legal);
2438-
setOperationAction(ISD::FDIV, VT, Legal);
2439-
setOperationAction(ISD::FSQRT, VT, Legal);
2440-
setOperationAction(ISD::FMA, VT, Legal);
2441-
setOperationAction(ISD::SETCC, VT, Custom);
2442-
}
2423+
for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
2424+
setOperationAction(ISD::FADD, VT, Legal);
2425+
setOperationAction(ISD::FSUB, VT, Legal);
2426+
setOperationAction(ISD::FMUL, VT, Legal);
2427+
setOperationAction(ISD::FDIV, VT, Legal);
2428+
setOperationAction(ISD::FSQRT, VT, Legal);
2429+
setOperationAction(ISD::FMA, VT, Legal);
2430+
setOperationAction(ISD::SETCC, VT, Custom);
2431+
}
2432+
if (Subtarget.hasAVX10_2_512()) {
2433+
setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
2434+
setOperationAction(ISD::FSUB, MVT::v32bf16, Legal);
2435+
setOperationAction(ISD::FMUL, MVT::v32bf16, Legal);
2436+
setOperationAction(ISD::FDIV, MVT::v32bf16, Legal);
2437+
setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
2438+
setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
2439+
setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
24432440
}
24442441
}
24452442

llvm/test/CodeGen/X86/avx10_2bf16-arith.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,3 +1166,25 @@ entry:
11661166
%2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
11671167
ret <8 x bfloat> %2
11681168
}
1169+
1170+
define <32 x bfloat> @addv(<32 x bfloat> %a, <32 x bfloat> %b) nounwind {
1171+
; X64-LABEL: addv:
1172+
; X64: # %bb.0:
1173+
; X64-NEXT: vaddnepbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
1174+
; X64-NEXT: vaddnepbf16 %ymm3, %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xcb]
1175+
; X64-NEXT: retq # encoding: [0xc3]
1176+
;
1177+
; X86-LABEL: addv:
1178+
; X86: # %bb.0:
1179+
; X86-NEXT: pushl %ebp # encoding: [0x55]
1180+
; X86-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
1181+
; X86-NEXT: andl $-32, %esp # encoding: [0x83,0xe4,0xe0]
1182+
; X86-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20]
1183+
; X86-NEXT: vaddnepbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
1184+
; X86-NEXT: vaddnepbf16 8(%ebp), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0x8d,0x08,0x00,0x00,0x00]
1185+
; X86-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
1186+
; X86-NEXT: popl %ebp # encoding: [0x5d]
1187+
; X86-NEXT: retl # encoding: [0xc3]
1188+
%add = fadd <32 x bfloat> %a, %b
1189+
ret <32 x bfloat> %add
1190+
}

mlir/lib/Conversion/MathToFuncs/MathToFuncs.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,9 @@ struct ConvertMathToFuncsPass
781781
// or equal to minWidthOfFPowIExponent option value.
782782
bool isFPowIConvertible(math::FPowIOp op);
783783

784+
// Return true if the element type of the operation's first result is an integer type.
785+
bool isConvertible(Operation *op);
786+
784787
// Generate outlined implementations for power operations
785788
// and store them in funcImpls map.
786789
void generateOpImplementations();
@@ -798,13 +801,17 @@ bool ConvertMathToFuncsPass::isFPowIConvertible(math::FPowIOp op) {
798801
return (expTy && expTy.getWidth() >= minWidthOfFPowIExponent);
799802
}
800803

804+
bool ConvertMathToFuncsPass::isConvertible(Operation *op) {
805+
return isa<IntegerType>(getElementTypeOrSelf(op->getResult(0).getType()));
806+
}
807+
801808
void ConvertMathToFuncsPass::generateOpImplementations() {
802809
ModuleOp module = getOperation();
803810

804811
module.walk([&](Operation *op) {
805812
TypeSwitch<Operation *>(op)
806813
.Case<math::CountLeadingZerosOp>([&](math::CountLeadingZerosOp op) {
807-
if (!convertCtlz)
814+
if (!convertCtlz || !isConvertible(op))
808815
return;
809816
Type resultType = getElementTypeOrSelf(op.getResult().getType());
810817

@@ -816,6 +823,9 @@ void ConvertMathToFuncsPass::generateOpImplementations() {
816823
entry.first->second = createCtlzFunc(&module, resultType);
817824
})
818825
.Case<math::IPowIOp>([&](math::IPowIOp op) {
826+
if (!isConvertible(op))
827+
return;
828+
819829
Type resultType = getElementTypeOrSelf(op.getResult().getType());
820830

821831
// Generate the software implementation of this operation,
@@ -873,9 +883,12 @@ void ConvertMathToFuncsPass::runOnOperation() {
873883
func::FuncDialect, scf::SCFDialect,
874884
vector::VectorDialect>();
875885

876-
target.addIllegalOp<math::IPowIOp>();
877-
if (convertCtlz)
878-
target.addIllegalOp<math::CountLeadingZerosOp>();
886+
target.addDynamicallyLegalOp<math::IPowIOp>(
887+
[this](math::IPowIOp op) { return !isConvertible(op); });
888+
if (convertCtlz) {
889+
target.addDynamicallyLegalOp<math::CountLeadingZerosOp>(
890+
[this](math::CountLeadingZerosOp op) { return !isConvertible(op); });
891+
}
879892
target.addDynamicallyLegalOp<math::FPowIOp>(
880893
[this](math::FPowIOp op) { return !isFPowIConvertible(op); });
881894
if (failed(applyPartialConversion(module, target, std::move(patterns))))

mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,10 @@ struct SelectOpInterface
107107
// If trueValue <= falseValue:
108108
// * result <= falseValue
109109
// * result >= trueValue
110-
if (cstr.compare(/*lhs=*/{trueValue, dim},
111-
ValueBoundsConstraintSet::ComparisonOperator::LE,
112-
/*rhs=*/{falseValue, dim})) {
110+
if (cstr.populateAndCompare(
111+
/*lhs=*/{trueValue, dim},
112+
ValueBoundsConstraintSet::ComparisonOperator::LE,
113+
/*rhs=*/{falseValue, dim})) {
113114
if (dim) {
114115
cstr.bound(value)[*dim] >= cstr.getExpr(trueValue, dim);
115116
cstr.bound(value)[*dim] <= cstr.getExpr(falseValue, dim);
@@ -121,9 +122,10 @@ struct SelectOpInterface
121122
// If falseValue <= trueValue:
122123
// * result <= trueValue
123124
// * result >= falseValue
124-
if (cstr.compare(/*lhs=*/{falseValue, dim},
125-
ValueBoundsConstraintSet::ComparisonOperator::LE,
126-
/*rhs=*/{trueValue, dim})) {
125+
if (cstr.populateAndCompare(
126+
/*lhs=*/{falseValue, dim},
127+
ValueBoundsConstraintSet::ComparisonOperator::LE,
128+
/*rhs=*/{trueValue, dim})) {
127129
if (dim) {
128130
cstr.bound(value)[*dim] >= cstr.getExpr(falseValue, dim);
129131
cstr.bound(value)[*dim] <= cstr.getExpr(trueValue, dim);

mlir/test/Conversion/MathToFuncs/ctlz.mlir

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,13 @@ func.func @main(%arg0: i8) {
9191
func.return
9292
}
9393

94+
// -----
95+
96+
// Check that index is not converted
97+
98+
// CHECK-LABEL: func.func @ctlz_index
99+
// CHECK: math.ctlz
100+
func.func @ctlz_index(%arg0: index) {
101+
%0 = math.ctlz %arg0 : index
102+
func.return
103+
}

mlir/test/Conversion/MathToFuncs/ipowi.mlir

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,14 @@ func.func @ipowi_vec(%arg0: vector<2x3xi64>, %arg1: vector<2x3xi64>) {
170170
%0 = math.ipowi %arg0, %arg1 : vector<2x3xi64>
171171
func.return
172172
}
173+
174+
// -----
175+
176+
// Check that index is not converted
177+
178+
// CHECK-LABEL: func.func @ipowi_index
179+
// CHECK: math.ipowi
180+
func.func @ipowi_index(%arg0: index, %arg1: index) {
181+
%0 = math.ipowi %arg0, %arg1 : index
182+
func.return
183+
}

0 commit comments

Comments
 (0)