Merged main:39ac64c1c0fc61a476aa22c53e6977608ead03cf into amd-gfx:b8f516d9c3d8

SC llvm team · SC llvm team · commit a0b628df54bb · 2024-10-27T22:43:57.000-04:00
Local branch amd-gfx b8f516d Merged main:fb33af08e4c105a05855f8beeb972d493410e72f into amd-gfx:1bf507427ecd Remote branch main 39ac64c [mlir][Arith] ValueBoundsInterface: speedup arith.select (llvm#113531)
diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake
@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 516202
+#define LLVM_MAIN_REVISION 516206
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -23,9 +23,9 @@ enum AsmWriterFlavorTy {
   ATT = 0, Intel = 1
 };
 
-static cl::opt<AsmWriterFlavorTy> AsmWriterFlavor(
+static cl::opt<AsmWriterFlavorTy> X86AsmSyntax(
     "x86-asm-syntax", cl::init(ATT), cl::Hidden,
-    cl::desc("Choose style of code to emit from X86 backend:"),
+    cl::desc("Select the assembly style for input"),
     cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"),
                clEnumValN(Intel, "intel", "Emit Intel-style assembly")));
 
@@ -41,7 +41,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
   if (is64Bit)
     CodePointerSize = CalleeSaveStackSlotSize = 8;
 
-  AssemblerDialect = AsmWriterFlavor;
+  AssemblerDialect = X86AsmSyntax;
 
   if (!is64Bit)
     Data64bitsDirective = nullptr;       // we can't emit a 64-bit unit
@@ -89,7 +89,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
   // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
   CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
 
-  AssemblerDialect = AsmWriterFlavor;
+  AssemblerDialect = X86AsmSyntax;
 
   // Debug Information
   SupportsDebugInformation = true;
@@ -126,7 +126,7 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
 
   ExceptionsType = ExceptionHandling::WinEH;
 
-  AssemblerDialect = AsmWriterFlavor;
+  AssemblerDialect = X86AsmSyntax;
 
   AllowAtInName = true;
 }
@@ -159,7 +159,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
     ExceptionsType = ExceptionHandling::DwarfCFI;
   }
 
-  AssemblerDialect = AsmWriterFlavor;
+  AssemblerDialect = X86AsmSyntax;
 
   AllowAtInName = true;
 }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2406,7 +2406,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
   }
 
-  if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
+  if (!Subtarget.useSoftFloat() && Subtarget.hasBF16() &&
+      Subtarget.useAVX512Regs()) {
     addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
     setF16Action(MVT::v32bf16, Expand);
     for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV})
@@ -2419,27 +2420,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) {
-    addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
-    addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
-    addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
-
-    setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
-    setOperationAction(ISD::FSUB, MVT::v32bf16, Legal);
-    setOperationAction(ISD::FMUL, MVT::v32bf16, Legal);
-    setOperationAction(ISD::FDIV, MVT::v32bf16, Legal);
-    setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
-    setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
-    setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
-    if (Subtarget.hasVLX()) {
-      for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
-        setOperationAction(ISD::FADD, VT, Legal);
-        setOperationAction(ISD::FSUB, VT, Legal);
-        setOperationAction(ISD::FMUL, VT, Legal);
-        setOperationAction(ISD::FDIV, VT, Legal);
-        setOperationAction(ISD::FSQRT, VT, Legal);
-        setOperationAction(ISD::FMA, VT, Legal);
-        setOperationAction(ISD::SETCC, VT, Custom);
-      }
+    for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
+      setOperationAction(ISD::FADD, VT, Legal);
+      setOperationAction(ISD::FSUB, VT, Legal);
+      setOperationAction(ISD::FMUL, VT, Legal);
+      setOperationAction(ISD::FDIV, VT, Legal);
+      setOperationAction(ISD::FSQRT, VT, Legal);
+      setOperationAction(ISD::FMA, VT, Legal);
+      setOperationAction(ISD::SETCC, VT, Custom);
+    }
+    if (Subtarget.hasAVX10_2_512()) {
+      setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
+      setOperationAction(ISD::FSUB, MVT::v32bf16, Legal);
+      setOperationAction(ISD::FMUL, MVT::v32bf16, Legal);
+      setOperationAction(ISD::FDIV, MVT::v32bf16, Legal);
+      setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
+      setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
+      setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
     }
   }
 
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
@@ -1166,3 +1166,25 @@ entry:
   %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
   ret <8 x bfloat> %2
 }
+
+define <32 x bfloat> @addv(<32 x bfloat> %a, <32 x bfloat> %b) nounwind {
+; X64-LABEL: addv:
+; X64:       # %bb.0:
+; X64-NEXT:    vaddnepbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
+; X64-NEXT:    vaddnepbf16 %ymm3, %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xcb]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: addv:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp # encoding: [0x55]
+; X86-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
+; X86-NEXT:    andl $-32, %esp # encoding: [0x83,0xe4,0xe0]
+; X86-NEXT:    subl $32, %esp # encoding: [0x83,0xec,0x20]
+; X86-NEXT:    vaddnepbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
+; X86-NEXT:    vaddnepbf16 8(%ebp), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0x8d,0x08,0x00,0x00,0x00]
+; X86-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
+; X86-NEXT:    popl %ebp # encoding: [0x5d]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %add = fadd <32 x bfloat> %a, %b
+  ret <32 x bfloat> %add
+}
diff --git a/mlir/lib/Conversion/MathToFuncs/MathToFuncs.cpp b/mlir/lib/Conversion/MathToFuncs/MathToFuncs.cpp
@@ -781,6 +781,9 @@ struct ConvertMathToFuncsPass
   // or equal to minWidthOfFPowIExponent option value.
   bool isFPowIConvertible(math::FPowIOp op);
 
+  // Reture true, if operation is integer type.
+  bool isConvertible(Operation *op);
+
   // Generate outlined implementations for power operations
   // and store them in funcImpls map.
   void generateOpImplementations();
@@ -798,13 +801,17 @@ bool ConvertMathToFuncsPass::isFPowIConvertible(math::FPowIOp op) {
   return (expTy && expTy.getWidth() >= minWidthOfFPowIExponent);
 }
 
+bool ConvertMathToFuncsPass::isConvertible(Operation *op) {
+  return isa<IntegerType>(getElementTypeOrSelf(op->getResult(0).getType()));
+}
+
 void ConvertMathToFuncsPass::generateOpImplementations() {
   ModuleOp module = getOperation();
 
   module.walk([&](Operation *op) {
     TypeSwitch<Operation *>(op)
         .Case<math::CountLeadingZerosOp>([&](math::CountLeadingZerosOp op) {
-          if (!convertCtlz)
+          if (!convertCtlz || !isConvertible(op))
             return;
           Type resultType = getElementTypeOrSelf(op.getResult().getType());
 
@@ -816,6 +823,9 @@ void ConvertMathToFuncsPass::generateOpImplementations() {
             entry.first->second = createCtlzFunc(&module, resultType);
         })
         .Case<math::IPowIOp>([&](math::IPowIOp op) {
+          if (!isConvertible(op))
+            return;
+
           Type resultType = getElementTypeOrSelf(op.getResult().getType());
 
           // Generate the software implementation of this operation,
@@ -873,9 +883,12 @@ void ConvertMathToFuncsPass::runOnOperation() {
                          func::FuncDialect, scf::SCFDialect,
                          vector::VectorDialect>();
 
-  target.addIllegalOp<math::IPowIOp>();
-  if (convertCtlz)
-    target.addIllegalOp<math::CountLeadingZerosOp>();
+  target.addDynamicallyLegalOp<math::IPowIOp>(
+      [this](math::IPowIOp op) { return !isConvertible(op); });
+  if (convertCtlz) {
+    target.addDynamicallyLegalOp<math::CountLeadingZerosOp>(
+        [this](math::CountLeadingZerosOp op) { return !isConvertible(op); });
+  }
   target.addDynamicallyLegalOp<math::FPowIOp>(
       [this](math::FPowIOp op) { return !isFPowIConvertible(op); });
   if (failed(applyPartialConversion(module, target, std::move(patterns))))
diff --git a/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp
@@ -107,9 +107,10 @@ struct SelectOpInterface
     // If trueValue <= falseValue:
     // * result <= falseValue
     // * result >= trueValue
-    if (cstr.compare(/*lhs=*/{trueValue, dim},
-                     ValueBoundsConstraintSet::ComparisonOperator::LE,
-                     /*rhs=*/{falseValue, dim})) {
+    if (cstr.populateAndCompare(
+            /*lhs=*/{trueValue, dim},
+            ValueBoundsConstraintSet::ComparisonOperator::LE,
+            /*rhs=*/{falseValue, dim})) {
       if (dim) {
         cstr.bound(value)[*dim] >= cstr.getExpr(trueValue, dim);
         cstr.bound(value)[*dim] <= cstr.getExpr(falseValue, dim);
@@ -121,9 +122,10 @@ struct SelectOpInterface
     // If falseValue <= trueValue:
     // * result <= trueValue
     // * result >= falseValue
-    if (cstr.compare(/*lhs=*/{falseValue, dim},
-                     ValueBoundsConstraintSet::ComparisonOperator::LE,
-                     /*rhs=*/{trueValue, dim})) {
+    if (cstr.populateAndCompare(
+            /*lhs=*/{falseValue, dim},
+            ValueBoundsConstraintSet::ComparisonOperator::LE,
+            /*rhs=*/{trueValue, dim})) {
       if (dim) {
         cstr.bound(value)[*dim] >= cstr.getExpr(falseValue, dim);
         cstr.bound(value)[*dim] <= cstr.getExpr(trueValue, dim);
diff --git a/mlir/test/Conversion/MathToFuncs/ctlz.mlir b/mlir/test/Conversion/MathToFuncs/ctlz.mlir
@@ -91,3 +91,13 @@ func.func @main(%arg0: i8) {
   func.return
 }
 
+// -----
+
+// Check that index is not converted
+
+// CHECK-LABEL: func.func @ctlz_index
+// CHECK:         math.ctlz
+func.func @ctlz_index(%arg0: index) {
+  %0 = math.ctlz %arg0 : index
+  func.return
+}
diff --git a/mlir/test/Conversion/MathToFuncs/ipowi.mlir b/mlir/test/Conversion/MathToFuncs/ipowi.mlir
@@ -170,3 +170,14 @@ func.func @ipowi_vec(%arg0: vector<2x3xi64>, %arg1: vector<2x3xi64>) {
   %0 = math.ipowi %arg0, %arg1 : vector<2x3xi64>
   func.return
 }
+
+// -----
+
+// Check that index is not converted
+
+// CHECK-LABEL: func.func @ipowi_index
+// CHECK:         math.ipowi
+func.func @ipowi_index(%arg0: index, %arg1: index) {
+  %0 = math.ipowi %arg0, %arg1 : index
+  func.return
+}