[CIR][Dialect] Add BinOpKind_Max (#1201)

ghehg · web-flow · commit 994ceb5f7f87 · 2024-12-12T09:41:38.000-08:00
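This facilitates the implementation of the NEON intrinsic `neon_vmax_v` and
of `__builtin_elementwise_max`, and may enable future optimizations. Because
CIR BinOp supports vector types, `max` applies to both scalar and vector
integer operands; it is lowered to the LLVM `smax`/`umax` intrinsics
according to signedness.
Floating-point max is already supported by FMaxOp.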
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -1157,14 +1157,15 @@ def BinOpKind_Sub : I32EnumAttrCase<"Sub", 5, "sub">;
 def BinOpKind_And : I32EnumAttrCase<"And", 8, "and">;
 def BinOpKind_Xor : I32EnumAttrCase<"Xor", 9, "xor">;
 def BinOpKind_Or  : I32EnumAttrCase<"Or", 10, "or">;
+def BinOpKind_Max : I32EnumAttrCase<"Max", 11, "max">;
 
 def BinOpKind : I32EnumAttr<
     "BinOpKind",
     "binary operation (arith and logic) kind",
     [BinOpKind_Mul, BinOpKind_Div, BinOpKind_Rem,
      BinOpKind_Add, BinOpKind_Sub,
      BinOpKind_And, BinOpKind_Xor,
-     BinOpKind_Or]> {
+     BinOpKind_Or, BinOpKind_Max]> {
   let cppNamespace = "::cir";
 }
 
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2574,6 +2574,15 @@ mlir::LogicalResult CIRToLLVMBinOpLowering::matchAndRewrite(
   case cir::BinOpKind::Xor:
     rewriter.replaceOpWithNewOp<mlir::LLVM::XOrOp>(op, lhs, rhs);
     break;
+  case cir::BinOpKind::Max:
+    if (mlir::isa<mlir::IntegerType>(llvmEltTy)) {
+      auto isUnsigned = isIntTypeUnsigned(type);
+      if (isUnsigned)
+        rewriter.replaceOpWithNewOp<mlir::LLVM::UMaxOp>(op, llvmTy, lhs, rhs);
+      else
+        rewriter.replaceOpWithNewOp<mlir::LLVM::SMaxOp>(op, llvmTy, lhs, rhs);
+    }
+    break;
   }
 
   return mlir::LogicalResult::success();
diff --git a/clang/test/CIR/Lowering/binop-signed-int.cir b/clang/test/CIR/Lowering/binop-signed-int.cir
@@ -7,6 +7,8 @@ module {
     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
     %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
     %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+    %100 = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>, ["vec1", init] {alignment = 8 : i64}
+    %101 = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>, ["vec2", init] {alignment = 8 : i64}
     %3 = cir.const #cir.int<2> : !s32i    cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
     %4 = cir.const #cir.int<1> : !s32i    cir.store %4, %1 : !s32i, !cir.ptr<!s32i>
     %5 = cir.load %0 : !cir.ptr<!s32i>, !s32i
@@ -63,6 +65,12 @@ module {
     %36 = cir.binop(sub, %32, %33) sat: !s32i
     // CHECK: = llvm.intr.ssub.sat{{.*}}(i32, i32) -> i32 
     cir.store %34, %2 : !s32i, !cir.ptr<!s32i>
+    %37 = cir.binop(max, %32, %33) : !s32i
+    // CHECK: = llvm.intr.smax
+    %38 = cir.load %100 : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+    %39 = cir.load %101 : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+    %40 = cir.binop(max, %38, %39) : !cir.vector<!s32i x 2>
+    // CHECK: = llvm.intr.smax({{%.*}}, {{%.*}}) : (vector<2xi32>, vector<2xi32>) -> vector<2xi32>
     cir.return
   }
 }
diff --git a/clang/test/CIR/Lowering/binop-unsigned-int.cir b/clang/test/CIR/Lowering/binop-unsigned-int.cir
@@ -7,6 +7,8 @@ module {
     %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init] {alignment = 4 : i64}
     %1 = cir.alloca !u32i, !cir.ptr<!u32i>, ["b", init] {alignment = 4 : i64}
     %2 = cir.alloca !u32i, !cir.ptr<!u32i>, ["x", init] {alignment = 4 : i64}
+    %100 = cir.alloca !cir.vector<!u32i x 2>, !cir.ptr<!cir.vector<!u32i x 2>>, ["vec1", init] {alignment = 8 : i64}
+    %101 = cir.alloca !cir.vector<!u32i x 2>, !cir.ptr<!cir.vector<!u32i x 2>>, ["vec2", init] {alignment = 8 : i64}
     %3 = cir.const #cir.int<2> : !u32i    cir.store %3, %0 : !u32i, !cir.ptr<!u32i>
     %4 = cir.const #cir.int<1> : !u32i    cir.store %4, %1 : !u32i, !cir.ptr<!u32i>
     %5 = cir.load %0 : !cir.ptr<!u32i>, !u32i
@@ -51,6 +53,10 @@ module {
     cir.store %34, %2 : !u32i, !cir.ptr<!u32i>
     %35 = cir.binop(add, %32, %33) sat: !u32i
     %36 = cir.binop(sub, %32, %33) sat: !u32i  
+    %37 = cir.binop(max, %32, %33) : !u32i
+    %38 = cir.load %100 : !cir.ptr<!cir.vector<!u32i x 2>>, !cir.vector<!u32i x 2>
+    %39 = cir.load %101 : !cir.ptr<!cir.vector<!u32i x 2>>, !cir.vector<!u32i x 2>
+    %40 = cir.binop(max, %38, %39) : !cir.vector<!u32i x 2>
     cir.return
   }
 }
@@ -64,8 +70,11 @@ module {
 // MLIR: = llvm.shl
 // MLIR: = llvm.and
 // MLIR: = llvm.xor
+// MLIR: = llvm.or
 // MLIR: = llvm.intr.uadd.sat{{.*}}(i32, i32) -> i32
 // MLIR: = llvm.intr.usub.sat{{.*}}(i32, i32) -> i32 
+// MLIR: = llvm.intr.umax
+// MLIR: = llvm.intr.umax
 
 // LLVM: = mul i32
 // LLVM: = udiv i32
@@ -79,3 +88,5 @@ module {
 // LLVM: = or i32
 // LLVM: = call i32 @llvm.uadd.sat.i32
 // LLVM: = call i32 @llvm.usub.sat.i32
+// LLVM: = call i32 @llvm.umax.i32
+// LLVM: = call <2 x i32> @llvm.umax.v2i32