diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index a7072ea719292..ce6eaa292d8fb 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -323,9 +323,11 @@ computeShapeInfoForInst(Instruction *I, return OpShape->second; } - if (isUniformShape(I)) { + if (isUniformShape(I) || isa<SelectInst>(I)) { + auto Ops = I->operands(); + auto ShapedOps = isa<SelectInst>(I) ? drop_begin(Ops) : Ops; // Find the first operand that has a known shape and use that. - for (auto &Op : I->operands()) { + for (auto &Op : ShapedOps) { auto OpShape = ShapeMap.find(Op.get()); if (OpShape != ShapeMap.end()) return OpShape->second; @@ -701,7 +703,8 @@ class LowerMatrixIntrinsics { default: return isUniformShape(II); } - return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V); + return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V) || + isa<SelectInst>(V); } /// Propagate the shape information of instructions to their users. @@ -788,10 +791,12 @@ class LowerMatrixIntrinsics { } else if (isa<StoreInst>(V)) { // Nothing to do. We forward-propagated to this so we would just // backward propagate to an instruction with an already known shape. - } else if (isUniformShape(V)) { + } else if (isUniformShape(V) || isa<SelectInst>(V)) { + auto Ops = cast<Instruction>(V)->operands(); + auto ShapedOps = isa<SelectInst>(V) ? drop_begin(Ops) : Ops; // Propagate to all operands. 
ShapeInfo Shape = ShapeMap[V]; - for (Use &U : cast<Instruction>(V)->operands()) { + for (Use &U : ShapedOps) { if (setShapeInfo(U.get(), Shape)) pushInstruction(U.get(), WorkList); } @@ -1148,6 +1153,8 @@ class LowerMatrixIntrinsics { Result = VisitUnaryOperator(UnOp, SI); else if (auto *Intr = dyn_cast<IntrinsicInst>(Inst)) Result = VisitIntrinsicInst(Intr, SI); + else if (auto *Select = dyn_cast<SelectInst>(Inst)) + Result = VisitSelectInst(Select, SI); else if (match(Inst, m_Load(m_Value(Op1)))) Result = VisitLoad(cast<LoadInst>(Inst), SI, Op1); else if (match(Inst, m_Store(m_Value(Op1), m_Value(Op2)))) @@ -2307,6 +2314,36 @@ class LowerMatrixIntrinsics { Result.getNumVectors()); } + /// Lower selects. + MatrixTy VisitSelectInst(SelectInst *Inst, const ShapeInfo &Shape) { + Value *Cond = Inst->getOperand(0); + Value *OpA = Inst->getOperand(1); + Value *OpB = Inst->getOperand(2); + + IRBuilder<> Builder(Inst); + + MatrixTy Result; + MatrixTy A = getMatrix(OpA, Shape, Builder); + MatrixTy B = getMatrix(OpB, Shape, Builder); + + Value *CondV[2]; + if (isa<VectorType>(Cond->getType())) { + MatrixTy C = getMatrix(Cond, Shape, Builder); + CondV[0] = C.getVector(0); + CondV[1] = C.getVector(1); + } else { + CondV[0] = Cond; + CondV[1] = Cond; + } + + for (unsigned I = 0, E = Shape.getNumVectors(); I != E; ++I) + Result.addVector( + Builder.CreateSelect(CondV[I], A.getVector(I), B.getVector(I))); + + return Result.addNumComputeOps(getNumOps(Result.getVectorTy()) * + Result.getNumVectors()); + } + /// Helper to linearize a matrix expression tree into a string. Currently /// matrix expressions are linarized by starting at an expression leaf and /// linearizing bottom up. 
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll new file mode 100644 index 0000000000000..70b0dfdb3e7e8 --- /dev/null +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s + +define void @select_2x2_bot(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) { +; CHECK-LABEL: @select_2x2_bot( +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, ptr [[LHS:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr float, ptr [[RHS]], i64 2 +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD4]] +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 4 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr float, ptr [[OUT]], i64 2 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[VEC_GEP5]], align 4 +; CHECK-NEXT: ret void +; + %lhsv = load <4 x float>, ptr %lhs + %rhsv = load <4 x float>, ptr %rhs + %op = select i1 %cond, <4 x float> %lhsv, <4 x float> %rhsv + call void @llvm.matrix.column.major.store(<4 x float> %op, ptr %out, i64 2, i1 false, i32 2, i32 2) + ret void +} + +define void @select_2x2_lhs(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) { +; CHECK-LABEL: @select_2x2_lhs( +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, ptr [[LHS:%.*]], align 4 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr 
[[VEC_GEP]], align 4 +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr float, ptr [[RHS]], i64 2 +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD4]] +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr float, ptr [[OUT]], i64 2 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[VEC_GEP5]], align 8 +; CHECK-NEXT: ret void +; + %lhsv = call <4 x float> @llvm.matrix.column.major.load(ptr %lhs, i64 2, i1 false, i32 2, i32 2) + %rhsv = load <4 x float>, ptr %rhs + %op = select i1 %cond, <4 x float> %lhsv, <4 x float> %rhsv + store <4 x float> %op, ptr %out + ret void +} + +define void @select_2x2_rhs(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) { +; CHECK-LABEL: @select_2x2_rhs( +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr [[RHS]], i64 2 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[RHS1:%.*]], align 4 +; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr float, ptr [[RHS1]], i64 2 +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD4]] +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr float, ptr [[OUT]], i64 2 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[VEC_GEP2]], align 8 +; CHECK-NEXT: ret void +; + %lhsv = load <4 x float>, ptr %lhs 
+ %rhsv = call <4 x float> @llvm.matrix.column.major.load(ptr %rhs, i64 2, i1 false, i32 2, i32 2) + %op = select i1 %cond, <4 x float> %lhsv, <4 x float> %rhsv + store <4 x float> %op, ptr %out + ret void +} + +define void @select_2x2_vcond_shape1(ptr %cond, ptr %lhs, ptr %rhs, ptr %out) { +; CHECK-LABEL: @select_2x2_vcond_shape1( +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, ptr [[LHS:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[CONDV:%.*]] = load <4 x i1>, ptr [[COND:%.*]], align 1 +; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x float>, ptr [[RHS1:%.*]], align 4 +; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr float, ptr [[RHS1]], i64 2 +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x float>, ptr [[VEC_GEP6]], align 4 +; CHECK-NEXT: [[COL_LOAD2:%.*]] = shufflevector <4 x i1> [[CONDV]], <4 x i1> poison, <2 x i32> <i32 0, i32 1> +; CHECK-NEXT: [[COL_LOAD4:%.*]] = shufflevector <4 x i1> [[CONDV]], <4 x i1> poison, <2 x i32> <i32 2, i32 3> +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[COL_LOAD2]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD5]] +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[COL_LOAD4]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD7]] +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP8:%.*]] = getelementptr float, ptr [[OUT]], i64 2 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[VEC_GEP8]], align 8 +; CHECK-NEXT: ret void +; + %lhsv = load <4 x float>, ptr %lhs + %condv = load <4 x i1>, ptr %cond + %rhsv = call <4 x float> @llvm.matrix.column.major.load(ptr %rhs, i64 2, i1 false, i32 2, i32 2) + %op = select <4 x i1> %condv, <4 x float> %lhsv, <4 x float> %rhsv + store <4 x float> %op, ptr %out + ret void +} + +define void @select_2x2_vcond_shape2(ptr %cond, ptr %lhs, ptr %rhs, ptr %out) { +; CHECK-LABEL: @select_2x2_vcond_shape2( +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, ptr 
[[LHS:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x i1>, ptr [[COND:%.*]], align 1 +; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr i1, ptr [[COND]], i64 2 +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x i1>, ptr [[VEC_GEP3]], align 1 +; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 4 +; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr float, ptr [[RHS]], i64 2 +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x float>, ptr [[VEC_GEP6]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[COL_LOAD2]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD5]] +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[COL_LOAD4]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD7]] +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP8:%.*]] = getelementptr float, ptr [[OUT]], i64 2 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[VEC_GEP8]], align 8 +; CHECK-NEXT: ret void +; + %lhsv = load <4 x float>, ptr %lhs + %condv = call <4 x i1> @llvm.matrix.column.major.load(ptr %cond, i64 2, i1 false, i32 2, i32 2) + %rhsv = call <4 x float> @llvm.matrix.column.major.load(ptr %rhs, i64 2, i1 false, i32 2, i32 2) + %op = select <4 x i1> %condv, <4 x float> %lhsv, <4 x float> %rhsv + store <4 x float> %op, ptr %out + ret void +} + +define void @select_2x2_vcond_shape3(ptr %cond, ptr %lhs, ptr %rhs, ptr %out) { +; CHECK-LABEL: @select_2x2_vcond_shape3( +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, ptr [[LHS:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <4 x i1>, ptr [[COND:%.*]], align 1 +; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 4 +; CHECK-NEXT: [[VEC_GEP4:%.*]] = 
getelementptr float, ptr [[RHS]], i64 2 +; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x float>, ptr [[VEC_GEP4]], align 4 +; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <4 x i1> [[COL_LOAD2]], <4 x i1> poison, <2 x i32> <i32 0, i32 1> +; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <4 x i1> [[COL_LOAD2]], <4 x i1> poison, <2 x i32> <i32 2, i32 3> +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[SPLIT]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD3]] +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[SPLIT6]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD5]] +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 16 +; CHECK-NEXT: [[VEC_GEP7:%.*]] = getelementptr float, ptr [[OUT]], i64 2 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[VEC_GEP7]], align 8 +; CHECK-NEXT: ret void +; + %lhsv = load <4 x float>, ptr %lhs + %condv = call <4 x i1> @llvm.matrix.column.major.load(ptr %cond, i64 4, i1 false, i32 4, i32 1) + %rhsv = call <4 x float> @llvm.matrix.column.major.load(ptr %rhs, i64 2, i1 false, i32 2, i32 2) + %op = select <4 x i1> %condv, <4 x float> %lhsv, <4 x float> %rhsv + store <4 x float> %op, ptr %out + ret void +}