LowerMatrixIntrinsics: handle select whose vector condition has a mismatched shape

jroelofs · jroelofs · commit c0c63f392205 · 2025-06-05T13:46:58.000-07:00
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -271,7 +271,9 @@ computeShapeInfoForInst(Instruction *I,
   }
 
   if (auto *Select = dyn_cast<SelectInst>(I)) {
-    for (Use &Op : Select->getCondition()->getType()->isVectorTy() ? I->operands() : drop_begin(I->operands())) {
+    Type *CondTy = Select->getCondition()->getType();
+    for (Use &Op : CondTy->isVectorTy() ? Select->operands()
+                                        : drop_begin(Select->operands())) {
       auto OpShape = ShapeMap.find(Op);
       if (OpShape != ShapeMap.end())
         return OpShape->second;
@@ -719,10 +721,12 @@ class LowerMatrixIntrinsics {
         // backward propagate to an instruction with an already known shape.
       } else if (auto *Select = dyn_cast<SelectInst>(V)) {
         ShapeInfo Shape = ShapeMap[V];
-        if (setShapeInfo(Select->getOperand(1), Shape))
-          pushInstruction(Select, WorkList);
-        if (setShapeInfo(Select->getOperand(2), Shape))
-          pushInstruction(Select, WorkList);
+        Type *CondTy = Select->getCondition()->getType();
+        for (Use &Op : CondTy->isVectorTy() ? Select->operands()
+                                            : drop_begin(Select->operands())) {
+          if (setShapeInfo(Op, Shape))
+            pushInstruction(Select, WorkList);
+        }
       } else if (isUniformShape(V)) {
         // Propagate to all operands.
         ShapeInfo Shape = ShapeMap[V];
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll
@@ -67,39 +67,41 @@ define void @select_2x2_rhs(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) {
   ret void
 }
 
-define void @select_2x2_vcond(<4 x i1> %cond, ptr %lhs, ptr %rhs, ptr %out) {
-; CHECK-LABEL: @select_2x2_vcond(
+define void @select_2x2_vcond_shape1(ptr %cond, ptr %lhs, ptr %rhs, ptr %out) {
+; CHECK-LABEL: @select_2x2_vcond_shape1(
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr [[LHS:%.*]], align 16
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2
 ; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
-; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 4
-; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr float, ptr [[RHS]], i64 2
-; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 4
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x i1> [[COND:%.*]], <4 x i1> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <4 x i1> [[COND]], <4 x i1> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[SPLIT]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[SPLIT5]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD4]]
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x i1>, ptr [[RHS:%.*]], align 1
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr i1, ptr [[RHS]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x i1>, ptr [[VEC_GEP3]], align 1
+; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x float>, ptr [[RHS1:%.*]], align 4
+; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr float, ptr [[RHS1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD7:%.*]] = load <2 x float>, ptr [[VEC_GEP6]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[COL_LOAD2]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[COL_LOAD4]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD7]]
 ; CHECK-NEXT:    store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 16
-; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr float, ptr [[OUT]], i64 2
-; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[VEC_GEP6]], align 8
+; CHECK-NEXT:    [[VEC_GEP8:%.*]] = getelementptr float, ptr [[OUT]], i64 2
+; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[VEC_GEP8]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %lhsv = load <4 x float>, ptr %lhs
+  %condv = load <4 x i1>, ptr %cond
   %rhsv = call <4 x float> @llvm.matrix.column.major.load(ptr %rhs, i64 2, i1 false, i32 2, i32 2)
-  %op = select <4 x i1> %cond, <4 x float> %lhsv, <4 x float> %rhsv
+  %op = select <4 x i1> %condv, <4 x float> %lhsv, <4 x float> %rhsv
   store <4 x float> %op, ptr %out
   ret void
 }
 
-define void @select_2x2_vcond_shape(ptr %lhs, ptr %rhs, ptr %out) {
-; CHECK-LABEL: @select_2x2_vcond_shape(
+define void @select_2x2_vcond_shape2(ptr %cond, ptr %lhs, ptr %rhs, ptr %out) {
+; CHECK-LABEL: @select_2x2_vcond_shape2(
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr [[LHS:%.*]], align 16
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2
 ; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
-; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x i1>, ptr [[RHS:%.*]], align 1
-; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr i1, ptr [[RHS]], i64 2
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x i1>, ptr [[COND:%.*]], align 1
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr i1, ptr [[COND]], i64 2
 ; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x i1>, ptr [[VEC_GEP3]], align 1
-; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x float>, ptr [[RHS]], align 4
+; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 4
 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr float, ptr [[RHS]], i64 2
 ; CHECK-NEXT:    [[COL_LOAD7:%.*]] = load <2 x float>, ptr [[VEC_GEP6]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[COL_LOAD2]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD5]]
@@ -110,9 +112,35 @@ define void @select_2x2_vcond_shape(ptr %lhs, ptr %rhs, ptr %out) {
 ; CHECK-NEXT:    ret void
 ;
   %lhsv = load <4 x float>, ptr %lhs
-  %cond = call <4 x i1> @llvm.matrix.column.major.load(ptr %rhs, i64 2, i1 false, i32 2, i32 2)
+  %condv = call <4 x i1> @llvm.matrix.column.major.load(ptr %cond, i64 2, i1 false, i32 2, i32 2)
+  %rhsv = call <4 x float> @llvm.matrix.column.major.load(ptr %rhs, i64 2, i1 false, i32 2, i32 2)
+  %op = select <4 x i1> %condv, <4 x float> %lhsv, <4 x float> %rhsv
+  store <4 x float> %op, ptr %out
+  ret void
+}
+
+define void @select_2x2_vcond_shape3(ptr %cond, ptr %lhs, ptr %rhs, ptr %out) {
+; CHECK-LABEL: @select_2x2_vcond_shape3(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr [[LHS:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <4 x i1>, ptr [[COND:%.*]], align 1
+; CHECK-NEXT:    [[COL_LOAD3:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 4
+; CHECK-NEXT:    [[VEC_GEP4:%.*]] = getelementptr float, ptr [[RHS]], i64 2
+; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x float>, ptr [[VEC_GEP4]], align 4
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x i1> [[COL_LOAD2]], <4 x i1> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT6:%.*]] = shufflevector <4 x i1> [[COL_LOAD2]], <4 x i1> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[SPLIT]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[SPLIT6]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD5]]
+; CHECK-NEXT:    store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP7:%.*]] = getelementptr float, ptr [[OUT]], i64 2
+; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[VEC_GEP7]], align 8
+; CHECK-NEXT:    ret void
+;
+  %lhsv = load <4 x float>, ptr %lhs
+  %condv = call <4 x i1> @llvm.matrix.column.major.load(ptr %cond, i64 4, i1 false, i32 4, i32 1)
   %rhsv = call <4 x float> @llvm.matrix.column.major.load(ptr %rhs, i64 2, i1 false, i32 2, i32 2)
-  %op = select <4 x i1> %cond, <4 x float> %lhsv, <4 x float> %rhsv
+  %op = select <4 x i1> %condv, <4 x float> %lhsv, <4 x float> %rhsv
   store <4 x float> %op, ptr %out
   ret void
 }