Skip to content

Conversation

@jroelofs
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented May 28, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Jon Roelofs (jroelofs)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/141704.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp (+54-1)
  • (added) llvm/test/Transforms/LowerMatrixIntrinsics/binop.ll (+69)
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 56d4be513ea6f..a3f1ca1644002 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -229,10 +229,20 @@ static bool isUniformShape(Value *V) {
   if (!I)
     return true;
 
+  if (auto *II = dyn_cast<IntrinsicInst>(V))
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::abs:
+    case Intrinsic::fabs:
+      return true;
+    default:
+      return false;
+    }
+
   switch (I->getOpcode()) {
   case Instruction::FAdd:
   case Instruction::FSub:
   case Instruction::FMul: // Scalar multiply.
+  case Instruction::FDiv:
   case Instruction::FNeg:
   case Instruction::Add:
   case Instruction::Mul:
@@ -624,7 +634,7 @@ class LowerMatrixIntrinsics {
       case Intrinsic::matrix_column_major_store:
         return true;
       default:
-        return false;
+        return isUniformShape(II);
       }
     return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V);
   }
@@ -1130,6 +1140,9 @@ class LowerMatrixIntrinsics {
     case Intrinsic::matrix_column_major_store:
       LowerColumnMajorStore(Inst);
       break;
+    case Intrinsic::abs:
+    case Intrinsic::fabs:
+      return VisitUniformIntrinsic(cast<IntrinsicInst>(Inst));
     default:
       return false;
     }
@@ -2167,6 +2180,8 @@ class LowerMatrixIntrinsics {
         return Builder.CreateFAdd(LHS, RHS);
       case Instruction::FMul:
         return Builder.CreateFMul(LHS, RHS);
+      case Instruction::FDiv:
+        return Builder.CreateFDiv(LHS, RHS);
       case Instruction::FSub:
         return Builder.CreateFSub(LHS, RHS);
       default:
@@ -2220,6 +2235,44 @@ class LowerMatrixIntrinsics {
     return true;
   }
 
+  /// Lower uniform shape intrinsics, if shape information is available.
+  bool VisitUniformIntrinsic(IntrinsicInst *Inst) {
+    auto I = ShapeMap.find(Inst);
+    if (I == ShapeMap.end())
+      return false;
+
+    IRBuilder<> Builder(Inst);
+    ShapeInfo &Shape = I->second;
+
+    MatrixTy Result;
+
+    switch (Inst->getIntrinsicID()) {
+    case Intrinsic::abs:
+    case Intrinsic::fabs: {
+      Value *Op = Inst->getOperand(0);
+
+      MatrixTy M = getMatrix(Op, Shape, Builder);
+
+      Builder.setFastMathFlags(getFastMathFlags(Inst));
+
+      for (unsigned I = 0; I < Shape.getNumVectors(); ++I)
+        switch (Inst->getIntrinsicID()) {
+        case Intrinsic::abs:
+          Result.addVector(Builder.CreateBinaryIntrinsic(Intrinsic::abs, M.getVector(I), Inst->getOperand(1)));
+          break;
+        case Intrinsic::fabs:
+          Result.addVector(Builder.CreateUnaryIntrinsic(Inst->getIntrinsicID(), M.getVector(I)));
+          break;
+        }
+
+      finalizeLowering(Inst, Result.addNumComputeOps(getNumOps(Result.getVectorTy()) * Result.getNumVectors()), Builder);
+      return true;
+    }
+    default:
+      llvm_unreachable("unexpected intrinsic");
+    }
+  }
+
   /// Helper to linearize a matrix expression tree into a string. Currently
   /// matrix expressions are linarized by starting at an expression leaf and
   /// linearizing bottom up.
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/binop.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/binop.ll
new file mode 100644
index 0000000000000..1eacb2a32e07d
--- /dev/null
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/binop.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
+
+define void @fdiv_2x2(ptr %num, ptr %denom, ptr %out) {
+; CHECK-LABEL: @fdiv_2x2(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[NUM:%.*]], align 32
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[NUM]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[DENOM:%.*]], align 32
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[DENOM]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <2 x double> [[COL_LOAD]], [[COL_LOAD2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fdiv <2 x double> [[COL_LOAD1]], [[COL_LOAD4]]
+; CHECK-NEXT:    store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
+; CHECK-NEXT:    [[VEC_GEP5:%.*]] = getelementptr double, ptr [[OUT]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[VEC_GEP5]], align 16
+; CHECK-NEXT:    ret void
+;
+  %numv = load <4 x double>, ptr %num
+  %denomv = load <4 x double>, ptr %denom
+  %div = fdiv <4 x double> %numv, %denomv
+  %divt  = call <4 x double> @llvm.matrix.transpose(<4 x double> %div, i32 2, i32 2)
+  %divtt = call <4 x double> @llvm.matrix.transpose(<4 x double> %divt, i32 2, i32 2)
+  store <4 x double> %divtt, ptr %out
+  ret void
+}
+
+define void @fabs_2x2f64(ptr %in, ptr %out) {
+; CHECK-LABEL: @fabs_2x2f64(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[IN:%.*]], align 32
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD1]])
+; CHECK-NEXT:    store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
+; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[OUT]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 16
+; CHECK-NEXT:    ret void
+;
+  %load = load <4 x double>, ptr %in
+  %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %load)
+  %fabst  = call <4 x double> @llvm.matrix.transpose(<4 x double> %fabs, i32 2, i32 2)
+  %fabstt = call <4 x double> @llvm.matrix.transpose(<4 x double> %fabst, i32 2, i32 2)
+  store <4 x double> %fabstt, ptr %out
+  ret void
+}
+
+define void @fabs_2x2i32(ptr %in, ptr %out) {
+; CHECK-LABEL: @fabs_2x2i32(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x i32>, ptr [[IN:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x i32>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD]], i1 false)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD1]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP1]], i1 true)
+; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP2]], i1 true)
+; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 2
+; CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[VEC_GEP2]], align 8
+; CHECK-NEXT:    ret void
+;
+  %load = load <4 x i32>, ptr %in
+  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %load, i1 false)
+  %abst  = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %abs, i32 2, i32 2)
+  %abstt = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %abst, i32 2, i32 2)
+  %absabstt = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abstt, i1 true)
+  store <4 x i32> %absabstt, ptr %out
+  ret void
+}

@jroelofs
Copy link
Contributor Author

This is a stacked PR. Its counterpart should be merged first: #141705

@github-actions
Copy link

github-actions bot commented May 28, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@jroelofs jroelofs force-pushed the jroelofs/lower-matrix-fabs branch from 84767fb to 0b5ffd2 Compare May 28, 2025 22:45
@jroelofs jroelofs requested a review from fhahn June 4, 2025 02:10
jroelofs and others added 3 commits June 5, 2025 13:53
Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks

@jroelofs jroelofs merged commit 274f5a8 into llvm:main Jun 9, 2025
7 checks passed
@jroelofs jroelofs deleted the jroelofs/lower-matrix-fabs branch June 9, 2025 19:52
tomtor pushed a commit to tomtor/llvm-project that referenced this pull request Jun 14, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants