[Matrix] Propagate shape information through (f)abs insts #141704

jroelofs · 2025-05-28T02:16:44Z

No description provided.

llvmbot · 2025-05-28T02:17:20Z

@llvm/pr-subscribers-llvm-transforms

Author: Jon Roelofs (jroelofs)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/141704.diff

2 Files Affected:

(modified) llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp (+54-1)
(added) llvm/test/Transforms/LowerMatrixIntrinsics/binop.ll (+69)

diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 56d4be513ea6f..a3f1ca1644002 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -229,10 +229,20 @@ static bool isUniformShape(Value *V) {
   if (!I)
     return true;
 
+  if (auto *II = dyn_cast<IntrinsicInst>(V))
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::abs:
+    case Intrinsic::fabs:
+      return true;
+    default:
+      return false;
+    }
+
   switch (I->getOpcode()) {
   case Instruction::FAdd:
   case Instruction::FSub:
   case Instruction::FMul: // Scalar multiply.
+  case Instruction::FDiv:
   case Instruction::FNeg:
   case Instruction::Add:
   case Instruction::Mul:
@@ -624,7 +634,7 @@ class LowerMatrixIntrinsics {
       case Intrinsic::matrix_column_major_store:
         return true;
       default:
-        return false;
+        return isUniformShape(II);
       }
     return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V);
   }
@@ -1130,6 +1140,9 @@ class LowerMatrixIntrinsics {
     case Intrinsic::matrix_column_major_store:
       LowerColumnMajorStore(Inst);
       break;
+    case Intrinsic::abs:
+    case Intrinsic::fabs:
+      return VisitUniformIntrinsic(cast<IntrinsicInst>(Inst));
     default:
       return false;
     }
@@ -2167,6 +2180,8 @@ class LowerMatrixIntrinsics {
         return Builder.CreateFAdd(LHS, RHS);
       case Instruction::FMul:
         return Builder.CreateFMul(LHS, RHS);
+      case Instruction::FDiv:
+        return Builder.CreateFDiv(LHS, RHS);
       case Instruction::FSub:
         return Builder.CreateFSub(LHS, RHS);
       default:
@@ -2220,6 +2235,44 @@ class LowerMatrixIntrinsics {
     return true;
   }
 
+  /// Lower a uniform-shape intrinsic (currently llvm.abs and llvm.fabs), if
+  /// shape information is available.
+  ///
+  /// The matrix operand is split into vectors according to the shape recorded
+  /// for \p Inst, and the intrinsic is re-emitted once per vector.
+  ///
+  /// \returns false, leaving \p Inst untouched, when \p Inst has no entry in
+  /// ShapeMap; true once the per-vector result has been handed to
+  /// finalizeLowering().
+  bool VisitUniformIntrinsic(IntrinsicInst *Inst) {
+    auto ShapeIt = ShapeMap.find(Inst);
+    if (ShapeIt == ShapeMap.end())
+      return false;
+
+    IRBuilder<> Builder(Inst);
+    ShapeInfo &Shape = ShapeIt->second;
+    const Intrinsic::ID IID = Inst->getIntrinsicID();
+
+    MatrixTy Result;
+    MatrixTy M = getMatrix(Inst->getOperand(0), Shape, Builder);
+
+    // Instructions created from here on use the flags computed for Inst.
+    Builder.setFastMathFlags(getFastMathFlags(Inst));
+
+    for (unsigned VecIdx = 0; VecIdx < Shape.getNumVectors(); ++VecIdx) {
+      Value *Vec = M.getVector(VecIdx);
+      switch (IID) {
+      case Intrinsic::abs:
+        // llvm.abs has a second operand; forward it unchanged to each call.
+        Result.addVector(
+            Builder.CreateBinaryIntrinsic(IID, Vec, Inst->getOperand(1)));
+        break;
+      case Intrinsic::fabs:
+        Result.addVector(Builder.CreateUnaryIntrinsic(IID, Vec));
+        break;
+      default:
+        // Reached only if a caller dispatches an intrinsic that is not
+        // handled above.
+        llvm_unreachable("unexpected intrinsic");
+      }
+    }
+
+    finalizeLowering(Inst,
+                     Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
+                                             Result.getNumVectors()),
+                     Builder);
+    return true;
+  }
+
   /// Helper to linearize a matrix expression tree into a string. Currently
   /// matrix expressions are linearized by starting at an expression leaf and
   /// linearizing bottom up.
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/binop.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/binop.ll
new file mode 100644
index 0000000000000..1eacb2a32e07d
--- /dev/null
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/binop.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
+
+; Element-wise fdiv of two <4 x double> values loaded from %num and %denom.
+; The expected output performs the division as two <2 x double> operations
+; (one per column load), and neither transpose call remains after lowering.
+; NOTE(review): the back-to-back 2x2 transposes are presumably what supplies
+; the shape information for %div -- confirm.
+define void @fdiv_2x2(ptr %num, ptr %denom, ptr %out) {
+; CHECK-LABEL: @fdiv_2x2(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[NUM:%.*]], align 32
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[NUM]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[DENOM:%.*]], align 32
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[DENOM]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <2 x double> [[COL_LOAD]], [[COL_LOAD2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fdiv <2 x double> [[COL_LOAD1]], [[COL_LOAD4]]
+; CHECK-NEXT:    store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
+; CHECK-NEXT:    [[VEC_GEP5:%.*]] = getelementptr double, ptr [[OUT]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[VEC_GEP5]], align 16
+; CHECK-NEXT:    ret void
+;
+  %numv = load <4 x double>, ptr %num
+  %denomv = load <4 x double>, ptr %denom
+  %div = fdiv <4 x double> %numv, %denomv
+  %divt  = call <4 x double> @llvm.matrix.transpose(<4 x double> %div, i32 2, i32 2)
+  %divtt = call <4 x double> @llvm.matrix.transpose(<4 x double> %divt, i32 2, i32 2)
+  store <4 x double> %divtt, ptr %out
+  ret void
+}
+
+; llvm.fabs applied to a <4 x double> value loaded from %in. The expected
+; output calls llvm.fabs.v2f64 once per <2 x double> column load, and neither
+; transpose call remains after lowering.
+; NOTE(review): the back-to-back 2x2 transposes are presumably what supplies
+; the shape information for %fabs -- confirm.
+define void @fabs_2x2f64(ptr %in, ptr %out) {
+; CHECK-LABEL: @fabs_2x2f64(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[IN:%.*]], align 32
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD1]])
+; CHECK-NEXT:    store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
+; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[OUT]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 16
+; CHECK-NEXT:    ret void
+;
+  %load = load <4 x double>, ptr %in
+  %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %load)
+  %fabst  = call <4 x double> @llvm.matrix.transpose(<4 x double> %fabs, i32 2, i32 2)
+  %fabstt = call <4 x double> @llvm.matrix.transpose(<4 x double> %fabst, i32 2, i32 2)
+  store <4 x double> %fabstt, ptr %out
+  ret void
+}
+
+; Integer llvm.abs on a <4 x i32> value loaded from %in. Despite the "fabs" in
+; the function name, the intrinsic exercised here is llvm.abs.v4i32.
+; Two abs calls are chained: one before the pair of transposes (i1 false) and
+; one after it (i1 true). The expected output lowers each to two <2 x i32>
+; calls that keep their own i1 argument, and no transpose call remains.
+define void @fabs_2x2i32(ptr %in, ptr %out) {
+; CHECK-LABEL: @fabs_2x2i32(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x i32>, ptr [[IN:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x i32>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD]], i1 false)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD1]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP1]], i1 true)
+; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP2]], i1 true)
+; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 2
+; CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[VEC_GEP2]], align 8
+; CHECK-NEXT:    ret void
+;
+  %load = load <4 x i32>, ptr %in
+  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %load, i1 false)
+  %abst  = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %abs, i32 2, i32 2)
+  %abstt = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %abst, i32 2, i32 2)
+  %absabstt = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abstt, i1 true)
+  store <4 x i32> %absabstt, ptr %out
+  ret void
+}

jroelofs · 2025-05-28T02:18:48Z

This is a stacked PR. Its counterpart should be merged first: #141705

github-actions · 2025-05-28T02:19:51Z

✅ With the latest revision this PR passed the C/C++ code formatter.

…-fabs

llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp

…-fabs

llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp

Co-authored-by: Florian Hahn <[email protected]>

fhahn

LGTM, thanks

jroelofs requested review from anemet and fhahn May 28, 2025 02:16

llvmbot added the llvm:transforms label May 28, 2025

jroelofs force-pushed the jroelofs/lower-matrix-fabs branch from a70843a to 41eeb1e Compare May 28, 2025 02:26

jroelofs mentioned this pull request May 28, 2025

[Matrix] Propagate shape information through cast insts #141869

Merged

[Matrix] Propagate shape information through (f)abs insts

0b5ffd2

jroelofs force-pushed the jroelofs/lower-matrix-fabs branch from 84767fb to 0b5ffd2 Compare May 28, 2025 22:45

jroelofs added 3 commits June 1, 2025 10:17

Merge remote-tracking branch 'origin/main' into jroelofs/lower-matrix…

cba29af

…-fabs

Merge remote-tracking branch 'origin/main' into jroelofs/lower-matrix…

7984ad9

…-fabs

switch to col major store intrinsics in binop.ll

9a7d824

fhahn reviewed Jun 2, 2025

View reviewed changes

jroelofs added 2 commits June 2, 2025 11:46

Merge branch 'main' into jroelofs/lower-matrix-fabs

f7fef74

review feedback

5a8d3b3

jroelofs requested a review from fhahn June 4, 2025 02:10

fhahn reviewed Jun 5, 2025

View reviewed changes

llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp Outdated Show resolved Hide resolved

jroelofs added 2 commits June 5, 2025 11:23

Merge remote-tracking branch 'origin/main' into jroelofs/lower-matrix…

9f9b27d

…-fabs

Move VisitUnaryIntrinsic into caller, rename caller

f558791

fhahn reviewed Jun 5, 2025

View reviewed changes

llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp Show resolved Hide resolved

jroelofs and others added 3 commits June 5, 2025 13:53

remove newlines

67dd170

Co-authored-by: Florian Hahn <[email protected]>

continue's

c29ab46

return's

4dcb988

fhahn approved these changes Jun 6, 2025

View reviewed changes

jroelofs merged commit 274f5a8 into llvm:main Jun 9, 2025
7 checks passed

jroelofs deleted the jroelofs/lower-matrix-fabs branch June 9, 2025 19:52

tomtor pushed a commit to tomtor/llvm-project that referenced this pull request Jun 14, 2025

[Matrix] Propagate shape information through (f)abs insts (llvm#141704)

0f36a14

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Matrix] Propagate shape information through (f)abs insts #141704

[Matrix] Propagate shape information through (f)abs insts #141704

Uh oh!

jroelofs commented May 28, 2025

Uh oh!

llvmbot commented May 28, 2025

Uh oh!

jroelofs commented May 28, 2025

Uh oh!

github-actions bot commented May 28, 2025 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

fhahn left a comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

[Matrix] Propagate shape information through (f)abs insts #141704

[Matrix] Propagate shape information through (f)abs insts #141704

Uh oh!

Conversation

jroelofs commented May 28, 2025

Uh oh!

llvmbot commented May 28, 2025

Uh oh!

jroelofs commented May 28, 2025

Uh oh!

github-actions bot commented May 28, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

fhahn left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

github-actions bot commented May 28, 2025 •

edited

Loading