Merge remote-tracking branch 'origin/main' into vplan-verify-def-use-phi

fhahn · fhahn · commit 370eaf1abf84 · 2025-05-13T09:58:15.000+01:00
diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
@@ -321,6 +321,11 @@ class ASTWalker : public RecursiveASTVisitor<ASTWalker> {
     return true;
   }
 
+  bool VisitCleanupAttr(CleanupAttr *attr) {
+    report(attr->getLocation(), attr->getFunctionDecl());
+    return true;
+  }
+
   // TypeLoc visitors.
   void reportType(SourceLocation RefLoc, NamedDecl *ND) {
     // Reporting explicit references to types nested inside classes can cause
diff --git a/clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp b/clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp
@@ -570,5 +570,11 @@ TEST(WalkAST, OperatorNewDelete) {
   testWalk("struct A { static void $ambiguous^operator delete(void*); };",
            "void foo() { A a; ^delete &a; }");
 }
+
+TEST(WalkAST, CleanupAttr) {
+  testWalk("void* $explicit^freep(void *p);",
+           "void foo() { __attribute__((^__cleanup__(freep))) char* x = 0; }");
+}
+
 } // namespace
 } // namespace clang::include_cleaner
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2249,7 +2249,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   return Dependence::BackwardVectorizable;
 }
 
-bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets,
+bool MemoryDepChecker::areDepsSafe(const DepCandidates &DepCands,
                                    const MemAccessInfoList &CheckDeps) {
 
   MinDepDistBytes = -1;
@@ -2260,9 +2260,9 @@ bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets,
 
     // Check accesses within this set.
     EquivalenceClasses<MemAccessInfo>::member_iterator AI =
-        AccessSets.findLeader(CurAccess);
+        DepCands.findLeader(CurAccess);
     EquivalenceClasses<MemAccessInfo>::member_iterator AE =
-        AccessSets.member_end();
+        DepCands.member_end();
 
     // Check every access pair.
     while (AI != AE) {
@@ -2527,9 +2527,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
     return true;
   }
 
-  MemoryDepChecker::DepCandidates DependentAccesses;
-  AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE,
-                          LoopAliasScopes);
+  MemoryDepChecker::DepCandidates DepCands;
+  AccessAnalysis Accesses(TheLoop, AA, LI, DepCands, *PSE, LoopAliasScopes);
 
   // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
   // multiple times on the same object. If the ptr is accessed twice, once
@@ -2651,8 +2650,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
   bool DepsAreSafe = true;
   if (Accesses.isDependencyCheckNeeded()) {
     LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
-    DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
-                                          Accesses.getDependenciesToCheck());
+    DepsAreSafe =
+        DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());
 
     if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
       LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -1734,6 +1734,10 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
     if (Processed[i] || Real->getNumOperands() < 2)
       continue;
 
+    // Can only combine integer reductions at the moment.
+    if (!ReductionInfo[Real].second->getType()->isIntegerTy())
+      continue;
+
     RealPHI = ReductionInfo[Real].first;
     ImagPHI = nullptr;
     PHIsFound = false;
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -836,6 +836,24 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
       return TyBits - 1; // Every always-zero bit is a sign bit.
     break;
   }
+  case TargetOpcode::G_BUILD_VECTOR: {
+    // Take the minimum sign-bit count over every demanded vector element.
+    FirstAnswer = TyBits;
+    APInt SingleDemandedElt(1, 1);
+    for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
+      if (!DemandedElts[i])
+        continue;
+
+      unsigned Tmp2 = computeNumSignBits(MI.getOperand(i + 1).getReg(),
+                                         SingleDemandedElt, Depth + 1);
+      FirstAnswer = std::min(FirstAnswer, Tmp2);
+
+      // If we don't know any bits, early out.
+      if (FirstAnswer == 1)
+        break;
+    }
+    break;
+  }
   case TargetOpcode::G_INTRINSIC:
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
   case TargetOpcode::G_INTRINSIC_CONVERGENT:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -16,6 +16,7 @@
 #include "VPlan.h"
 #include "VPlanCFG.h"
 #include "VPlanDominatorTree.h"
+#include "VPlanHelpers.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/TypeSwitch.h"
 
@@ -235,19 +236,25 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
         if (isa<VPPredInstPHIRecipe>(UI))
           continue;
 
-        // If the user is in the same block, check it comes after R in
-        // the block.
-        if (UserVPBB == VPBB) {
-          if (RecipeNumbering[UI] < RecipeNumbering[&R]) {
-            errs() << "Use before def!\n";
-            return false;
-          }
+        // If the user is in the same block, check it comes after R in the
+        // block.
+        if (UI->getParent() == VPBB) {
+          if (RecipeNumbering[UI] >= RecipeNumbering[&R])
+            continue;
+        } else {
+          if (VPDT.dominates(VPBB, UI->getParent()))
+            continue;
         }
 
-        if (!VPDT.dominates(VPBB, UserVPBB)) {
-          errs() << "Use before def!\n";
-          return false;
-        }
+        errs() << "Use before def!\n";
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+        VPSlotTracker Tracker(VPBB->getPlan());
+        UI->print(errs(), "  ", Tracker);
+        errs() << "\n  before\n";
+        R.print(errs(), "  ", Tracker);
+        errs() << "\n";
+#endif
+        return false;
       }
     }
     if (const auto *EVL = dyn_cast<VPInstruction>(&R)) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir
@@ -130,9 +130,9 @@ body:             |
     FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
     %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
     FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
-    %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
+    %13:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
     FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
-    %14:gpr32common = UBFMWri %12, 1, 31
+    %14:gpr32common = UBFMWri %13, 1, 31
     %60:gpr32 = MOVi32imm 1
     %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv
     $w0 = COPY %16
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir
@@ -130,9 +130,9 @@ body:             |
     FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
     %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
     FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
-    %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
+    %13:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
     FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
-    %14:gpr32common = UBFMWri %12, 1, 31
+    %14:gpr32common = UBFMWri %13, 1, 31
     %60:gpr32 = MOVi32imm 1
     %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv
     $w0 = COPY %16
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -312,15 +312,14 @@ define <4 x i32> @nonsplat_shuffleinsert2(<4 x i16> %b, i16 %b0, i16 %b1, i16 %b
 ; CHECK-GI-LABEL: nonsplat_shuffleinsert2:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sxth w8, w0
-; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    mov v1.s[0], w8
-; CHECK-GI-NEXT:    sxth w8, w1
-; CHECK-GI-NEXT:    mov v1.s[1], w8
+; CHECK-GI-NEXT:    sxth w9, w1
+; CHECK-GI-NEXT:    fmov s1, w8
 ; CHECK-GI-NEXT:    sxth w8, w2
-; CHECK-GI-NEXT:    mov v1.s[2], w8
+; CHECK-GI-NEXT:    mov v1.h[1], w9
+; CHECK-GI-NEXT:    mov v1.h[2], w8
 ; CHECK-GI-NEXT:    sxth w8, w3
-; CHECK-GI-NEXT:    mov v1.s[3], w8
-; CHECK-GI-NEXT:    mul v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    mov v1.h[3], w8
+; CHECK-GI-NEXT:    smull v0.4s, v1.4h, v0.4h
 ; CHECK-GI-NEXT:    ret
 entry:
   %s0 = sext i16 %b0 to i32
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-opt-crash.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-opt-crash.ll
@@ -4,10 +4,24 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-ni:1-p2:32:8:8:32-ni:2"
 target triple = "aarch64-arm-none-linux"
 
-; Ensure that a second reduction-like pattern doesn't override the first
-; We don't care what this IR produces, just that it produces something and doesn't cause a crash
+; Ensure that a second reduction-like pattern doesn't override the first.
 define void @reprocessing_crash() #0 {
-; CHECK-LABEL: define void @reprocessing_crash
+; CHECK-LABEL: define void @reprocessing_crash(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer)
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <vscale x 4 x double> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2]] = fsub <vscale x 4 x double> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    br i1 false, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 0
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd <vscale x 2 x double> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 1
+; CHECK-NEXT:    [[BIN_RDX23:%.*]] = fadd <vscale x 2 x double> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    ret void
 ;
 entry:
   br label %vector.body
@@ -28,8 +42,58 @@ middle.block:                                     ; preds = %vector.body
   ret void
 }
 
+; Make sure we don't crash on floating point single reductions. For now, they
+; should be left as-is.
+define double @test_fp_single_reduction(i1 %c) #2 {
+; CHECK-LABEL: define double @test_fp_single_reduction(
+; CHECK-SAME: i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x double> @llvm.vector.interleave2.v8f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer)
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[VEC_PHI218:%.*]] = phi <4 x double> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <8 x double> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x double> zeroinitializer, <8 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2]] = fadd <4 x double> [[VEC_PHI218]], [[STRIDED_VEC]]
+; CHECK-NEXT:    [[TMP3]] = fadd <8 x double> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    br i1 [[C]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = call { <4 x double>, <4 x double> } @llvm.vector.deinterleave2.v8f64(<8 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x double>, <4 x double> } [[TMP4]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <4 x double>, <4 x double> } [[TMP4]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP8]])
+; CHECK-NEXT:    [[ADD_1:%.*]] = fadd double [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = fadd double [[ADD_1]], [[TMP9]]
+; CHECK-NEXT:    ret double [[ADD_2]]
+;
+entry:
+  br label %vector.body
+
+vector.body:
+  %vec.phi216 = phi <4 x double> [ zeroinitializer, %entry ], [ %2, %vector.body ]
+  %vec.phi218 = phi <4 x double> [ zeroinitializer, %entry ], [ %1, %vector.body ]
+  %vec.phi222 = phi <4 x double> [ zeroinitializer, %entry ], [ %3, %vector.body ]
+  %strided.vec = shufflevector <8 x double> zeroinitializer, <8 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %strided.vec223 = shufflevector <8 x double> zeroinitializer, <8 x double> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %1 = fadd <4 x double> %vec.phi218, %strided.vec
+  %2 = fadd <4 x double> %vec.phi216, %strided.vec
+  %3 = fadd <4 x double> %vec.phi222, %strided.vec223
+  br i1 %c, label %exit, label %vector.body
+
+exit:
+  %4 = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> %2)
+  %5 = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> %1)
+  %6 = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> %3)
+  %add.1 = fadd double %4, %5
+  %add.2 = fadd double %add.1, %6
+  ret double %add.2
+}
+
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
 declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>) #1
 
 attributes #0 = { "target-cpu"="neoverse-v1" }
 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #2 = { "target-cpu"="apple-m1" }
diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp
@@ -692,7 +692,7 @@ TEST_F(AArch64GISelMITest, TestVectorNumSignBitsConstant) {
   EXPECT_EQ(2u, Info.computeNumSignBits(CopyReg32));
   EXPECT_EQ(3u, Info.computeNumSignBits(CopyRegNeg32));
   EXPECT_EQ(3u, Info.computeNumSignBits(NonSplatSameSign));
-  EXPECT_EQ(1u, Info.computeNumSignBits(NonSplatDifferentSign));
+  EXPECT_EQ(2u, Info.computeNumSignBits(NonSplatDifferentSign));
 }
 
 TEST_F(AArch64GISelMITest, TestVectorNumSignBitsSext) {
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -41,9 +41,17 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
 #endif
   EXPECT_FALSE(verifyVPlanIsValid(Plan));
 #if GTEST_HAS_STREAM_REDIRECTION
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  EXPECT_STREQ("Use before def!\n"
+               "  EMIT vp<%1> = sub vp<%2>\n"
+               "  before\n"
+               "  EMIT vp<%2> = add ir<0>\n",
+               ::testing::internal::GetCapturedStderr().c_str());
+#else
   EXPECT_STREQ("Use before def!\n",
                ::testing::internal::GetCapturedStderr().c_str());
 #endif
+#endif
 }
 
 TEST_F(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
@@ -72,9 +80,17 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
 #endif
   EXPECT_FALSE(verifyVPlanIsValid(Plan));
 #if GTEST_HAS_STREAM_REDIRECTION
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  EXPECT_STREQ("Use before def!\n"
+               "  EMIT vp<%1> = sub vp<%3>\n"
+               "  before\n"
+               "  EMIT vp<%3> = add ir<0>\n",
+               ::testing::internal::GetCapturedStderr().c_str());
+#else
   EXPECT_STREQ("Use before def!\n",
                ::testing::internal::GetCapturedStderr().c_str());
 #endif
+#endif
 }
 
 TEST_F(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
@@ -112,8 +128,16 @@ TEST_F(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
 #endif
   EXPECT_FALSE(verifyVPlanIsValid(Plan));
 #if GTEST_HAS_STREAM_REDIRECTION
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  EXPECT_STREQ("Use before def!\n"
+               "  BLEND ir<<badref>> = vp<%2>\n"
+               "  before\n"
+               "  EMIT vp<%2> = add ir<0>\n",
+               ::testing::internal::GetCapturedStderr().c_str());
+#else
   EXPECT_STREQ("Use before def!\n",
                ::testing::internal::GetCapturedStderr().c_str());
+#endif
 #endif
 
   delete Phi;
diff --git a/mlir/docs/Passes.md b/mlir/docs/Passes.md
@@ -84,6 +84,10 @@ This document describes the available MLIR passes and their contracts.
 
 [include "NVGPUPasses.md"]
 
+## 'quant' Dialect Passes
+
+[include "QuantPasses.md"]
+
 ## Reducer Passes
 
 [include "ReducerPasses.md"]
diff --git a/mlir/lib/TableGen/Interfaces.cpp b/mlir/lib/TableGen/Interfaces.cpp
@@ -51,13 +51,15 @@ bool InterfaceMethod::isStatic() const {
 
 // Return the body for this method if it has one.
 std::optional<StringRef> InterfaceMethod::getBody() const {
-  auto value = def->getValueAsString("body");
+  // Trim leading and trailing spaces from the method body.
+  auto value = def->getValueAsString("body").trim();
   return value.empty() ? std::optional<StringRef>() : value;
 }
 
 // Return the default implementation for this method if it has one.
 std::optional<StringRef> InterfaceMethod::getDefaultImplementation() const {
-  auto value = def->getValueAsString("defaultBody");
+  // Trim leading and trailing spaces from the default implementation.
+  auto value = def->getValueAsString("defaultBody").trim();
   return value.empty() ? std::optional<StringRef>() : value;
 }
 
diff --git a/mlir/test/mlir-tblgen/method-body-with-only-spaces.td b/mlir/test/mlir-tblgen/method-body-with-only-spaces.td