
Commit 0136fd2

Merge branch 'main' into znver4-work
2 parents d300f5e + 9a30ada

5 files changed: +131, -46 lines

clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp

Lines changed: 3 additions & 3 deletions
@@ -615,20 +615,20 @@ void CIRRecordLowering::determinePacked(bool nvBaseType) {
       continue;
     // If any member falls at an offset that is not a multiple of its alignment,
     // then the entire record must be packed.
-    if (member.offset % getAlignment(member.data))
+    if (!member.offset.isMultipleOf(getAlignment(member.data)))
       packed = true;
     if (member.offset < nvSize)
       nvAlignment = std::max(nvAlignment, getAlignment(member.data));
     alignment = std::max(alignment, getAlignment(member.data));
   }
   // If the size of the record (the capstone's offset) is not a multiple of the
   // record's alignment, it must be packed.
-  if (members.back().offset % alignment)
+  if (!members.back().offset.isMultipleOf(alignment))
     packed = true;
   // If the non-virtual sub-object is not a multiple of the non-virtual
   // sub-object's alignment, it must be packed. We cannot have a packed
   // non-virtual sub-object and an unpacked complete object or vice versa.
-  if (nvSize % nvAlignment)
+  if (!nvSize.isMultipleOf(nvAlignment))
     packed = true;
   // Update the alignment of the sentinel.
   if (!packed)
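
All three hunks are behavior-preserving: each replaces an implicit remainder-to-bool conversion (offset % align) with the named predicate (!offset.isMultipleOf(align)). A minimal sketch of the idiom, where CharUnitsLike is a hypothetical stand-in for Clang's CharUnits rather than the real class:

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for Clang's CharUnits, reduced to the single
// predicate the hunks above rely on.
class CharUnitsLike {
  int64_t Quantity;

public:
  explicit CharUnitsLike(int64_t Q) : Quantity(Q) {}

  // True when this quantity is an exact multiple of N, e.g. an offset
  // landing on its alignment boundary.
  bool isMultipleOf(CharUnitsLike N) const {
    assert(N.Quantity != 0 && "alignment must be non-zero");
    return Quantity % N.Quantity == 0;
  }
};

int main() {
  CharUnitsLike Offset(6), Align(4);
  bool Packed = false;
  // Old form: if (Offset % Align) Packed = true;  (a nonzero remainder
  // converts to true). New form states the condition directly:
  if (!Offset.isMultipleOf(Align))
    Packed = true;
  return Packed ? 0 : 1;
}

The polarity flip is the only subtlety: a nonzero remainder (old condition true) corresponds exactly to isMultipleOf returning false.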

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 16 additions & 28 deletions
@@ -2241,10 +2241,9 @@ class BoUpSLP {
   /// TODO: If load combining is allowed in the IR optimizer, this analysis
   /// may not be necessary.
   bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
-  bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
-                     ArrayRef<unsigned> Order, const TargetTransformInfo &TTI,
-                     const DataLayout &DL, ScalarEvolution &SE,
-                     const int64_t Diff, StridedPtrInfo &SPtrInfo) const;
+  bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+                     Align Alignment, const int64_t Diff, Value *Ptr0,
+                     Value *PtrN, StridedPtrInfo &SPtrInfo) const;

   /// Checks if the given array of loads can be represented as a vectorized,
   /// scatter or just simple gather.
@@ -6824,13 +6823,10 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
 /// 4. Any pointer operand is an instruction with the users outside of the
 ///    current graph (for masked gathers extra extractelement instructions
 ///    might be required).
-bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
-                            ArrayRef<unsigned> Order,
-                            const TargetTransformInfo &TTI,
-                            const DataLayout &DL, ScalarEvolution &SE,
-                            const int64_t Diff,
-                            StridedPtrInfo &SPtrInfo) const {
-  const size_t Sz = VL.size();
+bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+                            Align Alignment, const int64_t Diff, Value *Ptr0,
+                            Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+  const size_t Sz = PointerOps.size();
   if (Diff % (Sz - 1) != 0)
     return false;

@@ -6842,7 +6838,6 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
   });

   const uint64_t AbsoluteDiff = std::abs(Diff);
-  Type *ScalarTy = VL.front()->getType();
   auto *VecTy = getWidenedType(ScalarTy, Sz);
   if (IsAnyPointerUsedOutGraph ||
       (AbsoluteDiff > Sz &&
@@ -6853,20 +6848,9 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
   int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
   if (Diff != Stride * static_cast<int64_t>(Sz - 1))
     return false;
-  Align Alignment =
-      cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
-          ->getAlign();
-  if (!TTI.isLegalStridedLoadStore(VecTy, Alignment))
+  if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
     return false;
-  Value *Ptr0;
-  Value *PtrN;
-  if (Order.empty()) {
-    Ptr0 = PointerOps.front();
-    PtrN = PointerOps.back();
-  } else {
-    Ptr0 = PointerOps[Order.front()];
-    PtrN = PointerOps[Order.back()];
-  }
+
   // Iterate through all pointers and check if all distances are
   // unique multiples of Dist.
   SmallSet<int64_t, 4> Dists;
@@ -6875,14 +6859,14 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
     if (Ptr == PtrN)
       Dist = Diff;
     else if (Ptr != Ptr0)
-      Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+      Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
     // If the strides are not the same or repeated, we can't
     // vectorize.
     if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
       break;
   }
   if (Dists.size() == Sz) {
-    Type *StrideTy = DL.getIndexType(Ptr0->getType());
+    Type *StrideTy = DL->getIndexType(Ptr0->getType());
     SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
     SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
     return true;
@@ -6971,7 +6955,11 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                                            cast<Instruction>(V), UserIgnoreList);
         }))
       return LoadsState::CompressVectorize;
-    if (isStridedLoad(VL, PointerOps, Order, *TTI, *DL, *SE, *Diff, SPtrInfo))
+    Align Alignment =
+        cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
+            ->getAlign();
+    if (isStridedLoad(PointerOps, ScalarTy, Alignment, *Diff, Ptr0, PtrN,
+                      SPtrInfo))
       return LoadsState::StridedVectorize;
   }
   if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
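
The refactor narrows isStridedLoad to the inputs it actually consumes (the pointer operands, scalar type, alignment, the endpoint pointers Ptr0/PtrN, and their distance Diff); the caller canVectorizeLoads now computes the alignment and endpoints once. The validity check itself is unchanged. A self-contained sketch of that check, under the simplifying assumption that the pointers have already been reduced to integer byte offsets (checkStridedAccess and Offsets are illustrative names, not from the patch):

#include <cstdint>
#include <set>
#include <vector>

// Offsets.front() plays the role of Ptr0 and Offsets.back() of PtrN.
bool checkStridedAccess(const std::vector<int64_t> &Offsets) {
  const size_t Sz = Offsets.size();
  if (Sz < 2)
    return false;
  // The overall span must split evenly into Sz - 1 equal steps.
  const int64_t Diff = Offsets.back() - Offsets.front();
  if (Diff == 0 || Diff % static_cast<int64_t>(Sz - 1) != 0)
    return false;
  const int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
  // Every distance from the first element must be a distinct multiple of
  // Stride; this mirrors the Dists loop the patch leaves intact.
  std::set<int64_t> Dists;
  for (int64_t Off : Offsets) {
    const int64_t Dist = Off - Offsets.front();
    if (Dist % Stride != 0 || !Dists.insert(Dist).second)
      return false;
  }
  return Dists.size() == Sz;
}

int main() {
  // First-lane byte offsets of the four i8 groups in the new
  // constant_stride_widen_no_reordering test below: a stride-100 access.
  return checkStridedAccess({0, 100, 200, 300}) ? 0 : 1;
}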

llvm/test/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
@@ -71,7 +71,6 @@ set(LLVM_TEST_DEPENDS
   ${LLVM_TEST_DEPENDS_COMMON}
   BugpointPasses
   LLVMWindowsDriver
-  UnitTests
   bugpoint
   llc
   lli
@@ -270,10 +269,11 @@ add_lit_testsuites(LLVM ${CMAKE_CURRENT_SOURCE_DIR}
   ${exclude_from_check_all}
   DEPENDS ${LLVM_TEST_DEPENDS}
   FOLDER "Tests/Subdirectories"
-  SKIP "^FileCheck" "^TableGen"
+  SKIP "^FileCheck" "^TableGen" "^Unit"
   )
 add_subdirectory(FileCheck)
 add_subdirectory(TableGen)
+add_subdirectory(Unit)

 # Setup an alias for 'check-all'.
 add_custom_target(check)
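
Read together with the new llvm/test/Unit/CMakeLists.txt below, these two hunks split the unit tests into their own lit suite: UnitTests leaves the shared LLVM_TEST_DEPENDS list, and "^Unit" joins the SKIP patterns, presumably so add_lit_testsuites does not generate a target that collides with the one the Unit subdirectory now defines for itself.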

llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll

Lines changed: 105 additions & 13 deletions
@@ -527,23 +527,14 @@ define void @rt_stride_1_with_reordering(ptr %pl, i64 %stride, ptr %ps) {
   ret void
 }

-; TODO: We want to generate this code:
-; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
-;   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
-;   %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
-;   %strided_load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 8, <4 x i1> splat (i1 true), i32 4)
-;   %bitcast_ = bitcast <4 x i32> %strided_load to <16 x i8>
-;   store <16 x i8> %bitcast_, ptr %gep_s0, align 1
-;   ret void
-; }
-define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
-; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
+define void @constant_stride_masked_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
+; CHECK-LABEL: define void @constant_stride_masked_no_reordering(
 ; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
 ; CHECK-NEXT:    [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <28 x i8> @llvm.masked.load.v28i8.p0(ptr [[GEP_L0]], i32 1, <28 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <28 x i8> poison)
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
-; CHECK-NEXT:    store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
+; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr [[GEP_S0]], align 1
 ; CHECK-NEXT:    ret void
 ;
   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
@@ -617,6 +608,107 @@ define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps)
   ret void
 }

+; TODO: We want to generate this code:
+; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) #0 {
+;   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+;   %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+;   %1 = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 100, <4 x i1> splat (i1 true), i32 4)
+;   %2 = bitcast <4 x i32> %1 to <16 x i8>
+;   store <16 x i8> %2, ptr %gep_s0, align 1
+;   ret void
+; }
+define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
+; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
+; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
+; CHECK-NEXT:    [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 100
+; CHECK-NEXT:    [[GEP_L8:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 200
+; CHECK-NEXT:    [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 300
+; CHECK-NEXT:    [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[GEP_L0]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_L4]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_L8]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[GEP_L12]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:    store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
+; CHECK-NEXT:    ret void
+;
+  %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+  %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
+  %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2
+  %gep_l3 = getelementptr inbounds i8, ptr %pl, i64 3
+  %gep_l4 = getelementptr inbounds i8, ptr %pl, i64 100
+  %gep_l5 = getelementptr inbounds i8, ptr %pl, i64 101
+  %gep_l6 = getelementptr inbounds i8, ptr %pl, i64 102
+  %gep_l7 = getelementptr inbounds i8, ptr %pl, i64 103
+  %gep_l8 = getelementptr inbounds i8, ptr %pl, i64 200
+  %gep_l9 = getelementptr inbounds i8, ptr %pl, i64 201
+  %gep_l10 = getelementptr inbounds i8, ptr %pl, i64 202
+  %gep_l11 = getelementptr inbounds i8, ptr %pl, i64 203
+  %gep_l12 = getelementptr inbounds i8, ptr %pl, i64 300
+  %gep_l13 = getelementptr inbounds i8, ptr %pl, i64 301
+  %gep_l14 = getelementptr inbounds i8, ptr %pl, i64 302
+  %gep_l15 = getelementptr inbounds i8, ptr %pl, i64 303
+
+  %load0 = load i8, ptr %gep_l0 , align 1
+  %load1 = load i8, ptr %gep_l1 , align 1
+  %load2 = load i8, ptr %gep_l2 , align 1
+  %load3 = load i8, ptr %gep_l3 , align 1
+  %load4 = load i8, ptr %gep_l4 , align 1
+  %load5 = load i8, ptr %gep_l5 , align 1
+  %load6 = load i8, ptr %gep_l6 , align 1
+  %load7 = load i8, ptr %gep_l7 , align 1
+  %load8 = load i8, ptr %gep_l8 , align 1
+  %load9 = load i8, ptr %gep_l9 , align 1
+  %load10 = load i8, ptr %gep_l10, align 1
+  %load11 = load i8, ptr %gep_l11, align 1
+  %load12 = load i8, ptr %gep_l12, align 1
+  %load13 = load i8, ptr %gep_l13, align 1
+  %load14 = load i8, ptr %gep_l14, align 1
+  %load15 = load i8, ptr %gep_l15, align 1
+
+  %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+  %gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
+  %gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
+  %gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
+  %gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4
+  %gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5
+  %gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6
+  %gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7
+  %gep_s8 = getelementptr inbounds i8, ptr %ps, i64 8
+  %gep_s9 = getelementptr inbounds i8, ptr %ps, i64 9
+  %gep_s10 = getelementptr inbounds i8, ptr %ps, i64 10
+  %gep_s11 = getelementptr inbounds i8, ptr %ps, i64 11
+  %gep_s12 = getelementptr inbounds i8, ptr %ps, i64 12
+  %gep_s13 = getelementptr inbounds i8, ptr %ps, i64 13
+  %gep_s14 = getelementptr inbounds i8, ptr %ps, i64 14
+  %gep_s15 = getelementptr inbounds i8, ptr %ps, i64 15
+
+  store i8 %load0, ptr %gep_s0, align 1
+  store i8 %load1, ptr %gep_s1, align 1
+  store i8 %load2, ptr %gep_s2, align 1
+  store i8 %load3, ptr %gep_s3, align 1
+  store i8 %load4, ptr %gep_s4, align 1
+  store i8 %load5, ptr %gep_s5, align 1
+  store i8 %load6, ptr %gep_s6, align 1
+  store i8 %load7, ptr %gep_s7, align 1
+  store i8 %load8, ptr %gep_s8, align 1
+  store i8 %load9, ptr %gep_s9, align 1
+  store i8 %load10, ptr %gep_s10, align 1
+  store i8 %load11, ptr %gep_s11, align 1
+  store i8 %load12, ptr %gep_s12, align 1
+  store i8 %load13, ptr %gep_s13, align 1
+  store i8 %load14, ptr %gep_s14, align 1
+  store i8 %load15, ptr %gep_s15, align 1
+
+  ret void
+}
 ; TODO: We want to generate this code:
 ; define void @rt_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
 ;   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0

llvm/test/Unit/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+add_lit_testsuite(check-llvm-unit "Running lit suite for LLVM unit tests"
+  ${CMAKE_CURRENT_BINARY_DIR}
+  EXCLUDE_FROM_CHECK_ALL
+  DEPENDS UnitTests
+  )
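
Assuming the usual add_lit_testsuite semantics, this new file gives the unit tests a standalone check-llvm-unit target: DEPENDS UnitTests builds the gtest binaries before lit runs them, and EXCLUDE_FROM_CHECK_ALL keeps the target out of check-all so it is invoked explicitly (e.g. ninja check-llvm-unit).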
