
Commit 0136fd2

Merge branch 'main' into znver4-work
2 parents d300f5e + 9a30ada

5 files changed: +131, -46 lines

clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp

Lines changed: 3 additions & 3 deletions
@@ -615,20 +615,20 @@ void CIRRecordLowering::determinePacked(bool nvBaseType) {
       continue;
     // If any member falls at an offset that is not a multiple of its alignment,
     // then the entire record must be packed.
-    if (member.offset % getAlignment(member.data))
+    if (!member.offset.isMultipleOf(getAlignment(member.data)))
       packed = true;
     if (member.offset < nvSize)
       nvAlignment = std::max(nvAlignment, getAlignment(member.data));
     alignment = std::max(alignment, getAlignment(member.data));
   }
   // If the size of the record (the capstone's offset) is not a multiple of the
   // record's alignment, it must be packed.
-  if (members.back().offset % alignment)
+  if (!members.back().offset.isMultipleOf(alignment))
     packed = true;
   // If the non-virtual sub-object is not a multiple of the non-virtual
   // sub-object's alignment, it must be packed. We cannot have a packed
   // non-virtual sub-object and an unpacked complete object or vice versa.
-  if (nvSize % nvAlignment)
+  if (!nvSize.isMultipleOf(nvAlignment))
     packed = true;
   // Update the alignment of the sentinel.
   if (!packed)
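
All three hunks are behavior-preserving: each replaces an implicit remainder-to-bool conversion (offset % align) with the named predicate (!offset.isMultipleOf(align)). A minimal sketch of the idiom, where CharUnitsLike is a hypothetical stand-in for Clang's CharUnits rather than the real class:

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for Clang's CharUnits, reduced to the single
// predicate the hunks above rely on.
class CharUnitsLike {
  int64_t Quantity;

public:
  explicit CharUnitsLike(int64_t Q) : Quantity(Q) {}

  // True when this quantity is an exact multiple of N, e.g. an offset
  // landing on its alignment boundary.
  bool isMultipleOf(CharUnitsLike N) const {
    assert(N.Quantity != 0 && "alignment must be non-zero");
    return Quantity % N.Quantity == 0;
  }
};

int main() {
  CharUnitsLike Offset(6), Align(4);
  bool Packed = false;
  // Old form: if (Offset % Align) Packed = true;  (a nonzero remainder
  // converts to true). New form states the condition directly:
  if (!Offset.isMultipleOf(Align))
    Packed = true;
  return Packed ? 0 : 1;
}

The polarity flip is the only subtlety: a nonzero remainder (old condition true) corresponds exactly to isMultipleOf returning false.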

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 16 additions & 28 deletions
@@ -2241,10 +2241,9 @@ class BoUpSLP {
   /// TODO: If load combining is allowed in the IR optimizer, this analysis
   /// may not be necessary.
   bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
-  bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
-                     ArrayRef<unsigned> Order, const TargetTransformInfo &TTI,
-                     const DataLayout &DL, ScalarEvolution &SE,
-                     const int64_t Diff, StridedPtrInfo &SPtrInfo) const;
+  bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+                     Align Alignment, const int64_t Diff, Value *Ptr0,
+                     Value *PtrN, StridedPtrInfo &SPtrInfo) const;

   /// Checks if the given array of loads can be represented as a vectorized,
   /// scatter or just simple gather.
@@ -6824,13 +6823,10 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
 /// 4. Any pointer operand is an instruction with the users outside of the
 ///    current graph (for masked gathers extra extractelement instructions
 ///    might be required).
-bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
-                            ArrayRef<unsigned> Order,
-                            const TargetTransformInfo &TTI,
-                            const DataLayout &DL, ScalarEvolution &SE,
-                            const int64_t Diff,
-                            StridedPtrInfo &SPtrInfo) const {
-  const size_t Sz = VL.size();
+bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+                            Align Alignment, const int64_t Diff, Value *Ptr0,
+                            Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+  const size_t Sz = PointerOps.size();
   if (Diff % (Sz - 1) != 0)
     return false;

@@ -6842,7 +6838,6 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
   });

   const uint64_t AbsoluteDiff = std::abs(Diff);
-  Type *ScalarTy = VL.front()->getType();
   auto *VecTy = getWidenedType(ScalarTy, Sz);
   if (IsAnyPointerUsedOutGraph ||
       (AbsoluteDiff > Sz &&
@@ -6853,20 +6848,9 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
   int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
   if (Diff != Stride * static_cast<int64_t>(Sz - 1))
     return false;
-  Align Alignment =
-      cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
-          ->getAlign();
-  if (!TTI.isLegalStridedLoadStore(VecTy, Alignment))
+  if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
     return false;
-  Value *Ptr0;
-  Value *PtrN;
-  if (Order.empty()) {
-    Ptr0 = PointerOps.front();
-    PtrN = PointerOps.back();
-  } else {
-    Ptr0 = PointerOps[Order.front()];
-    PtrN = PointerOps[Order.back()];
-  }
+
   // Iterate through all pointers and check if all distances are
   // unique multiples of Dist.
   SmallSet<int64_t, 4> Dists;
@@ -6875,14 +6859,14 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
     if (Ptr == PtrN)
       Dist = Diff;
     else if (Ptr != Ptr0)
-      Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+      Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
     // If the strides are not the same or repeated, we can't
     // vectorize.
     if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
       break;
   }
   if (Dists.size() == Sz) {
-    Type *StrideTy = DL.getIndexType(Ptr0->getType());
+    Type *StrideTy = DL->getIndexType(Ptr0->getType());
     SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
     SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
     return true;
@@ -6971,7 +6955,11 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                                            cast<Instruction>(V), UserIgnoreList);
         }))
       return LoadsState::CompressVectorize;
-    if (isStridedLoad(VL, PointerOps, Order, *TTI, *DL, *SE, *Diff, SPtrInfo))
+    Align Alignment =
+        cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
+            ->getAlign();
+    if (isStridedLoad(PointerOps, ScalarTy, Alignment, *Diff, Ptr0, PtrN,
+                      SPtrInfo))
       return LoadsState::StridedVectorize;
   }
   if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
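
The refactor narrows isStridedLoad to the inputs it actually consumes (the pointer operands, scalar type, alignment, the endpoint pointers Ptr0/PtrN, and their distance Diff); the caller canVectorizeLoads now computes the alignment and endpoints once. The validity check itself is unchanged. A self-contained sketch of that check, under the simplifying assumption that the pointers have already been reduced to integer byte offsets (checkStridedAccess and Offsets are illustrative names, not from the patch):

#include <cstdint>
#include <set>
#include <vector>

// Offsets.front() plays the role of Ptr0 and Offsets.back() of PtrN.
bool checkStridedAccess(const std::vector<int64_t> &Offsets) {
  const size_t Sz = Offsets.size();
  if (Sz < 2)
    return false;
  // The overall span must split evenly into Sz - 1 equal steps.
  const int64_t Diff = Offsets.back() - Offsets.front();
  if (Diff == 0 || Diff % static_cast<int64_t>(Sz - 1) != 0)
    return false;
  const int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
  // Every distance from the first element must be a distinct multiple of
  // Stride; this mirrors the Dists loop the patch leaves intact.
  std::set<int64_t> Dists;
  for (int64_t Off : Offsets) {
    const int64_t Dist = Off - Offsets.front();
    if (Dist % Stride != 0 || !Dists.insert(Dist).second)
      return false;
  }
  return Dists.size() == Sz;
}

int main() {
  // First-lane byte offsets of the four i8 groups in the new
  // constant_stride_widen_no_reordering test below: a stride-100 access.
  return checkStridedAccess({0, 100, 200, 300}) ? 0 : 1;
}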

llvm/test/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
@@ -71,7 +71,6 @@ set(LLVM_TEST_DEPENDS
   ${LLVM_TEST_DEPENDS_COMMON}
   BugpointPasses
   LLVMWindowsDriver
-  UnitTests
   bugpoint
   llc
   lli
@@ -270,10 +269,11 @@ add_lit_testsuites(LLVM ${CMAKE_CURRENT_SOURCE_DIR}
   ${exclude_from_check_all}
   DEPENDS ${LLVM_TEST_DEPENDS}
   FOLDER "Tests/Subdirectories"
-  SKIP "^FileCheck" "^TableGen"
+  SKIP "^FileCheck" "^TableGen" "^Unit"
   )
 add_subdirectory(FileCheck)
 add_subdirectory(TableGen)
+add_subdirectory(Unit)

 # Setup an alias for 'check-all'.
 add_custom_target(check)
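
Read together with the new llvm/test/Unit/CMakeLists.txt below, these two hunks split the unit tests into their own lit suite: UnitTests leaves the shared LLVM_TEST_DEPENDS list, and "^Unit" joins the SKIP patterns, presumably so add_lit_testsuites does not generate a target that collides with the one the Unit subdirectory now defines for itself.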

llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll

Lines changed: 105 additions & 13 deletions
@@ -527,23 +527,14 @@ define void @rt_stride_1_with_reordering(ptr %pl, i64 %stride, ptr %ps) {
   ret void
 }

-; TODO: We want to generate this code:
-; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
-;   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
-;   %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
-;   %strided_load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 8, <4 x i1> splat (i1 true), i32 4)
-;   %bitcast_ = bitcast <4 x i32> %strided_load to <16 x i8>
-;   store <16 x i8> %bitcast_, ptr %gep_s0, align 1
-;   ret void
-; }
-define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
-; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
+define void @constant_stride_masked_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
+; CHECK-LABEL: define void @constant_stride_masked_no_reordering(
 ; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
 ; CHECK-NEXT:    [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <28 x i8> @llvm.masked.load.v28i8.p0(ptr [[GEP_L0]], i32 1, <28 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <28 x i8> poison)
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
-; CHECK-NEXT:    store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
+; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr [[GEP_S0]], align 1
 ; CHECK-NEXT:    ret void
 ;
   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
@@ -617,6 +608,107 @@ define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps)
   ret void
 }

+; TODO: We want to generate this code:
+; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) #0 {
+;   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+;   %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+;   %1 = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 100, <4 x i1> splat (i1 true), i32 4)
+;   %2 = bitcast <4 x i32> %1 to <16 x i8>
+;   store <16 x i8> %2, ptr %gep_s0, align 1
+;   ret void
+; }
+define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
+; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
+; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
+; CHECK-NEXT:    [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 100
+; CHECK-NEXT:    [[GEP_L8:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 200
+; CHECK-NEXT:    [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 300
+; CHECK-NEXT:    [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[GEP_L0]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_L4]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_L8]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[GEP_L12]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:    store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
+; CHECK-NEXT:    ret void
+;
+  %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+  %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
+  %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2
+  %gep_l3 = getelementptr inbounds i8, ptr %pl, i64 3
+  %gep_l4 = getelementptr inbounds i8, ptr %pl, i64 100
+  %gep_l5 = getelementptr inbounds i8, ptr %pl, i64 101
+  %gep_l6 = getelementptr inbounds i8, ptr %pl, i64 102
+  %gep_l7 = getelementptr inbounds i8, ptr %pl, i64 103
+  %gep_l8 = getelementptr inbounds i8, ptr %pl, i64 200
+  %gep_l9 = getelementptr inbounds i8, ptr %pl, i64 201
+  %gep_l10 = getelementptr inbounds i8, ptr %pl, i64 202
+  %gep_l11 = getelementptr inbounds i8, ptr %pl, i64 203
+  %gep_l12 = getelementptr inbounds i8, ptr %pl, i64 300
+  %gep_l13 = getelementptr inbounds i8, ptr %pl, i64 301
+  %gep_l14 = getelementptr inbounds i8, ptr %pl, i64 302
+  %gep_l15 = getelementptr inbounds i8, ptr %pl, i64 303
+
+  %load0 = load i8, ptr %gep_l0 , align 1
+  %load1 = load i8, ptr %gep_l1 , align 1
+  %load2 = load i8, ptr %gep_l2 , align 1
+  %load3 = load i8, ptr %gep_l3 , align 1
+  %load4 = load i8, ptr %gep_l4 , align 1
+  %load5 = load i8, ptr %gep_l5 , align 1
+  %load6 = load i8, ptr %gep_l6 , align 1
+  %load7 = load i8, ptr %gep_l7 , align 1
+  %load8 = load i8, ptr %gep_l8 , align 1
+  %load9 = load i8, ptr %gep_l9 , align 1
+  %load10 = load i8, ptr %gep_l10, align 1
+  %load11 = load i8, ptr %gep_l11, align 1
+  %load12 = load i8, ptr %gep_l12, align 1
+  %load13 = load i8, ptr %gep_l13, align 1
+  %load14 = load i8, ptr %gep_l14, align 1
+  %load15 = load i8, ptr %gep_l15, align 1
+
+  %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+  %gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
+  %gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
+  %gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
+  %gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4
+  %gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5
+  %gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6
+  %gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7
+  %gep_s8 = getelementptr inbounds i8, ptr %ps, i64 8
+  %gep_s9 = getelementptr inbounds i8, ptr %ps, i64 9
+  %gep_s10 = getelementptr inbounds i8, ptr %ps, i64 10
+  %gep_s11 = getelementptr inbounds i8, ptr %ps, i64 11
+  %gep_s12 = getelementptr inbounds i8, ptr %ps, i64 12
+  %gep_s13 = getelementptr inbounds i8, ptr %ps, i64 13
+  %gep_s14 = getelementptr inbounds i8, ptr %ps, i64 14
+  %gep_s15 = getelementptr inbounds i8, ptr %ps, i64 15
+
+  store i8 %load0, ptr %gep_s0, align 1
+  store i8 %load1, ptr %gep_s1, align 1
+  store i8 %load2, ptr %gep_s2, align 1
+  store i8 %load3, ptr %gep_s3, align 1
+  store i8 %load4, ptr %gep_s4, align 1
+  store i8 %load5, ptr %gep_s5, align 1
+  store i8 %load6, ptr %gep_s6, align 1
+  store i8 %load7, ptr %gep_s7, align 1
+  store i8 %load8, ptr %gep_s8, align 1
+  store i8 %load9, ptr %gep_s9, align 1
+  store i8 %load10, ptr %gep_s10, align 1
+  store i8 %load11, ptr %gep_s11, align 1
+  store i8 %load12, ptr %gep_s12, align 1
+  store i8 %load13, ptr %gep_s13, align 1
+  store i8 %load14, ptr %gep_s14, align 1
+  store i8 %load15, ptr %gep_s15, align 1
+
+  ret void
+}
 ; TODO: We want to generate this code:
 ; define void @rt_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
 ;   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0

llvm/test/Unit/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+add_lit_testsuite(check-llvm-unit "Running lit suite for LLVM unit tests"
+  ${CMAKE_CURRENT_BINARY_DIR}
+  EXCLUDE_FROM_CHECK_ALL
+  DEPENDS UnitTests
+  )
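
Assuming the usual add_lit_testsuite semantics, this new file gives the unit tests a standalone check-llvm-unit target: DEPENDS UnitTests builds the gtest binaries before lit runs them, and EXCLUDE_FROM_CHECK_ALL keeps the target out of check-all so it is invoked explicitly (e.g. ninja check-llvm-unit).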
