Skip to content

Commit 7834708

Browse files
committed
Improve opt test
1 parent a5c7d7d commit 7834708

File tree

2 files changed

+116
-369
lines changed

2 files changed

+116
-369
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-vector-interleave=1 %s | FileCheck %s
3+
; @alias_mask(a, b, c, n): scalar byte loop computing c[i] = a[i] + b[i] for
; i in [0, n). %a is marked noalias; %b and %c carry no aliasing attribute, so
; the load from %b and the store to %c are the accesses that may overlap.
;
; The autogenerated assertions below expect loop-vectorize to emit a predicated
; <vscale x 16 x i8> loop whose masked loads/stores use the active lane mask
; AND-ed with a mask derived from the %b/%c pointer difference
; (PTR_DIFF_LANE_MASK, OR-ed with a splat of "difference is negative"), and
; whose index advances by the number of set lanes in that pointer-difference
; mask (zext to i8 lanes followed by vector.reduce.add).
;
; NOTE(review): vector.memcheck still computes DIFF_CHECK but then branches
; unconditionally to vector.ph -- presumably intentional because the mask
; replaces the runtime check; confirm against the pass.
; NOTE(review): the set-lane count is reduced in i8; with 16 x vscale lanes this
; looks like it could wrap when 256 lanes are set -- confirm the pass bounds it.
define dso_local void @alias_mask(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
4+
; CHECK-LABEL: define dso_local void @alias_mask(
5+
; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[B4:%.*]] = ptrtoint ptr [[B]] to i64
8+
; CHECK-NEXT: [[C3:%.*]] = ptrtoint ptr [[C]] to i64
9+
; CHECK-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
10+
; CHECK-NEXT: [[C1:%.*]] = ptrtoint ptr [[C]] to i64
11+
; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N]], 0
12+
; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
13+
; CHECK: for.body.preheader:
14+
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
15+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
16+
; CHECK: vector.memcheck:
17+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
18+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
19+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[C1]], [[B2]]
20+
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
21+
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
22+
; CHECK: vector.ph:
23+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
24+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 16
25+
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
26+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP5]]
27+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
28+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
29+
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
30+
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 16
31+
; CHECK-NEXT: [[SUB_DIFF:%.*]] = sub i64 [[B4]], [[C3]]
32+
; CHECK-NEXT: [[DIFF:%.*]] = sdiv i64 [[SUB_DIFF]], 1
33+
; CHECK-NEXT: [[NEG_COMPARE:%.*]] = icmp slt i64 [[DIFF]], 0
34+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i1> poison, i1 [[NEG_COMPARE]], i64 0
35+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i1> [[DOTSPLATINSERT]], <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
36+
; CHECK-NEXT: [[PTR_DIFF_LANE_MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[DIFF]])
37+
; CHECK-NEXT: [[TMP8:%.*]] = or <vscale x 16 x i1> [[PTR_DIFF_LANE_MASK]], [[DOTSPLAT]]
38+
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
39+
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 16
40+
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[TMP10]]
41+
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[WIDE_TRIP_COUNT]], [[TMP10]]
42+
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 [[TMP11]], i64 0
43+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
44+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
45+
; CHECK: vector.body:
46+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
47+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
48+
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 0
49+
; CHECK-NEXT: [[TMP15:%.*]] = and <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], [[TMP8]]
50+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
51+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
52+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP17]], i32 1, <vscale x 16 x i1> [[TMP15]], <vscale x 16 x i8> poison)
53+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
54+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 0
55+
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP19]], i32 1, <vscale x 16 x i1> [[TMP15]], <vscale x 16 x i8> poison)
56+
; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD5]], [[WIDE_MASKED_LOAD]]
57+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP14]]
58+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP21]], i32 0
59+
; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP20]], ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[TMP15]])
60+
; CHECK-NEXT: [[TMP23:%.*]] = zext <vscale x 16 x i1> [[TMP8]] to <vscale x 16 x i8>
61+
; CHECK-NEXT: [[TMP24:%.*]] = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> [[TMP23]])
62+
; CHECK-NEXT: [[TMP25:%.*]] = zext i8 [[TMP24]] to i64
63+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP25]]
64+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP13]])
65+
; CHECK-NEXT: [[TMP26:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
66+
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <vscale x 16 x i1> [[TMP26]], i32 0
67+
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
68+
; CHECK: middle.block:
69+
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
70+
; CHECK: scalar.ph:
71+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
72+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
73+
; CHECK: for.cond.cleanup.loopexit:
74+
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
75+
; CHECK: for.cond.cleanup:
76+
; CHECK-NEXT: ret void
77+
; CHECK: for.body:
78+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
79+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
80+
; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
81+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDVARS_IV]]
82+
; CHECK-NEXT: [[TMP29:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
83+
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP29]], [[TMP28]]
84+
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDVARS_IV]]
85+
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX6]], align 1
86+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
87+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
88+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
89+
;
90+
; Input IR starts here. Guard: only enter the loop when n > 0 (signed compare).
entry:
91+
%cmp11 = icmp sgt i32 %n, 0
92+
br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup
93+
94+
for.body.preheader: ; preds = %entry
95+
; Widen the trip count to i64; nneg is consistent with the n > 0 guard in entry.
%wide.trip.count = zext nneg i32 %n to i64
96+
br label %for.body
97+
98+
for.cond.cleanup.loopexit: ; preds = %for.body
99+
br label %for.cond.cleanup
100+
101+
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
102+
ret void
103+
104+
for.body: ; preds = %for.body.preheader, %for.body
105+
; %indvars.iv is the element index i, starting at 0 and stepping by 1.
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
106+
; Load a[i].
%arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
107+
%0 = load i8, ptr %arrayidx, align 1
108+
; Load b[i]; %b is not noalias, so this may overlap the store to %c below.
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
109+
%1 = load i8, ptr %arrayidx2, align 1
110+
%add = add i8 %1, %0
111+
; Store the i8 sum to c[i].
%arrayidx6 = getelementptr inbounds i8, ptr %c, i64 %indvars.iv
112+
store i8 %add, ptr %arrayidx6, align 1
113+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
114+
; Leave the loop once i + 1 equals the widened trip count.
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
115+
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
116+
}

0 commit comments

Comments
 (0)