Skip to content

Commit 9688f88

Browse files
authored
[LV] Pre-commit test for #128062 (#164801)
In preparation to extend the work done by dfa665f ([VPlan] Add transformation to narrow interleave groups) to make the narrowing more powerful, pre-commit a test case from #128062.
1 parent 37f7b31 commit 9688f88

File tree

1 file changed

+201
-0
lines changed

1 file changed

+201
-0
lines changed
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
2+
; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-interleaved-mem-accesses -S | FileCheck %s
3+
4+
; Test case from #128062.
; Each scalar iteration handles 4 consecutive bytes starting at %dst: every
; byte is loaded, zero-extended to i16, multiplied by %b, divided by 255,
; truncated back to i8, added to %a and stored back to the address it was
; loaded from. %dst advances by 4 bytes per iteration.
; The CHECK lines record the current vectorized form: one <16 x i8> wide load
; is split into four <4 x i8> strided vectors, each is processed separately,
; and the results are re-interleaved into a single <16 x i8> store.
define void @pr128062(ptr %dst.start, i8 %a, i16 %b) {
5+
; CHECK-LABEL: define void @pr128062(
6+
; CHECK-SAME: ptr [[DST_START:%.*]], i8 [[A:%.*]], i16 [[B:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
9+
; CHECK: [[VECTOR_PH]]:
10+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
11+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
12+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
13+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
14+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
15+
; CHECK: [[VECTOR_BODY]]:
16+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
17+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
18+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[OFFSET_IDX]]
19+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
20+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
21+
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
22+
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
23+
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
24+
; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i8> [[STRIDED_VEC]] to <4 x i16>
25+
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i16> [[TMP0]], [[BROADCAST_SPLAT]]
26+
; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255)
27+
; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8>
28+
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]]
29+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[STRIDED_VEC3]] to <4 x i16>
30+
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP5]], [[BROADCAST_SPLAT]]
31+
; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255)
32+
; CHECK-NEXT: [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8>
33+
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]]
34+
; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16>
35+
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]]
36+
; CHECK-NEXT: [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255)
37+
; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8>
38+
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]]
39+
; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16>
40+
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]]
41+
; CHECK-NEXT: [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255)
42+
; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8>
43+
; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]]
44+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
45+
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
46+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
47+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
48+
; CHECK-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1
49+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
50+
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
51+
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
52+
; CHECK: [[MIDDLE_BLOCK]]:
53+
; CHECK-NEXT: br label %[[EXIT:.*]]
54+
; CHECK: [[EXIT]]:
55+
; CHECK-NEXT: ret void
56+
;
57+
entry:
58+
br label %loop
59+
60+
loop:
61+
; %iv steps by 4 and the loop exits when it reaches 256, i.e. 64 iterations.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
62+
%dst = phi ptr [ %dst.start, %entry ], [ %dst.next, %loop ]
63+
%dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4
64+
; Member 0: read-modify-write of the byte at %dst + 0.
%load.dst = load i8, ptr %dst, align 1
65+
%dst.ext = zext i8 %load.dst to i16
66+
%mul.dst.0 = mul nuw i16 %dst.ext, %b
67+
%udiv.0 = udiv i16 %mul.dst.0, 255
68+
%trunc.0 = trunc nuw i16 %udiv.0 to i8
69+
%val.0 = add i8 %a, %trunc.0
70+
store i8 %val.0, ptr %dst, align 1
71+
; Member 1: same computation on the byte at %dst + 1.
%gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1
72+
%load.dst.1 = load i8, ptr %gep.dst.1, align 1
73+
%dst.1.ext = zext i8 %load.dst.1 to i16
74+
%mul.dst.1 = mul nuw i16 %dst.1.ext, %b
75+
%udiv.1 = udiv i16 %mul.dst.1, 255
76+
%trunc.1 = trunc nuw i16 %udiv.1 to i8
77+
%val.1 = add i8 %a, %trunc.1
78+
store i8 %val.1, ptr %gep.dst.1, align 1
79+
; Member 2: same computation on the byte at %dst + 2.
%gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2
80+
%load.dst.2 = load i8, ptr %gep.dst.2, align 1
81+
%dst.2.ext = zext i8 %load.dst.2 to i16
82+
%mul.dst.2 = mul nuw i16 %dst.2.ext, %b
83+
%udiv.2 = udiv i16 %mul.dst.2, 255
84+
%trunc.2 = trunc nuw i16 %udiv.2 to i8
85+
%val.2 = add i8 %a, %trunc.2
86+
store i8 %val.2, ptr %gep.dst.2, align 1
87+
; Member 3: same computation on the byte at %dst + 3.
%gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3
88+
%load.dst.3 = load i8, ptr %gep.dst.3, align 1
89+
%dst.3.ext = zext i8 %load.dst.3 to i16
90+
%mul.dst.3 = mul nuw i16 %dst.3.ext, %b
91+
%udiv.3 = udiv i16 %mul.dst.3, 255
92+
%trunc.3 = trunc nuw i16 %udiv.3 to i8
93+
%val.3 = add i8 %a, %trunc.3
94+
store i8 %val.3, ptr %gep.dst.3, align 1
95+
%iv.next = add i64 %iv, 4
96+
%exit.cond = icmp eq i64 %iv.next, 256
97+
br i1 %exit.cond, label %exit, label %loop
98+
99+
exit:
100+
ret void
101+
}
102+
103+
; Same as @pr128062 above, except one zext is replaced with an sext: the byte
; at %dst + 1 is sign-extended (%dst.1.ext), while the bytes at offsets 0, 2
; and 3 are still zero-extended. The CHECK lines record the matching sext on
; the second strided vector in the vectorized output.
104+
define void @opcode_mismatch(ptr %dst.start, i8 %a, i16 %b) {
105+
; CHECK-LABEL: define void @opcode_mismatch(
106+
; CHECK-SAME: ptr [[DST_START:%.*]], i8 [[A:%.*]], i16 [[B:%.*]]) {
107+
; CHECK-NEXT: [[ENTRY:.*:]]
108+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
109+
; CHECK: [[VECTOR_PH]]:
110+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
111+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
112+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
113+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
114+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
115+
; CHECK: [[VECTOR_BODY]]:
116+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
117+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
118+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[OFFSET_IDX]]
119+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
120+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
121+
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
122+
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
123+
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
124+
; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i8> [[STRIDED_VEC]] to <4 x i16>
125+
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i16> [[TMP0]], [[BROADCAST_SPLAT]]
126+
; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255)
127+
; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8>
128+
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]]
129+
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i8> [[STRIDED_VEC3]] to <4 x i16>
130+
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP5]], [[BROADCAST_SPLAT]]
131+
; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255)
132+
; CHECK-NEXT: [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8>
133+
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]]
134+
; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16>
135+
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]]
136+
; CHECK-NEXT: [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255)
137+
; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8>
138+
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]]
139+
; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16>
140+
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]]
141+
; CHECK-NEXT: [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255)
142+
; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8>
143+
; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]]
144+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
145+
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
146+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
147+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
148+
; CHECK-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1
149+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
150+
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
151+
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
152+
; CHECK: [[MIDDLE_BLOCK]]:
153+
; CHECK-NEXT: br label %[[EXIT:.*]]
154+
; CHECK: [[EXIT]]:
155+
; CHECK-NEXT: ret void
156+
;
157+
entry:
158+
br label %loop
159+
160+
loop:
161+
; %iv steps by 4 and the loop exits when it reaches 256, i.e. 64 iterations.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
162+
%dst = phi ptr [ %dst.start, %entry ], [ %dst.next, %loop ]
163+
%dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4
164+
; Member 0: read-modify-write of the byte at %dst + 0 (zext).
%load.dst = load i8, ptr %dst, align 1
165+
%dst.ext = zext i8 %load.dst to i16
166+
%mul.dst.0 = mul nuw i16 %dst.ext, %b
167+
%udiv.0 = udiv i16 %mul.dst.0, 255
168+
%trunc.0 = trunc nuw i16 %udiv.0 to i8
169+
%val.0 = add i8 %a, %trunc.0
170+
store i8 %val.0, ptr %dst, align 1
171+
; Member 1: byte at %dst + 1; this is the one member that uses sext instead
; of zext.
%gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1
172+
%load.dst.1 = load i8, ptr %gep.dst.1, align 1
173+
%dst.1.ext = sext i8 %load.dst.1 to i16
174+
%mul.dst.1 = mul nuw i16 %dst.1.ext, %b
175+
%udiv.1 = udiv i16 %mul.dst.1, 255
176+
%trunc.1 = trunc nuw i16 %udiv.1 to i8
177+
%val.1 = add i8 %a, %trunc.1
178+
store i8 %val.1, ptr %gep.dst.1, align 1
179+
; Member 2: byte at %dst + 2 (zext).
%gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2
180+
%load.dst.2 = load i8, ptr %gep.dst.2, align 1
181+
%dst.2.ext = zext i8 %load.dst.2 to i16
182+
%mul.dst.2 = mul nuw i16 %dst.2.ext, %b
183+
%udiv.2 = udiv i16 %mul.dst.2, 255
184+
%trunc.2 = trunc nuw i16 %udiv.2 to i8
185+
%val.2 = add i8 %a, %trunc.2
186+
store i8 %val.2, ptr %gep.dst.2, align 1
187+
; Member 3: byte at %dst + 3 (zext).
%gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3
188+
%load.dst.3 = load i8, ptr %gep.dst.3, align 1
189+
%dst.3.ext = zext i8 %load.dst.3 to i16
190+
%mul.dst.3 = mul nuw i16 %dst.3.ext, %b
191+
%udiv.3 = udiv i16 %mul.dst.3, 255
192+
%trunc.3 = trunc nuw i16 %udiv.3 to i8
193+
%val.3 = add i8 %a, %trunc.3
194+
store i8 %val.3, ptr %gep.dst.3, align 1
195+
%iv.next = add i64 %iv, 4
196+
%exit.cond = icmp eq i64 %iv.next, 256
197+
br i1 %exit.cond, label %exit, label %loop
198+
199+
exit:
200+
ret void
201+
}

0 commit comments

Comments
 (0)