Skip to content

Commit 12a1ca1

Browse files
committed
[LV] Pre-commit test for #128062
In preparation to extend the work done by dfa665f ([VPlan] Add transformation to narrow interleave groups) to make the narrowing more powerful, pre-commit a test case from #128062.
1 parent 6058c0c commit 12a1ca1

File tree

1 file changed

+253
-0
lines changed

1 file changed

+253
-0
lines changed
Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 6
2+
; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
3+
; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-interleaved-mem-accesses -S | FileCheck --check-prefix=INTERLEAVED %s
4+
5+
; Pre-commit test case from #128062.
;
; Each iteration of the scalar loop below handles the 4 consecutive bytes
; starting at %dst: every byte is loaded, zero-extended to i16, multiplied by
; zext(%a), divided by 255, truncated back to i8, added to %a and stored back
; to the address it was loaded from. %dst then advances by 4 bytes, so the 4
; load/store pairs together touch every byte of each 4-byte step.
;
; The autogenerated assertions below cover both RUN lines. Without
; -enable-interleaved-mem-accesses the vector body uses scalar i8 loads and
; stores combined via insertelement/extractelement; with the flag it uses a
; single <16 x i8> load, shufflevectors, and a single <16 x i8> store.
define void @pr128062(ptr %dst.start, i64 %n, i8 %a) {
6+
; CHECK-LABEL: define void @pr128062(
7+
; CHECK-SAME: ptr [[DST_START:%.*]], i64 [[N:%.*]], i8 [[A:%.*]]) {
8+
; CHECK-NEXT: [[ENTRY:.*:]]
9+
; CHECK-NEXT: [[IV_START:%.*]] = and i64 [[N]], -4
10+
; CHECK-NEXT: [[A_EXT:%.*]] = zext i8 [[A]] to i16
11+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[IV_START]], -4
12+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
13+
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
14+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
15+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
16+
; CHECK: [[VECTOR_PH]]:
17+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
18+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
19+
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
20+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
21+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
22+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP5]]
23+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
24+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
25+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
26+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
27+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
28+
; CHECK: [[VECTOR_BODY]]:
29+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
30+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
31+
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
32+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
33+
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 8
34+
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
35+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP7]]
36+
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP8]]
37+
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP9]]
38+
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP10]]
39+
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[NEXT_GEP]], align 1
40+
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[NEXT_GEP3]], align 1
41+
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[NEXT_GEP4]], align 1
42+
; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[NEXT_GEP5]], align 1
43+
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
44+
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP12]], i32 1
45+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 2
46+
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 3
47+
; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
48+
; CHECK-NEXT: [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
49+
; CHECK-NEXT: [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
50+
; CHECK-NEXT: [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
51+
; CHECK-NEXT: [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
52+
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i8> [[TMP23]], i32 0
53+
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i8> [[TMP23]], i32 1
54+
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i8> [[TMP23]], i32 2
55+
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i8> [[TMP23]], i32 3
56+
; CHECK-NEXT: store i8 [[TMP24]], ptr [[NEXT_GEP]], align 1
57+
; CHECK-NEXT: store i8 [[TMP25]], ptr [[NEXT_GEP3]], align 1
58+
; CHECK-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP4]], align 1
59+
; CHECK-NEXT: store i8 [[TMP27]], ptr [[NEXT_GEP5]], align 1
60+
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 1
61+
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 1
62+
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 1
63+
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 1
64+
; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP28]], align 1
65+
; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP29]], align 1
66+
; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP30]], align 1
67+
; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP31]], align 1
68+
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i8> poison, i8 [[TMP32]], i32 0
69+
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i8> [[TMP36]], i8 [[TMP33]], i32 1
70+
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i8> [[TMP37]], i8 [[TMP34]], i32 2
71+
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 3
72+
; CHECK-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
73+
; CHECK-NEXT: [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
74+
; CHECK-NEXT: [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
75+
; CHECK-NEXT: [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
76+
; CHECK-NEXT: [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
77+
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i8> [[TMP44]], i32 0
78+
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x i8> [[TMP44]], i32 1
79+
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i8> [[TMP44]], i32 2
80+
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <4 x i8> [[TMP44]], i32 3
81+
; CHECK-NEXT: store i8 [[TMP45]], ptr [[TMP28]], align 1
82+
; CHECK-NEXT: store i8 [[TMP46]], ptr [[TMP29]], align 1
83+
; CHECK-NEXT: store i8 [[TMP47]], ptr [[TMP30]], align 1
84+
; CHECK-NEXT: store i8 [[TMP48]], ptr [[TMP31]], align 1
85+
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 2
86+
; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 2
87+
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 2
88+
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 2
89+
; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP49]], align 1
90+
; CHECK-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP50]], align 1
91+
; CHECK-NEXT: [[TMP55:%.*]] = load i8, ptr [[TMP51]], align 1
92+
; CHECK-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP52]], align 1
93+
; CHECK-NEXT: [[TMP57:%.*]] = insertelement <4 x i8> poison, i8 [[TMP53]], i32 0
94+
; CHECK-NEXT: [[TMP58:%.*]] = insertelement <4 x i8> [[TMP57]], i8 [[TMP54]], i32 1
95+
; CHECK-NEXT: [[TMP59:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP55]], i32 2
96+
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i8> [[TMP59]], i8 [[TMP56]], i32 3
97+
; CHECK-NEXT: [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
98+
; CHECK-NEXT: [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
99+
; CHECK-NEXT: [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
100+
; CHECK-NEXT: [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
101+
; CHECK-NEXT: [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
102+
; CHECK-NEXT: [[TMP66:%.*]] = extractelement <4 x i8> [[TMP65]], i32 0
103+
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <4 x i8> [[TMP65]], i32 1
104+
; CHECK-NEXT: [[TMP68:%.*]] = extractelement <4 x i8> [[TMP65]], i32 2
105+
; CHECK-NEXT: [[TMP69:%.*]] = extractelement <4 x i8> [[TMP65]], i32 3
106+
; CHECK-NEXT: store i8 [[TMP66]], ptr [[TMP49]], align 1
107+
; CHECK-NEXT: store i8 [[TMP67]], ptr [[TMP50]], align 1
108+
; CHECK-NEXT: store i8 [[TMP68]], ptr [[TMP51]], align 1
109+
; CHECK-NEXT: store i8 [[TMP69]], ptr [[TMP52]], align 1
110+
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 3
111+
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 3
112+
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 3
113+
; CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 3
114+
; CHECK-NEXT: [[TMP74:%.*]] = load i8, ptr [[TMP70]], align 1
115+
; CHECK-NEXT: [[TMP75:%.*]] = load i8, ptr [[TMP71]], align 1
116+
; CHECK-NEXT: [[TMP76:%.*]] = load i8, ptr [[TMP72]], align 1
117+
; CHECK-NEXT: [[TMP77:%.*]] = load i8, ptr [[TMP73]], align 1
118+
; CHECK-NEXT: [[TMP78:%.*]] = insertelement <4 x i8> poison, i8 [[TMP74]], i32 0
119+
; CHECK-NEXT: [[TMP79:%.*]] = insertelement <4 x i8> [[TMP78]], i8 [[TMP75]], i32 1
120+
; CHECK-NEXT: [[TMP80:%.*]] = insertelement <4 x i8> [[TMP79]], i8 [[TMP76]], i32 2
121+
; CHECK-NEXT: [[TMP81:%.*]] = insertelement <4 x i8> [[TMP80]], i8 [[TMP77]], i32 3
122+
; CHECK-NEXT: [[TMP82:%.*]] = zext <4 x i8> [[TMP81]] to <4 x i16>
123+
; CHECK-NEXT: [[TMP83:%.*]] = mul nuw <4 x i16> [[TMP82]], [[BROADCAST_SPLAT]]
124+
; CHECK-NEXT: [[TMP84:%.*]] = udiv <4 x i16> [[TMP83]], splat (i16 255)
125+
; CHECK-NEXT: [[TMP85:%.*]] = trunc nuw <4 x i16> [[TMP84]] to <4 x i8>
126+
; CHECK-NEXT: [[TMP86:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP85]]
127+
; CHECK-NEXT: [[TMP87:%.*]] = extractelement <4 x i8> [[TMP86]], i32 0
128+
; CHECK-NEXT: [[TMP88:%.*]] = extractelement <4 x i8> [[TMP86]], i32 1
129+
; CHECK-NEXT: [[TMP89:%.*]] = extractelement <4 x i8> [[TMP86]], i32 2
130+
; CHECK-NEXT: [[TMP90:%.*]] = extractelement <4 x i8> [[TMP86]], i32 3
131+
; CHECK-NEXT: store i8 [[TMP87]], ptr [[TMP70]], align 1
132+
; CHECK-NEXT: store i8 [[TMP88]], ptr [[TMP71]], align 1
133+
; CHECK-NEXT: store i8 [[TMP89]], ptr [[TMP72]], align 1
134+
; CHECK-NEXT: store i8 [[TMP90]], ptr [[TMP73]], align 1
135+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
136+
; CHECK-NEXT: [[TMP91:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
137+
; CHECK-NEXT: br i1 [[TMP91]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
138+
; CHECK: [[MIDDLE_BLOCK]]:
139+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
140+
; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
141+
; CHECK: [[SCALAR_PH]]:
142+
;
143+
; INTERLEAVED-LABEL: define void @pr128062(
144+
; INTERLEAVED-SAME: ptr [[DST_START:%.*]], i64 [[N:%.*]], i8 [[A:%.*]]) {
145+
; INTERLEAVED-NEXT: [[ENTRY:.*:]]
146+
; INTERLEAVED-NEXT: [[IV_START:%.*]] = and i64 [[N]], -4
147+
; INTERLEAVED-NEXT: [[A_EXT:%.*]] = zext i8 [[A]] to i16
148+
; INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[IV_START]], -4
149+
; INTERLEAVED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
150+
; INTERLEAVED-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
151+
; INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
152+
; INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
153+
; INTERLEAVED: [[VECTOR_PH]]:
154+
; INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
155+
; INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
156+
; INTERLEAVED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
157+
; INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
158+
; INTERLEAVED-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
159+
; INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP5]]
160+
; INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
161+
; INTERLEAVED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
162+
; INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
163+
; INTERLEAVED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
164+
; INTERLEAVED-NEXT: br label %[[VECTOR_BODY:.*]]
165+
; INTERLEAVED: [[VECTOR_BODY]]:
166+
; INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
167+
; INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
168+
; INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[OFFSET_IDX]]
169+
; INTERLEAVED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
170+
; INTERLEAVED-NEXT: [[TMP18:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
171+
; INTERLEAVED-NEXT: [[TMP39:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
172+
; INTERLEAVED-NEXT: [[TMP60:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
173+
; INTERLEAVED-NEXT: [[TMP81:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
174+
; INTERLEAVED-NEXT: [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
175+
; INTERLEAVED-NEXT: [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
176+
; INTERLEAVED-NEXT: [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
177+
; INTERLEAVED-NEXT: [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
178+
; INTERLEAVED-NEXT: [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
179+
; INTERLEAVED-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
180+
; INTERLEAVED-NEXT: [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
181+
; INTERLEAVED-NEXT: [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
182+
; INTERLEAVED-NEXT: [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
183+
; INTERLEAVED-NEXT: [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
184+
; INTERLEAVED-NEXT: [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
185+
; INTERLEAVED-NEXT: [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
186+
; INTERLEAVED-NEXT: [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
187+
; INTERLEAVED-NEXT: [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
188+
; INTERLEAVED-NEXT: [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
189+
; INTERLEAVED-NEXT: [[TMP82:%.*]] = zext <4 x i8> [[TMP81]] to <4 x i16>
190+
; INTERLEAVED-NEXT: [[TMP83:%.*]] = mul nuw <4 x i16> [[TMP82]], [[BROADCAST_SPLAT]]
191+
; INTERLEAVED-NEXT: [[TMP84:%.*]] = udiv <4 x i16> [[TMP83]], splat (i16 255)
192+
; INTERLEAVED-NEXT: [[TMP85:%.*]] = trunc nuw <4 x i16> [[TMP84]] to <4 x i8>
193+
; INTERLEAVED-NEXT: [[TMP86:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP85]]
194+
; INTERLEAVED-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP23]], <4 x i8> [[TMP44]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
195+
; INTERLEAVED-NEXT: [[TMP28:%.*]] = shufflevector <4 x i8> [[TMP65]], <4 x i8> [[TMP86]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
196+
; INTERLEAVED-NEXT: [[TMP29:%.*]] = shufflevector <8 x i8> [[TMP27]], <8 x i8> [[TMP28]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
197+
; INTERLEAVED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP29]], <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
198+
; INTERLEAVED-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1
199+
; INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
200+
; INTERLEAVED-NEXT: [[TMP91:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
201+
; INTERLEAVED-NEXT: br i1 [[TMP91]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
202+
; INTERLEAVED: [[MIDDLE_BLOCK]]:
203+
; INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
204+
; INTERLEAVED-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
205+
; INTERLEAVED: [[SCALAR_PH]]:
206+
;
207+
entry:
208+
; Clear the two low bits of %n, giving a multiple of 4 that is the start value
; of the down-counting induction variable %iv.
%iv.start = and i64 %n, -4
209+
; Zero-extend %a once, outside the loop; it is the i16 multiplier used for all
; 4 bytes of every iteration.
%a.ext = zext i8 %a to i16
210+
br label %loop
211+
212+
loop:
213+
; Loop counter: starts at %iv.start and is decreased by 4 per iteration.
%iv = phi i64 [ %iv.next, %loop ], [ %iv.start, %entry ]
214+
; Address of the current 4-byte step, starting at %dst.start.
%dst = phi ptr [ %dst.next, %loop ], [ %dst.start, %entry ]
215+
; Address for the next iteration: 4 bytes past %dst.
%dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4
216+
; Byte 0 (at %dst): val.0 = %a + trunc((zext(byte) * zext(%a)) / 255),
; stored back to the address it was loaded from.
%load.dst = load i8, ptr %dst, align 1
217+
%dst.ext = zext i8 %load.dst to i16
218+
%mul.dst.0 = mul nuw i16 %dst.ext, %a.ext
219+
%udiv.0 = udiv i16 %mul.dst.0, 255
220+
%trunc.0 = trunc nuw i16 %udiv.0 to i8
221+
%val.0 = add i8 %a, %trunc.0
222+
store i8 %val.0, ptr %dst, align 1
223+
; Byte 1 (at %dst + 1): same computation as byte 0.
%gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1
224+
%load.dst.1 = load i8, ptr %gep.dst.1, align 1
225+
%dst.1.ext = zext i8 %load.dst.1 to i16
226+
%mul.dst.1 = mul nuw i16 %dst.1.ext, %a.ext
227+
%udiv.1 = udiv i16 %mul.dst.1, 255
228+
%trunc.1 = trunc nuw i16 %udiv.1 to i8
229+
%val.1 = add i8 %a, %trunc.1
230+
store i8 %val.1, ptr %gep.dst.1, align 1
231+
; Byte 2 (at %dst + 2): same computation as byte 0.
%gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2
232+
%load.dst.2 = load i8, ptr %gep.dst.2, align 1
233+
%dst.2.ext = zext i8 %load.dst.2 to i16
234+
%mul.dst.2 = mul nuw i16 %dst.2.ext, %a.ext
235+
%udiv.2 = udiv i16 %mul.dst.2, 255
236+
%trunc.2 = trunc nuw i16 %udiv.2 to i8
237+
%val.2 = add i8 %a, %trunc.2
238+
store i8 %val.2, ptr %gep.dst.2, align 1
239+
; Byte 3 (at %dst + 3): same computation as byte 0.
%gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3
240+
%load.dst.3 = load i8, ptr %gep.dst.3, align 1
241+
%dst.3.ext = zext i8 %load.dst.3 to i16
242+
%mul.dst.3 = mul nuw i16 %dst.3.ext, %a.ext
243+
%udiv.3 = udiv i16 %mul.dst.3, 255
244+
%trunc.3 = trunc nuw i16 %udiv.3 to i8
245+
%val.3 = add i8 %a, %trunc.3
246+
store i8 %val.3, ptr %gep.dst.3, align 1
247+
; Count down by 4; the loop is left only when the counter is exactly 0.
%iv.next = add i64 %iv, -4
248+
%exit.cond = icmp eq i64 %iv.next, 0
249+
br i1 %exit.cond, label %exit, label %loop
250+
251+
exit:
252+
ret void
253+
}

0 commit comments

Comments
 (0)