Skip to content

Commit 68574da

Browse files
committed
Fixups
1 parent b2aed52 commit 68574da

File tree

2 files changed

+74
-18
lines changed

2 files changed

+74
-18
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10349,10 +10349,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1034910349
}
1035010350

1035110351
if (LVL.hasStructVectorCall()) {
10352-
constexpr StringLiteral FailureMessage(
10353-
"Auto-vectorization of calls that return struct types is not yet "
10354-
"supported");
10355-
reportVectorizationFailure(FailureMessage, FailureMessage,
10352+
reportVectorizationFailure("Auto-vectorization of calls that return struct "
10353+
"types is not yet supported",
1035610354
"StructCallVectorizationUnsupported", ORE, L);
1035710355
return false;
1035810356
}

llvm/test/Transforms/LoopVectorize/struct-return.ll

Lines changed: 72 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,35 @@ exit:
114114
ret void
115115
}
116116

117+
; TODO: Allow mixed-struct type vectorization and mark overflow intrinsics as trivially vectorizable.
118+
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: call instruction cannot be vectorized
119+
define void @test_overflow_intrinsic(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
120+
; CHECK-LABEL: define void @test_overflow_intrinsic
121+
; CHECK-NOT: vector.body:
122+
; CHECK-NOT: @llvm.sadd.with.overflow.v{{.+}}i32
123+
entry:
124+
br label %for.body
125+
126+
for.body:
127+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
128+
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
129+
%in_val = load i32, ptr %arrayidx, align 4
130+
%call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %in_val, i32 %in_val)
131+
%extract_ret = extractvalue { i32, i1 } %call, 0
132+
%extract_overflow = extractvalue { i32, i1 } %call, 1
133+
%zext_overflow = zext i1 %extract_overflow to i8
134+
%arrayidx2 = getelementptr inbounds i32, ptr %out_a, i64 %iv
135+
store i32 %extract_ret, ptr %arrayidx2, align 4
136+
%arrayidx4 = getelementptr inbounds i8, ptr %out_b, i64 %iv
137+
store i8 %zext_overflow, ptr %arrayidx4, align 4
138+
%iv.next = add nuw nsw i64 %iv, 1
139+
%exitcond.not = icmp eq i64 %iv.next, 1024
140+
br i1 %exitcond.not, label %exit, label %for.body
141+
142+
exit:
143+
ret void
144+
}
145+
117146
; Negative test. Widening structs with mixed element types is not supported.
118147
; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
119148
define void @negative_mixed_element_type_struct_return(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@@ -172,27 +201,54 @@ exit:
172201
ret void
173202
}
174203

175-
; TODO: Allow mixed-struct type vectorization and mark overflow intrinsics as trivially vectorizable.
176-
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: call instruction cannot be vectorized
177-
define void @test_overflow_intrinsic(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
178-
; CHECK-LABEL: define void @test_overflow_intrinsic
204+
; Negative test. Nested homogeneous structs are not supported.
205+
; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
206+
define void @negative_nested_struct(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
207+
; CHECK-LABEL: define void @negative_nested_struct
179208
; CHECK-NOT: vector.body:
180-
; CHECK-NOT: @llvm.sadd.with.overflow.v{{.+}}i32
181209
entry:
182210
br label %for.body
183211

184212
for.body:
185213
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
186214
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
187-
%in_val = load i32, ptr %arrayidx, align 4
188-
%call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %in_val, i32 %in_val)
189-
%extract_ret = extractvalue { i32, i1 } %call, 0
190-
%extract_overflow = extractvalue { i32, i1 } %call, 1
191-
%zext_overflow = zext i1 %extract_overflow to i8
192-
%arrayidx2 = getelementptr inbounds i32, ptr %out_a, i64 %iv
193-
store i32 %extract_ret, ptr %arrayidx2, align 4
194-
%arrayidx4 = getelementptr inbounds i8, ptr %out_b, i64 %iv
195-
store i8 %zext_overflow, ptr %arrayidx4, align 4
215+
%in_val = load float, ptr %arrayidx, align 4
216+
%call = tail call { { float, float } } @foo_nested_struct(float %in_val) #0
217+
%extract_inner = extractvalue { { float, float } } %call, 0
218+
%extract_a = extractvalue { float, float } %extract_inner, 0
219+
%extract_b = extractvalue { float, float } %extract_inner, 1
220+
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
221+
store float %extract_a, ptr %arrayidx2, align 4
222+
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
223+
store float %extract_b, ptr %arrayidx4, align 4
224+
%iv.next = add nuw nsw i64 %iv, 1
225+
%exitcond.not = icmp eq i64 %iv.next, 1024
226+
br i1 %exitcond.not, label %exit, label %for.body
227+
228+
exit:
229+
ret void
230+
}
231+
232+
; Negative test. Homogeneous structs of arrays are not supported.
233+
; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
234+
define void @negative_struct_array_elements(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
235+
; CHECK-LABEL: define void @negative_struct_array_elements
236+
; CHECK-NOT: vector.body:
237+
entry:
238+
br label %for.body
239+
240+
for.body:
241+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
242+
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
243+
%in_val = load float, ptr %arrayidx, align 4
244+
%call = tail call { [2 x float] } @foo_arrays(float %in_val) #0
245+
%extract_inner = extractvalue { [2 x float] } %call, 0
246+
%extract_a = extractvalue [2 x float] %extract_inner, 0
247+
%extract_b = extractvalue [2 x float] %extract_inner, 1
248+
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
249+
store float %extract_a, ptr %arrayidx2, align 4
250+
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
251+
store float %extract_b, ptr %arrayidx4, align 4
196252
%iv.next = add nuw nsw i64 %iv, 1
197253
%exitcond.not = icmp eq i64 %iv.next, 1024
198254
br i1 %exitcond.not, label %exit, label %for.body
@@ -254,6 +310,8 @@ declare { float, float } @foo(float)
254310
declare { double, double } @bar(double)
255311
declare { float, i32 } @baz(float)
256312
declare %named_struct @bar_named(double)
313+
declare { { float, float } } @foo_nested_struct(float)
314+
declare { [2 x float] } @foo_arrays(float)
257315

258316
declare { <2 x float>, <2 x float> } @fixed_vec_foo(<2 x float>)
259317
declare { <2 x double>, <2 x double> } @fixed_vec_bar(<2 x double>)

0 commit comments

Comments
 (0)