Skip to content

Commit d0d5ef8

Browse files
authored
[LV] Add support for linear arguments for vector function variants (#73941)
If we have vectorized variants of a function which take linear parameters, we should be able to vectorize assuming the strides match.
1 parent 11a7e57 commit d0d5ef8

File tree

2 files changed

+53
-15
lines changed

2 files changed

+53
-15
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6888,6 +6888,30 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
68886888
ParamsOk = false;
68896889
break;
68906890
}
6891+
case VFParamKind::OMP_Linear: {
6892+
Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
6893+
// Find the stride for the scalar parameter in this loop and see if
6894+
// it matches the stride for the variant.
6895+
// TODO: do we need to figure out the cost of an extract to get the
6896+
// first lane? Or do we hope that it will be folded away?
6897+
ScalarEvolution *SE = PSE.getSE();
6898+
const auto *SAR =
6899+
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam));
6900+
6901+
if (!SAR || SAR->getLoop() != TheLoop) {
6902+
ParamsOk = false;
6903+
break;
6904+
}
6905+
6906+
const SCEVConstant *Step =
6907+
dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE));
6908+
6909+
if (!Step ||
6910+
Step->getAPInt().getSExtValue() != Param.LinearStepOrPos)
6911+
ParamsOk = false;
6912+
6913+
break;
6914+
}
68916915
case VFParamKind::GlobalPredicate:
68926916
UsesMask = true;
68936917
break;

llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call" --version 2
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(foo|bar|baz|quux)" --version 2
22
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON
33
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON
44
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF
@@ -10,15 +10,18 @@ target triple = "aarch64-unknown-linux-gnu"
1010
define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) {
1111
; NEON-LABEL: define void @test_linear8
1212
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
13+
; NEON: [[TMP3:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP2:%.*]])
1314
; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
1415
;
1516
; SVE_OR_NEON-LABEL: define void @test_linear8
1617
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
17-
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
18+
; SVE_OR_NEON: [[TMP13:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_nomask_sve(ptr [[TMP12:%.*]])
19+
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
1820
;
1921
; SVE_TF-LABEL: define void @test_linear8
2022
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
21-
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
23+
; SVE_TF: [[TMP19:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP18:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
24+
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
2225
;
2326
entry:
2427
br label %for.body
@@ -40,15 +43,17 @@ for.cond.cleanup:
4043
define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
4144
; NEON-LABEL: define void @test_vector_linear4
4245
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
46+
; NEON: [[TMP5:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
4347
; NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
4448
;
4549
; SVE_OR_NEON-LABEL: define void @test_vector_linear4
4650
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
47-
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
51+
; SVE_OR_NEON: [[TMP15:%.*]] = call <vscale x 4 x i32> @vec_baz_vector_linear4_nomask_sve(<vscale x 4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP14:%.*]])
52+
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
4853
;
4954
; SVE_TF-LABEL: define void @test_vector_linear4
5055
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
51-
; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
56+
; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
5257
;
5358
entry:
5459
br label %for.body
@@ -76,11 +81,11 @@ define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
7681
;
7782
; SVE_OR_NEON-LABEL: define void @test_linear8_bad_stride
7883
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
79-
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
84+
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
8085
;
8186
; SVE_TF-LABEL: define void @test_linear8_bad_stride
8287
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
83-
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
88+
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]]
8489
;
8590
entry:
8691
br label %for.body
@@ -102,15 +107,17 @@ for.cond.cleanup:
102107
define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
103108
; NEON-LABEL: define void @test_linear16_wide_stride
104109
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
110+
; NEON: [[TMP4:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP3:%.*]])
105111
; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
106112
;
107113
; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride
108114
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
109-
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
115+
; SVE_OR_NEON: [[TMP14:%.*]] = call <vscale x 2 x i64> @vec_foo_linear16_nomask_sve(ptr [[TMP13:%.*]])
116+
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4]]
110117
;
111118
; SVE_TF-LABEL: define void @test_linear16_wide_stride
112119
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
113-
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
120+
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR5]]
114121
;
115122
entry:
116123
br label %for.body
@@ -133,15 +140,18 @@ for.cond.cleanup:
133140
define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
134141
; NEON-LABEL: define void @test_linear4_linear8
135142
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
143+
; NEON: [[TMP5:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP3:%.*]], ptr [[TMP4:%.*]])
136144
; NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
137145
;
138146
; SVE_OR_NEON-LABEL: define void @test_linear4_linear8
139147
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
140-
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
148+
; SVE_OR_NEON: [[TMP15:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP13:%.*]], ptr [[TMP14:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
149+
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]]
141150
;
142151
; SVE_TF-LABEL: define void @test_linear4_linear8
143152
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
144-
; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
153+
; SVE_TF: [[TMP21:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP19:%.*]], ptr [[TMP20:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
154+
; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]]
145155
;
146156
entry:
147157
br label %for.body
@@ -164,15 +174,17 @@ for.cond.cleanup:
164174
define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) {
165175
; NEON-LABEL: define void @test_linear3_non_ptr
166176
; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
177+
; NEON: [[TMP3:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP2:%.*]])
167178
; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]]
168179
;
169180
; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr
170181
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
171-
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
182+
; SVE_OR_NEON: [[TMP13:%.*]] = call <vscale x 4 x i32> @vec_bar_linear3_nomask_sve(i32 [[TMP12:%.*]])
183+
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR6:[0-9]+]]
172184
;
173185
; SVE_TF-LABEL: define void @test_linear3_non_ptr
174186
; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
175-
; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
187+
; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR7:[0-9]+]]
176188
;
177189
entry:
178190
br label %for.body
@@ -195,15 +207,17 @@ for.cond.cleanup:
195207
define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) {
196208
; NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
197209
; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
210+
; NEON: [[TMP3:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP2:%.*]])
198211
; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]]
199212
;
200213
; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
201214
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
202-
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
215+
; SVE_OR_NEON: [[TMP13:%.*]] = call <vscale x 4 x i32> @vec_bar_linearn5_nomask_sve(i32 [[TMP12:%.*]])
216+
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR7:[0-9]+]]
203217
;
204218
; SVE_TF-LABEL: define void @test_linearn5_non_ptr_neg_stride
205219
; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
206-
; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
220+
; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR8:[0-9]+]]
207221
;
208222
entry:
209223
br label %for.body

0 commit comments

Comments
 (0)