@@ -24,20 +24,21 @@ target triple = "aarch64-linux-gnu"
24
24
; DEBUG-EPILOG-PREFER-SCALABLE: Create Skeleton for epilogue vectorized loop (first pass)
25
25
; DEBUG-EPILOG-PREFER-SCALABLE: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:vscale x 8, Epilogue Loop UF:1
26
26
27
- define void @main_vf_vscale_x_16(ptr %A) #0 {
27
+ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
28
28
; CHECK-LABEL: @main_vf_vscale_x_16(
29
29
; CHECK-NEXT: iter.check:
30
- ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
30
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N:%.*]], 8
31
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
31
32
; CHECK: vector.main.loop.iter.check:
32
33
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
33
34
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
34
- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
35
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
35
36
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
36
37
; CHECK: vector.ph:
37
38
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
38
39
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
39
- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
40
- ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
40
+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
41
+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
41
42
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
42
43
; CHECK: vector.body:
43
44
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -51,45 +52,48 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
51
52
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
52
53
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
53
54
; CHECK: middle.block:
54
- ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
55
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
55
56
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
56
57
; CHECK: vec.epilog.iter.check:
57
- ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
58
+ ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
58
59
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
59
- ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
60
+ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
60
61
; CHECK: vec.epilog.ph:
61
62
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
63
+ ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8
64
+ ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
62
65
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
63
66
; CHECK: vec.epilog.vector.body:
64
67
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
65
68
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX1]]
66
69
; CHECK-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1
67
70
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
68
- ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
69
- ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
71
+ ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]]
72
+ ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
70
73
; CHECK: vec.epilog.middle.block:
71
- ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
74
+ ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
75
+ ; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
72
76
; CHECK: vec.epilog.scalar.ph:
73
- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
77
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
74
78
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
75
79
; CHECK: for.body:
76
80
;
77
81
; CHECK-EPILOG-PREFER-SCALABLE-LABEL: @main_vf_vscale_x_16(
78
82
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: iter.check:
79
83
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
80
84
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
81
- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
85
+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
82
86
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
83
87
; CHECK-EPILOG-PREFER-SCALABLE: vector.main.loop.iter.check:
84
88
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
85
89
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
86
- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 1024, [[TMP3]]
90
+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP3]]
87
91
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
88
92
; CHECK-EPILOG-PREFER-SCALABLE: vector.ph:
89
93
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
90
94
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32
91
- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP5]]
92
- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
95
+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
96
+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
93
97
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
94
98
; CHECK-EPILOG-PREFER-SCALABLE: vector.body:
95
99
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -103,30 +107,30 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
103
107
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
104
108
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
105
109
; CHECK-EPILOG-PREFER-SCALABLE: middle.block:
106
- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
110
+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
107
111
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
108
112
; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.iter.check:
109
- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
113
+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
110
114
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
111
115
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 3
112
116
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP12]]
113
- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
117
+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
114
118
; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.ph:
115
119
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
116
120
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
117
121
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8
118
- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF2:%.*]] = urem i64 1024, [[TMP14]]
119
- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC3:%.*]] = sub i64 1024, [[N_MOD_VF2]]
122
+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], [[TMP14]]
123
+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
120
124
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
121
125
; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.vector.body:
122
126
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
123
127
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX4]]
124
128
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store <vscale x 8 x i8> splat (i8 1), ptr [[TMP15]], align 1
125
129
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], [[TMP14]]
126
130
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]]
127
- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
131
+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
128
132
; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.middle.block:
129
- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N6:%.*]] = icmp eq i64 1024, [[N_VEC3]]
133
+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
130
134
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
131
135
; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.scalar.ph:
132
136
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
@@ -141,7 +145,7 @@ for.body:
141
145
%arrayidx = getelementptr inbounds i8, ptr %A, i64 %iv
142
146
store i8 1, ptr %arrayidx, align 1
143
147
%iv.next = add nuw nsw i64 %iv, 1
144
- %exitcond = icmp ne i64 %iv.next, 1024
148
+ %exitcond = icmp ne i64 %iv.next, %n
145
149
br i1 %exitcond, label %for.body, label %exit
146
150
147
151
exit:
0 commit comments