 target triple = "riscv64-none-unknown-elf"

 define i32 @vqdot(ptr %a, ptr %b) #0 {
-; CHECK-LABEL: define i32 @vqdot(
-; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP10]], align 1
-; CHECK-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
-; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
-; CHECK-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP13]])
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK: scalar.ph:
+; V-LABEL: define i32 @vqdot(
+; V-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; V-NEXT: entry:
+; V-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; V-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; V-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; V: vector.ph:
+; V-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; V-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; V-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; V-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; V-NEXT: br label [[VECTOR_BODY:%.*]]
+; V: vector.body:
+; V-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; V-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
+; V-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
+; V-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1
+; V-NEXT: [[TMP8:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
+; V-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
+; V-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0
+; V-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP10]], align 1
+; V-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
+; V-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
+; V-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]]
+; V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; V-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; V: middle.block:
+; V-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP13]])
+; V-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; V-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
+; V: scalar.ph:
+;
+; ZVQDOTQ-LABEL: define i32 @vqdot(
+; ZVQDOTQ-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; ZVQDOTQ-NEXT: entry:
+; ZVQDOTQ-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; ZVQDOTQ-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; ZVQDOTQ-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; ZVQDOTQ: vector.ph:
+; ZVQDOTQ-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; ZVQDOTQ-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; ZVQDOTQ-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]]
+; ZVQDOTQ: vector.body:
+; ZVQDOTQ-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 1 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
+; ZVQDOTQ-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
+; ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1
+; ZVQDOTQ-NEXT: [[TMP8:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
+; ZVQDOTQ-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
+; ZVQDOTQ-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0
+; ZVQDOTQ-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP10]], align 1
+; ZVQDOTQ-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
+; ZVQDOTQ-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
+; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]])
+; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; ZVQDOTQ-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; ZVQDOTQ-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; ZVQDOTQ: middle.block:
+; ZVQDOTQ-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> [[PARTIAL_REDUCE]])
+; ZVQDOTQ-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; ZVQDOTQ-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
+; ZVQDOTQ: scalar.ph:
 ;
 entry:
   br label %for.body
@@ -66,42 +103,79 @@ for.exit: ; preds = %for.body


 define i32 @vqdotu(ptr %a, ptr %b) #0 {
-; CHECK-LABEL: define i32 @vqdotu(
-; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP10]], align 1
-; CHECK-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
-; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
-; CHECK-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP13]])
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK: scalar.ph:
+; V-LABEL: define i32 @vqdotu(
+; V-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; V-NEXT: entry:
+; V-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; V-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; V-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; V: vector.ph:
+; V-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; V-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; V-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; V-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; V-NEXT: br label [[VECTOR_BODY:%.*]]
+; V: vector.body:
+; V-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; V-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
+; V-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
+; V-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1
+; V-NEXT: [[TMP8:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
+; V-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
+; V-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0
+; V-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP10]], align 1
+; V-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
+; V-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
+; V-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]]
+; V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; V-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; V: middle.block:
+; V-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP13]])
+; V-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; V-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
+; V: scalar.ph:
+;
+; ZVQDOTQ-LABEL: define i32 @vqdotu(
+; ZVQDOTQ-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; ZVQDOTQ-NEXT: entry:
+; ZVQDOTQ-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; ZVQDOTQ-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; ZVQDOTQ-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; ZVQDOTQ: vector.ph:
+; ZVQDOTQ-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; ZVQDOTQ-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; ZVQDOTQ-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]]
+; ZVQDOTQ: vector.body:
+; ZVQDOTQ-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 1 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
+; ZVQDOTQ-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
+; ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1
+; ZVQDOTQ-NEXT: [[TMP8:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
+; ZVQDOTQ-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
+; ZVQDOTQ-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0
+; ZVQDOTQ-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP10]], align 1
+; ZVQDOTQ-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
+; ZVQDOTQ-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
+; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]])
+; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; ZVQDOTQ-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; ZVQDOTQ-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; ZVQDOTQ: middle.block:
+; ZVQDOTQ-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> [[PARTIAL_REDUCE]])
+; ZVQDOTQ-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; ZVQDOTQ-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
+; ZVQDOTQ: scalar.ph:
 ;
 entry:
   br label %for.body
@@ -128,7 +202,7 @@ for.exit: ; preds = %for.body

 define i32 @vqdotsu(ptr %a, ptr %b) #0 {
 ; CHECK-LABEL: define i32 @vqdotsu(
-; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
@@ -245,6 +319,3 @@ for.body: ; preds = %for.body, %entry
 for.exit: ; preds = %for.body
   ret i32 %add
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; V: {{.*}}
-; ZVQDOTQ: {{.*}}
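For reference, the scalar source loop these checks are applied to has the following shape; this is a minimal sketch reconstructed from the visible entry/for.exit context and the CHECK lines, since the loop bodies are elided from the hunks above, and the value names are assumptions. Each test loads i8 elements from %a and %b, extends them to i32 (sext for @vqdot, zext for @vqdotu, mixed for @vqdotsu), multiplies, and accumulates across 1024 iterations. With only V, the accumulator stays a full <vscale x 4 x i32> add; with ZVQDOTQ, the checks instead expect the accumulator narrowed to <vscale x 1 x i32> through @llvm.experimental.vector.partial.reduce.add.

define i32 @vqdot(ptr %a, ptr %b) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  ; Hypothetical names; only the entry and for.exit blocks appear in the diff.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %gep.a = getelementptr i8, ptr %a, i64 %iv
  %load.a = load i8, ptr %gep.a, align 1
  %ext.a = sext i8 %load.a to i32
  %gep.b = getelementptr i8, ptr %b, i64 %iv
  %load.b = load i8, ptr %gep.b, align 1
  %ext.b = sext i8 %load.b to i32
  %mul = mul i32 %ext.b, %ext.a
  %add = add i32 %mul, %accum
  %iv.next = add i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1024
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:                                         ; preds = %for.body
  ret i32 %add
}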