@@ -1143,6 +1143,242 @@ exit:
11431143 ret i16 %for.1
11441144}
11451145
; Add-reduction over the i32 elements of %x, each zero-extended to i64 before
; being accumulated. The expected VPlan dump below shows the extend folded into
; the reduction as a single EXTENDED-REDUCE recipe.
; %y is not referenced by this function.
define i64 @print_extended_reduction(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: 'print_extended_reduction'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<%0> = VF * UF
; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
; CHECK-NEXT: Live-in ir<%n> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%r.09> = phi ir<0>, ir<%add>
; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<%3>
; CHECK-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%load0> = load vp<%4>
; CHECK-NEXT: EXTENDED-REDUCE ir<%add> = ir<%r.09> + reduce.add (ir<%load0> extended to i64)
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%1>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<%6> = compute-reduction-result ir<%r.09>, ir<%add>
; CHECK-NEXT: EMIT vp<%7> = extract-from-end vp<%6>, ir<1>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<%1>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup.loopexit>:
; CHECK-NEXT: IR %add.lcssa = phi i64 [ %add, %for.body ] (extra operand: vp<%7>)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<%6>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
; CHECK-NEXT: IR %r.09 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.merge.rdx>)
; CHECK-NEXT: IR %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.010
; CHECK-NEXT: IR %load0 = load i32, ptr %arrayidx, align 4
; CHECK-NEXT: IR %conv0 = zext i32 %load0 to i64
; CHECK-NEXT: IR %add = add nsw i64 %r.09, %conv0
; CHECK-NEXT: IR %inc = add nuw nsw i32 %i.010, 1
; CHECK-NEXT: IR %exitcond = icmp eq i32 %inc, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
  ; The loop is entered only when %n is strictly positive.
  %cmp8 = icmp sgt i32 %n, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.body:                                         ; preds = %entry, %for.body
  ; %i.010 is the induction variable, %r.09 the running i64 sum.
  %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %r.09 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.010
  %load0 = load i32, ptr %arrayidx, align 4
  ; Widen the element to the accumulator type, then accumulate.
  %conv0 = zext i32 %load0 to i64
  %add = add nsw i64 %r.09, %conv0
  %inc = add nuw nsw i32 %i.010, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ; Result is 0 when the loop was skipped, otherwise the final sum.
  %r.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ]
  ret i64 %r.0.lcssa
}
1217+
; Add-reduction of the element-wise product of the i64 arrays %x and %y.
; The expected VPlan dump below shows the multiply and the add-reduction
; combined into a single MULACC-REDUCE recipe.
define i64 @print_mulacc(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: 'print_mulacc'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<%0> = VF * UF
; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
; CHECK-NEXT: Live-in ir<%n> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%r.09> = phi ir<0>, ir<%add>
; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<%3>
; CHECK-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%load0> = load vp<%4>
; CHECK-NEXT: CLONE ir<%arrayidx1> = getelementptr inbounds ir<%y>, vp<%3>
; CHECK-NEXT: vp<%5> = vector-pointer ir<%arrayidx1>
; CHECK-NEXT: WIDEN ir<%load1> = load vp<%5>
; CHECK-NEXT: MULACC-REDUCE ir<%add> = ir<%r.09> + reduce.add (mul ir<%load0>, ir<%load1>)
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%1>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<%7> = compute-reduction-result ir<%r.09>, ir<%add>
; CHECK-NEXT: EMIT vp<%8> = extract-from-end vp<%7>, ir<1>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<%1>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup.loopexit>:
; CHECK-NEXT: IR %add.lcssa = phi i64 [ %add, %for.body ] (extra operand: vp<%8>)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<%7>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
; CHECK-NEXT: IR %r.09 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.merge.rdx>)
; CHECK-NEXT: IR %arrayidx = getelementptr inbounds i64, ptr %x, i32 %i.010
; CHECK-NEXT: IR %load0 = load i64, ptr %arrayidx, align 4
; CHECK-NEXT: IR %arrayidx1 = getelementptr inbounds i64, ptr %y, i32 %i.010
; CHECK-NEXT: IR %load1 = load i64, ptr %arrayidx1, align 4
; CHECK-NEXT: IR %mul = mul nsw i64 %load0, %load1
; CHECK-NEXT: IR %add = add nsw i64 %r.09, %mul
; CHECK-NEXT: IR %inc = add nuw nsw i32 %i.010, 1
; CHECK-NEXT: IR %exitcond = icmp eq i32 %inc, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
  ; The loop is entered only when %n is strictly positive.
  %cmp8 = icmp sgt i32 %n, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.body:                                         ; preds = %entry, %for.body
  ; %i.010 is the induction variable, %r.09 the running i64 sum.
  %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %r.09 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  ; Load x[i] and y[i]; both operands are already accumulator-wide.
  %arrayidx = getelementptr inbounds i64, ptr %x, i32 %i.010
  %load0 = load i64, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i64, ptr %y, i32 %i.010
  %load1 = load i64, ptr %arrayidx1, align 4
  ; Multiply, then accumulate the product.
  %mul = mul nsw i64 %load0, %load1
  %add = add nsw i64 %r.09, %mul
  %inc = add nuw nsw i32 %i.010, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ; Result is 0 when the loop was skipped, otherwise the final sum.
  %r.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ]
  ret i64 %r.0.lcssa
}
1296+
; Add-reduction of the element-wise product of the i16 arrays %x and %y. Each
; operand is sign-extended to i32, the i32 product is sign-extended to i64,
; and the result is accumulated in i64. The expected VPlan dump below shows the
; extends, the multiply and the add-reduction combined into a single
; MULACC-REDUCE recipe.
;
; The i16 loads use `align 2`: elements are addressed with a 2-byte stride, so
; 2 bytes is the largest alignment that holds for every element. Declaring a
; larger alignment would over-estimate it, which is undefined behaviour.
define i64 @print_mulacc_extended(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: 'print_mulacc_extended'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<%0> = VF * UF
; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
; CHECK-NEXT: Live-in ir<%n> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%r.09> = phi ir<0>, ir<%add>
; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<%3>
; CHECK-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%load0> = load vp<%4>
; CHECK-NEXT: CLONE ir<%arrayidx1> = getelementptr inbounds ir<%y>, vp<%3>
; CHECK-NEXT: vp<%5> = vector-pointer ir<%arrayidx1>
; CHECK-NEXT: WIDEN ir<%load1> = load vp<%5>
; CHECK-NEXT: MULACC-REDUCE ir<%add> = ir<%r.09> + (reduce.add (mul (ir<%load0> extended to i32), (ir<%load1> extended to i32)) extended to i64)
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%1>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<%7> = compute-reduction-result ir<%r.09>, ir<%add>
; CHECK-NEXT: EMIT vp<%8> = extract-from-end vp<%7>, ir<1>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<%1>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup.loopexit>:
; CHECK-NEXT: IR %add.lcssa = phi i64 [ %add, %for.body ] (extra operand: vp<%8>)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<%7>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
; CHECK-NEXT: IR %r.09 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.merge.rdx>)
; CHECK-NEXT: IR %arrayidx = getelementptr inbounds i16, ptr %x, i32 %i.010
; CHECK-NEXT: IR %load0 = load i16, ptr %arrayidx, align 2
; CHECK-NEXT: IR %arrayidx1 = getelementptr inbounds i16, ptr %y, i32 %i.010
; CHECK-NEXT: IR %load1 = load i16, ptr %arrayidx1, align 2
; CHECK-NEXT: IR %conv0 = sext i16 %load0 to i32
; CHECK-NEXT: IR %conv1 = sext i16 %load1 to i32
; CHECK-NEXT: IR %mul = mul nsw i32 %conv0, %conv1
; CHECK-NEXT: IR %conv = sext i32 %mul to i64
; CHECK-NEXT: IR %add = add nsw i64 %r.09, %conv
; CHECK-NEXT: IR %inc = add nuw nsw i32 %i.010, 1
; CHECK-NEXT: IR %exitcond = icmp eq i32 %inc, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
  ; The loop is entered only when %n is strictly positive.
  %cmp8 = icmp sgt i32 %n, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.body:                                         ; preds = %entry, %for.body
  ; %i.010 is the induction variable, %r.09 the running i64 sum.
  %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %r.09 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  ; Load x[i] and y[i] as i16 with their natural 2-byte alignment.
  %arrayidx = getelementptr inbounds i16, ptr %x, i32 %i.010
  %load0 = load i16, ptr %arrayidx, align 2
  %arrayidx1 = getelementptr inbounds i16, ptr %y, i32 %i.010
  %load1 = load i16, ptr %arrayidx1, align 2
  ; Sign-extend both operands to i32 and multiply at that width.
  %conv0 = sext i16 %load0 to i32
  %conv1 = sext i16 %load1 to i32
  %mul = mul nsw i32 %conv0, %conv1
  ; Sign-extend the product to the accumulator type, then accumulate.
  %conv = sext i32 %mul to i64
  %add = add nsw i64 %r.09, %conv
  %inc = add nuw nsw i32 %i.010, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ; Result is 0 when the loop was skipped, otherwise the final sum.
  %r.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ]
  ret i64 %r.0.lcssa
}
1381+
11461382!llvm.dbg.cu = !{!0 }
11471383!llvm.module.flags = !{!3 , !4 }
11481384
0 commit comments