Skip to content

Commit 2848e28

Browse files
committed
[LV] Add test with partial reduction without narrowing.
1 parent 148a835 commit 2848e28

File tree

1 file changed

+125
-0
lines changed

1 file changed

+125
-0
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,6 +1381,131 @@ for.body: ; preds = %for.body.preheader,
13811381
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
13821382
}
13831383

1384+
define i32 @red_extended_add_chain(ptr %start, ptr %end, i32 %offset) {
1385+
; CHECK-NEON-LABEL: define i32 @red_extended_add_chain(
1386+
; CHECK-NEON-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] {
1387+
; CHECK-NEON-NEXT: entry:
1388+
; CHECK-NEON-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
1389+
; CHECK-NEON-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
1390+
; CHECK-NEON-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1
1391+
; CHECK-NEON-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
1392+
; CHECK-NEON-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
1393+
; CHECK-NEON-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1394+
; CHECK-NEON: vector.ph:
1395+
; CHECK-NEON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 16
1396+
; CHECK-NEON-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
1397+
; CHECK-NEON-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
1398+
; CHECK-NEON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[OFFSET]], i64 0
1399+
; CHECK-NEON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1400+
; CHECK-NEON-NEXT: br label [[VECTOR_BODY:%.*]]
1401+
; CHECK-NEON: vector.body:
1402+
; CHECK-NEON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1403+
; CHECK-NEON-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
1404+
; CHECK-NEON-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
1405+
; CHECK-NEON-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
1406+
; CHECK-NEON-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
1407+
; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <16 x i32> @llvm.experimental.vector.partial.reduce.add.v16i32.v16i32(<16 x i32> [[VEC_PHI]], <16 x i32> [[TMP3]])
1408+
; CHECK-NEON-NEXT: [[TMP4]] = add <16 x i32> [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]]
1409+
; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1410+
; CHECK-NEON-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1411+
; CHECK-NEON-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1412+
; CHECK-NEON: middle.block:
1413+
; CHECK-NEON-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP4]])
1414+
; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
1415+
; CHECK-NEON-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1416+
; CHECK-NEON: scalar.ph:
1417+
;
1418+
; CHECK-SVE-LABEL: define i32 @red_extended_add_chain(
1419+
; CHECK-SVE-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] {
1420+
; CHECK-SVE-NEXT: entry:
1421+
; CHECK-SVE-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
1422+
; CHECK-SVE-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
1423+
; CHECK-SVE-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1
1424+
; CHECK-SVE-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
1425+
; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1426+
; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
1427+
; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]]
1428+
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1429+
; CHECK-SVE: vector.ph:
1430+
; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
1431+
; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
1432+
; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
1433+
; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
1434+
; CHECK-SVE-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
1435+
; CHECK-SVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OFFSET]], i64 0
1436+
; CHECK-SVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1437+
; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]]
1438+
; CHECK-SVE: vector.body:
1439+
; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1440+
; CHECK-SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
1441+
; CHECK-SVE-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
1442+
; CHECK-SVE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[NEXT_GEP]], align 1
1443+
; CHECK-SVE-NEXT: [[TMP7:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
1444+
; CHECK-SVE-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP7]]
1445+
; CHECK-SVE-NEXT: [[TMP9]] = add <vscale x 4 x i32> [[TMP8]], [[BROADCAST_SPLAT]]
1446+
; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
1447+
; CHECK-SVE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1448+
; CHECK-SVE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1449+
; CHECK-SVE: middle.block:
1450+
; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP9]])
1451+
; CHECK-SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
1452+
; CHECK-SVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1453+
; CHECK-SVE: scalar.ph:
1454+
;
1455+
; CHECK-SVE-MAXBW-LABEL: define i32 @red_extended_add_chain(
1456+
; CHECK-SVE-MAXBW-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] {
1457+
; CHECK-SVE-MAXBW-NEXT: entry:
1458+
; CHECK-SVE-MAXBW-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
1459+
; CHECK-SVE-MAXBW-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
1460+
; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1
1461+
; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
1462+
; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1463+
; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
1464+
; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]]
1465+
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1466+
; CHECK-SVE-MAXBW: vector.ph:
1467+
; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
1468+
; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
1469+
; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
1470+
; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
1471+
; CHECK-SVE-MAXBW-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
1472+
; CHECK-SVE-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[OFFSET]], i64 0
1473+
; CHECK-SVE-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
1474+
; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
1475+
; CHECK-SVE-MAXBW: vector.body:
1476+
; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1477+
; CHECK-SVE-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
1478+
; CHECK-SVE-MAXBW-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
1479+
; CHECK-SVE-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[NEXT_GEP]], align 1
1480+
; CHECK-SVE-MAXBW-NEXT: [[TMP7:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
1481+
; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call <vscale x 8 x i32> @llvm.experimental.vector.partial.reduce.add.nxv8i32.nxv8i32(<vscale x 8 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP7]])
1482+
; CHECK-SVE-MAXBW-NEXT: [[TMP8]] = add <vscale x 8 x i32> [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]]
1483+
; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
1484+
; CHECK-SVE-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1485+
; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1486+
; CHECK-SVE-MAXBW: middle.block:
1487+
; CHECK-SVE-MAXBW-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP8]])
1488+
; CHECK-SVE-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
1489+
; CHECK-SVE-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1490+
; CHECK-SVE-MAXBW: scalar.ph:
1491+
;
1492+
entry:
1493+
br label %loop
1494+
1495+
loop:
1496+
%ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ]
1497+
%red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
1498+
%l = load i8, ptr %ptr.iv, align 1
1499+
%l.ext = zext i8 %l to i32
1500+
%add = add i32 %red, %l.ext
1501+
%red.next = add i32 %add, %offset
1502+
%gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
1503+
%ec = icmp eq ptr %ptr.iv, %end
1504+
br i1 %ec, label %exit, label %loop
1505+
1506+
exit:
1507+
ret i32 %red.next
1508+
}
13841509

13851510
attributes #0 = { vscale_range(1,16) }
13861511

0 commit comments

Comments
 (0)