11; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2- ; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK
2+ ; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK
33
44; Exercise tail folding on RISCV w/scalable vectors.
55
@@ -330,17 +330,44 @@ for.end:
330330define i64 @uniform_load (ptr noalias nocapture %a , ptr noalias nocapture %b , i64 %n ) {
331331; CHECK-LABEL: @uniform_load(
332332; CHECK-NEXT: entry:
333+ ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
334+ ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
335+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
336+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
337+ ; CHECK: vector.ph:
338+ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
339+ ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
340+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
341+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
342+ ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
343+ ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
333344; CHECK-NEXT: br label [[FOR_BODY:%.*]]
334- ; CHECK: for .body:
335- ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.* ]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
345+ ; CHECK: vector .body:
346+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
336347; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
348+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
349+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
337350; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
338- ; CHECK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
339- ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
340- ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
341- ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
351+ ; CHECK-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[ARRAYIDX]], align 8
352+ ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP5]]
353+ ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
354+ ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
355+ ; CHECK: middle.block:
356+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
357+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
358+ ; CHECK: scalar.ph:
359+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
360+ ; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
361+ ; CHECK: for.body:
362+ ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
363+ ; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[B]], align 8
364+ ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV1]]
365+ ; CHECK-NEXT: store i64 [[V1]], ptr [[ARRAYIDX1]], align 8
366+ ; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
367+ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], 1025
368+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP14:![0-9]+]]
342369; CHECK: for.end:
343- ; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY ]] ]
370+ ; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V1]], [[FOR_BODY1]] ], [ [[ V]], [[MIDDLE_BLOCK ]] ]
344371; CHECK-NEXT: ret i64 [[V_LCSSA]]
345372;
346373entry:
@@ -389,7 +416,7 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) {
389416; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
390417; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
391418; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
392- ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13 :![0-9]+]]
419+ ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15 :![0-9]+]]
393420; CHECK: middle.block:
394421; CHECK-NEXT: br label [[FOR_END:%.*]]
395422; CHECK: scalar.ph:
@@ -403,7 +430,7 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) {
403430; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
404431; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
405432; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
406- ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP14 :![0-9]+]]
433+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP16 :![0-9]+]]
407434; CHECK: for.end:
408435; CHECK-NEXT: ret void
409436;
0 commit comments