Skip to content
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions llvm/test/Transforms/LoopVectorize/X86/vplan-noinbounds-gep.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: nofree norecurse nosync nounwind memory(none)
define i1 @fn(ptr %nno) local_unnamed_addr #0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
define i1 @fn(ptr %nno) local_unnamed_addr #0 {
define i1 @fn(ptr %nno) #0 {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to add a comment sayin what this tests, e.g. something like FIXME: GEP flags on GEPs for reverse vector pointer need to be dropped when folding the tail.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

; CHECK-LABEL: define i1 @fn(
; CHECK-SAME: ptr [[NNO:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 10, i64 9, i64 8, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 10, [[INDEX]]
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], splat (i64 10)
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [12 x i32], ptr [[NNO]], i64 0, i64 [[TMP22]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -3
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[REVERSE]], <4 x i32> poison)
; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[REVERSE1]], splat (i32 1)
; CHECK-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 10)
; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[REVERSE1]], <4 x i32> [[TMP8]]
; CHECK-NEXT: [[TMP11]] = or <4 x i32> [[PREDPHI]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP11]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP12]])
; CHECK-NEXT: br i1 true, label [[FOR_END36:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -2, [[MIDDLE_BLOCK]] ], [ 10, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY20:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC35:%.*]] ]
; CHECK-NEXT: [[SUM_01:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_1:%.*]], [[FOR_INC35]] ]
; CHECK-NEXT: [[REM4:%.*]] = and i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[CMP21:%.*]] = icmp eq i64 [[REM4]], 0
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [12 x i32], ptr [[NNO]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: br i1 [[CMP21]], label [[IF_THEN22:%.*]], label [[FOR_INC35]]
; CHECK: if.then:
; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[TMP15]], 1
; CHECK-NEXT: [[REM27:%.*]] = urem i32 [[MUL]], 10
; CHECK-NEXT: br label [[FOR_INC35]]
; CHECK: loop.latch:
; CHECK-NEXT: [[REM27_PN:%.*]] = phi i32 [ [[REM27]], [[IF_THEN22]] ], [ [[TMP15]], [[FOR_BODY20]] ]
; CHECK-NEXT: [[SUM_1]] = or i32 [[REM27_PN]], [[SUM_01]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_END36]], label [[FOR_BODY20]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_INC35]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[CMP41:%.*]] = icmp eq i32 [[SUM_1_LCSSA]], 0
; CHECK-NEXT: ret i1 [[CMP41]]
;
entry:
br label %loop.header

loop.header: ; preds = %entry, %loop.latch
%iv = phi i64 [ 10, %entry ], [ %iv.next, %loop.latch ]
%sum.01 = phi i32 [ 0, %entry ], [ %sum.1, %loop.latch ]
%rem4 = and i64 %iv, 1
%cmp21 = icmp eq i64 %rem4, 0
%gep = getelementptr inbounds nuw [12 x i32], ptr %nno, i64 0, i64 %iv
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%gep = getelementptr inbounds nuw [12 x i32], ptr %nno, i64 0, i64 %iv
%gep = getelementptr inbounds nuw i32, ptr %nno, i64 %iv

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

%0 = load i32, ptr %gep, align 4
br i1 %cmp21, label %if.then, label %loop.latch

if.then: ; preds = %loop.header
%mul = shl i32 %0, 1
%rem27 = urem i32 %mul, 10
br label %loop.latch

loop.latch: ; preds = %loop.header, %if.then
%rem27.pn = phi i32 [ %rem27, %if.then ], [ %0, %loop.header ]
%sum.1 = or i32 %rem27.pn, %sum.01
%iv.next = add nsw i64 %iv, -1
%cmp19.not = icmp eq i64 %iv, 0
br i1 %cmp19.not, label %exit, label %loop.header

exit: ; preds = %loop.latch
%sum.1.lcssa = phi i32 [ %sum.1, %loop.latch ]
%cmp41 = icmp eq i32 %sum.1.lcssa, 0
ret i1 %cmp41
}

attributes #0 = { nofree norecurse nosync nounwind memory(none) "target-features"="+aes,+avx,+cmov,+crc32,+cx16,+cx8,+fxsr,+mmx,+pclmul,+popcnt,+prfchw,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please clean those up and retain only th eneeded flags (+avx?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Loading