Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions llvm/test/Transforms/LoopVectorize/vplan-noinbounds-gep.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s

no nested loops, so no the 'native' path isn't used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


source_filename = "<stdin>"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

drop

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this X86 specific? If so, it needs to be moved to the X86 subdirectory

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


; Function Attrs: nofree norecurse nosync nounwind memory(none)
define i1 @fn() local_unnamed_addr #0 {
; CHECK-LABEL: define i1 @fn(
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NNO:%.*]] = alloca [12 x i32], align 16
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 10, i64 9, i64 8, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 10, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], splat (i64 10)
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [12 x i32], ptr [[NNO]], i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -3
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[REVERSE]], <4 x i32> poison)
; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[REVERSE1]], splat (i32 1)
; CHECK-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 10)
; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[REVERSE1]], <4 x i32> [[TMP8]]
; CHECK-NEXT: [[TMP11]] = or <4 x i32> [[PREDPHI]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP11]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP12]])
; CHECK-NEXT: br i1 true, label [[FOR_END36:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -2, [[MIDDLE_BLOCK]] ], [ 10, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY20:%.*]]
; CHECK: for.body20:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC35:%.*]] ]
; CHECK-NEXT: [[SUM_01:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_1:%.*]], [[FOR_INC35]] ]
; CHECK-NEXT: [[REM4:%.*]] = and i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[CMP21:%.*]] = icmp eq i64 [[REM4]], 0
; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw [12 x i32], ptr [[NNO]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX24]], align 4
; CHECK-NEXT: br i1 [[CMP21]], label [[IF_THEN22:%.*]], label [[FOR_INC35]]
; CHECK: if.then22:
; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[TMP15]], 1
; CHECK-NEXT: [[REM27:%.*]] = urem i32 [[MUL]], 10
; CHECK-NEXT: br label [[FOR_INC35]]
; CHECK: for.inc35:
; CHECK-NEXT: [[REM27_PN:%.*]] = phi i32 [ [[REM27]], [[IF_THEN22]] ], [ [[TMP15]], [[FOR_BODY20]] ]
; CHECK-NEXT: [[SUM_1]] = or i32 [[REM27_PN]], [[SUM_01]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_END36]], label [[FOR_BODY20]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.end36:
; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_INC35]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[CMP41:%.*]] = icmp eq i32 [[SUM_1_LCSSA]], 0
; CHECK-NEXT: ret i1 [[CMP41]]
;
entry:
%nno = alloca [12 x i32], align 16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be passed as argument? Otherwise it needs to be initialized (e.g. passing to a @init() function, to avoid reading uninitialized memory

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

br label %for.body20

for.body20: ; preds = %entry, %for.inc35
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to clean up names, e.g.

Suggested change
for.body20: ; preds = %entry, %for.inc35
loop.header:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done (this was from llvm-reduce :))

%indvars.iv = phi i64 [ 10, %entry ], [ %indvars.iv.next, %for.inc35 ]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%indvars.iv = phi i64 [ 10, %entry ], [ %indvars.iv.next, %for.inc35 ]
%iv = phi i64 [ 10, %entry ], [ %indvars.iv.next, %for.inc35 ]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

%sum.01 = phi i32 [ 0, %entry ], [ %sum.1, %for.inc35 ]
%rem4 = and i64 %indvars.iv, 1
%cmp21 = icmp eq i64 %rem4, 0
%arrayidx24 = getelementptr inbounds nuw [12 x i32], ptr %nno, i64 0, i64 %indvars.iv
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%arrayidx24 = getelementptr inbounds nuw [12 x i32], ptr %nno, i64 0, i64 %indvars.iv
%gep = getelementptr inbounds nuw i32, ptr %nno, i64 %indvars.iv

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

%0 = load i32, ptr %arrayidx24, align 4
br i1 %cmp21, label %if.then22, label %for.inc35

if.then22: ; preds = %for.body20
%mul = shl i32 %0, 1
%rem27 = urem i32 %mul, 10
br label %for.inc35

for.inc35: ; preds = %for.body20, %if.then22
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for.inc35: ; preds = %for.body20, %if.then22
loop.latch:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

%rem27.pn = phi i32 [ %rem27, %if.then22 ], [ %0, %for.body20 ]
%sum.1 = or i32 %rem27.pn, %sum.01
%indvars.iv.next = add nsw i64 %indvars.iv, -1
%cmp19.not = icmp eq i64 %indvars.iv, 0
br i1 %cmp19.not, label %for.end36, label %for.body20

for.end36: ; preds = %for.inc35
%sum.1.lcssa = phi i32 [ %sum.1, %for.inc35 ]
%cmp41 = icmp eq i32 %sum.1.lcssa, 0
ret i1 %cmp41
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for.end36: ; preds = %for.inc35
%sum.1.lcssa = phi i32 [ %sum.1, %for.inc35 ]
%cmp41 = icmp eq i32 %sum.1.lcssa, 0
ret i1 %cmp41
exit:
%sum.1.lcssa = phi i32 [ %sum.1, %for.inc35 ]
ret i32 %sum.1.lcssa

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}

attributes #0 = { nofree norecurse nosync nounwind memory(none) "target-features"="+aes,+avx,+cmov,+crc32,+cx16,+cx8,+fxsr,+mmx,+pclmul,+popcnt,+prfchw,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
Loading