-
Notifications
You must be signed in to change notification settings - Fork 14.7k
Open
Description
https://llvm.godbolt.org/z/jeT9a97P7
; RUN: opt -S -passes=loop-vectorize -mtriple=x86_64-unknown-linux-gnu
; Reproducer: multiplies, in place, every float in [%base, %base + 8 * %len)
; by the float stored at %scale.ptr.
;   %base      - start of the buffer; the loop walks it in 8-byte steps and
;                touches two floats (offsets 0 and 4) per step
;   %len       - number of 8-byte steps
;   %scale.ptr - address of the scale factor; it is re-loaded before each
;                multiply, i.e. twice per iteration
; None of the pointer arguments carries a noalias attribute, so the IR itself
; does not say that %scale.ptr is disjoint from the buffer being stored to.
define void @test(ptr %base, i64 %len, ptr %scale.ptr) {
start:
; Byte size of the buffer: %len * 8.
%len.idx = shl nuw nsw i64 %len, 3
; One-past-the-end pointer, used as the loop exit bound.
%end = getelementptr inbounds nuw i8, ptr %base, i64 %len.idx
; Skip the loop entirely when %len is zero.
%_228 = icmp eq i64 %len, 0
br i1 %_228, label %bb2, label %bb3.lr.ph
bb3.lr.ph:
br label %bb3
bb3:
; Pointer induction variable: current 8-byte element, advanced by 8 each trip.
%iter.sroa.0.09 = phi ptr [ %base, %bb3.lr.ph ], [ %_32, %bb3 ]
%_32 = getelementptr inbounds nuw i8, ptr %iter.sroa.0.09, i64 8
; First float (offset 0): load scale, load value, multiply, store back.
%_8 = load float, ptr %scale.ptr , align 4
%2 = load float, ptr %iter.sroa.0.09, align 4
%3 = fmul float %_8, %2
store float %3, ptr %iter.sroa.0.09, align 4
; Second float (offset 4): the scale is loaded again after the store above.
%_9 = load float, ptr %scale.ptr , align 4
%4 = getelementptr inbounds nuw i8, ptr %iter.sroa.0.09, i64 4
%5 = load float, ptr %4, align 4
%6 = fmul float %_9, %5
store float %6, ptr %4, align 4
; Exit once the advanced pointer reaches %end.
%_22 = icmp eq ptr %_32, %end
br i1 %_22, label %bb2.loopexit, label %bb3
bb2.loopexit:
br label %bb2
bb2:
ret void
}
Generates a runtime check and this vector body:
; Vectorized loop body. %index advances by 2 per iteration, and each iteration
; loads and stores one <4 x float> (two 8-byte elements of the scalar loop).
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; Byte offset into the buffer: %index * 8.
%offset.idx = mul i64 %index, 8
%next.gep = getelementptr i8, ptr %base, i64 %offset.idx
; First load of the scale factor (tagged !alias.scope !0), splatted to <2 x float>.
%9 = load float, ptr %scale.ptr, align 4, !alias.scope !0
%broadcast.splatinsert = insertelement <2 x float> poison, float %9, i64 0
%broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
; Wide load of four consecutive floats, split into lanes (0, 2) and lanes (1, 3).
; This load has no metadata attached.
%wide.vec = load <4 x float>, ptr %next.gep, align 4
%strided.vec = shufflevector <4 x float> %wide.vec, <4 x float> poison, <2 x i32> <i32 0, i32 2>
%strided.vec2 = shufflevector <4 x float> %wide.vec, <4 x float> poison, <2 x i32> <i32 1, i32 3>
%10 = fmul <2 x float> %broadcast.splat, %strided.vec
; Second load of the same scale factor, still inside the loop body.
%11 = load float, ptr %scale.ptr, align 4, !alias.scope !0
%broadcast.splatinsert3 = insertelement <2 x float> poison, float %11, i64 0
%broadcast.splat4 = shufflevector <2 x float> %broadcast.splatinsert3, <2 x float> poison, <2 x i32> zeroinitializer
%12 = fmul <2 x float> %broadcast.splat4, %strided.vec2
; Concatenate both products, then re-interleave them back into memory order.
%13 = shufflevector <2 x float> %10, <2 x float> %12, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%interleaved.vec = shufflevector <4 x float> %13, <4 x float> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; The wide store has no metadata attached (compare the !alias.scope on the scale loads).
store <4 x float> %interleaved.vec, ptr %next.gep, align 4
%index.next = add nuw i64 %index, 2
%14 = icmp eq i64 %index.next, %n.vec
br i1 %14, label %middle.block, label %vector.body, !llvm.loop !3
Note that there is no `!noalias` metadata on the store, which means that the loads of the scaling factor cannot be LICMed.
The metadata is present if the initial loop only stores one value: https://llvm.godbolt.org/z/3Yrhna9je