-
Notifications
You must be signed in to change notification settings - Fork 14.7k
Labels
Description
Consider an intrinsic that has a valid fixed-vector cost but an invalid scalable-vector cost, as in https://godbolt.org/z/4M46fvMnc:
; RUN: opt -mtriple aarch64 -mattr=+sve -passes="print<cost-model>"
; Fixed-vector case: a single llvm.minimumnum call on <4 x float> operands.
; This report states that the cost model gives this form a valid cost.
define <4 x float> @minimumnum.fixed(<4 x float> %a, <4 x float> %b) {
%c = call <4 x float> @llvm.minimumnum(<4 x float> %a, <4 x float> %b)
ret <4 x float> %c
}
; Scalable-vector case: the same llvm.minimumnum call on <vscale x 4 x float> operands.
; This report states that the cost model gives this form an invalid cost.
define <vscale x 4 x float> @minimumnum(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
%c = call <vscale x 4 x float> @llvm.minimumnum(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
ret <vscale x 4 x float> %c
}
Now, running loop-vectorize should respect this invalid cost and not produce a <vscale x 4 x float> @llvm.minimumnum call. However, running this:
; RUN: opt -passes=loop-vectorize,simplifycfg -mtriple=aarch64 -mattr=+sve -S %s
; Reproducer input: a scalar loop that stores the result of an llvm.minimumnum
; call with constant operands into consecutive float slots of %p.
define void @vectorized_hoisted(ptr %p) {
entry:
br label %loop
loop: ; preds = %loop, %entry
; %iv starts at 1 and is only used for the exit test; %idx starts at 0 and indexes %p.
%iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
%idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
; Both operands are constants, so this call does not depend on any loop value.
%res = tail call float @llvm.minimumnum.f32(float 0.0, float 0.0)
%gep.p.red = getelementptr float, ptr %p, i64 %idx
store float %res, ptr %gep.p.red, align 4
%idx.next = add i64 %idx, 1
%iv.next = add i64 %iv, 1
; The loop exits once %iv.next equals 0.
%exit.cond = icmp eq i64 %iv.next, 0
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret void
}
declare float @llvm.minimumnum.f32(float, float)
yields:
; ModuleID = 'reduced.ll'
source_filename = "reduced.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64"
; Output of the loop-vectorize,simplifycfg run on the reproducer: a scalable-vector
; loop (%vector.body) followed by the original scalar loop (%loop) as a remainder.
define void @vectorized_hoisted(ptr %p) #0 {
entry:
; %1 = vscale * 8; %n.vec is -1 rounded down to a multiple of %1.
%0 = call i64 @llvm.vscale.i64()
%1 = mul nuw i64 %0, 8
%n.mod.vf = urem i64 -1, %1
%n.vec = sub i64 -1, %n.mod.vf
; %3 = vscale * 8 is the amount %index advances per vector iteration.
%2 = call i64 @llvm.vscale.i64()
%3 = mul nuw i64 %2, 8
; %4 is the value %iv takes when the scalar loop is entered from %middle.block.
%4 = add i64 1, %n.vec
; NOTE(review): this is the scalable-vector llvm.minimumnum call that, per this
; report, should not have been produced given its invalid cost.
%5 = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer)
br label %vector.body
vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
; Each iteration stores %5 twice: at %6 and at vscale * 4 floats past %6.
%6 = getelementptr float, ptr %p, i64 %index
%7 = call i64 @llvm.vscale.i64()
%8 = mul nuw i64 %7, 4
%9 = getelementptr float, ptr %6, i64 %8
store <vscale x 4 x float> %5, ptr %6, align 4
store <vscale x 4 x float> %5, ptr %9, align 4
%index.next = add nuw i64 %index, %3
%10 = icmp eq i64 %index.next, %n.vec
br i1 %10, label %middle.block, label %vector.body, !llvm.loop !0
middle.block: ; preds = %vector.body
; Skip the scalar loop when %n.vec equals -1, i.e. when %n.mod.vf is 0.
%cmp.n = icmp eq i64 -1, %n.vec
br i1 %cmp.n, label %exit, label %loop
loop: ; preds = %middle.block, %loop
; Scalar loop, resumed from %middle.block with %iv = %4 and %idx = %n.vec.
%iv = phi i64 [ %iv.next, %loop ], [ %4, %middle.block ]
%idx = phi i64 [ %idx.next, %loop ], [ %n.vec, %middle.block ]
%res = tail call float @llvm.minimumnum.f32(float 0.000000e+00, float 0.000000e+00)
%gep.p.red = getelementptr float, ptr %p, i64 %idx
store float %res, ptr %gep.p.red, align 4
%idx.next = add i64 %idx, 1
%iv.next = add i64 %iv, 1
%exit.cond = icmp eq i64 %iv.next, 0
br i1 %exit.cond, label %exit, label %loop, !llvm.loop !3
exit: ; preds = %middle.block, %loop
ret void
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.minimumnum.f32(float, float) #1
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare i64 @llvm.vscale.i64() #2
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) #3
attributes #0 = { "target-features"="+sve" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-features"="+sve" }
attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
!0 = distinct !{!0, !1, !2}
!1 = !{!"llvm.loop.isvectorized", i32 1}
!2 = !{!"llvm.loop.unroll.runtime.disable"}
!3 = distinct !{!3, !2, !1}
LV seems to cost it using the fixed-vector cost (3), instead of the scalable-vector cost (invalid). No crash is observed yet due to this bug, but #145545 exhibits a crash.
CC: @davemgreen, @fhahn, @lukel97.