Skip to content

[LV] Hoisted vector code is costed without vscale #151664

@artagnon

Description

@artagnon

Consider an intrinsic that has a valid fixed-vector cost but an invalid scalable-vector cost, as in https://godbolt.org/z/4M46fvMnc:

; RUN: opt -mtriple aarch64 -mattr=+sve -passes="print<cost-model>"

define <4 x float> @minimumnum.fixed(<4 x float> %a, <4 x float> %b) {
  %c = call <4 x float> @llvm.minimumnum(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %c
}

define <vscale x 4 x float> @minimumnum(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  %c = call <vscale x 4 x float> @llvm.minimumnum(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %c
}

Running loop-vectorize should respect this invalid cost and not produce a <vscale x 4 x float> @llvm.minimumnum call. However, running this:

; RUN: opt -passes=loop-vectorize,simplifycfg -mtriple=aarch64 -mattr=+sve -S %s

define void @vectorized_hoisted(ptr %p) {
entry:
  br label %loop

loop:                                              ; preds = %loop, %entry
  %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %res = tail call float @llvm.minimumnum.f32(float 0.0, float 0.0)
  %gep.p.red = getelementptr float, ptr %p, i64 %idx
  store float %res, ptr %gep.p.red, align 4
  %idx.next = add i64 %idx, 1
  %iv.next = add i64 %iv, 1
  %exit.cond = icmp eq i64 %iv.next, 0
  br i1 %exit.cond, label %exit, label %loop

exit:                                              ; preds = %loop
  ret void
}

declare float @llvm.minimumnum.f32(float, float)

yields:

; ModuleID = 'reduced.ll'
source_filename = "reduced.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64"

define void @vectorized_hoisted(ptr %p) #0 {
entry:
  %0 = call i64 @llvm.vscale.i64()
  %1 = mul nuw i64 %0, 8
  %n.mod.vf = urem i64 -1, %1
  %n.vec = sub i64 -1, %n.mod.vf
  %2 = call i64 @llvm.vscale.i64()
  %3 = mul nuw i64 %2, 8
  %4 = add i64 1, %n.vec
  %5 = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer)
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %6 = getelementptr float, ptr %p, i64 %index
  %7 = call i64 @llvm.vscale.i64()
  %8 = mul nuw i64 %7, 4
  %9 = getelementptr float, ptr %6, i64 %8
  store <vscale x 4 x float> %5, ptr %6, align 4
  store <vscale x 4 x float> %5, ptr %9, align 4
  %index.next = add nuw i64 %index, %3
  %10 = icmp eq i64 %index.next, %n.vec
  br i1 %10, label %middle.block, label %vector.body, !llvm.loop !0

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 -1, %n.vec
  br i1 %cmp.n, label %exit, label %loop

loop:                                             ; preds = %middle.block, %loop
  %iv = phi i64 [ %iv.next, %loop ], [ %4, %middle.block ]
  %idx = phi i64 [ %idx.next, %loop ], [ %n.vec, %middle.block ]
  %res = tail call float @llvm.minimumnum.f32(float 0.000000e+00, float 0.000000e+00)
  %gep.p.red = getelementptr float, ptr %p, i64 %idx
  store float %res, ptr %gep.p.red, align 4
  %idx.next = add i64 %idx, 1
  %iv.next = add i64 %iv, 1
  %exit.cond = icmp eq i64 %iv.next, 0
  br i1 %exit.cond, label %exit, label %loop, !llvm.loop !3

exit:                                             ; preds = %middle.block, %loop
  ret void
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.minimumnum.f32(float, float) #1

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare i64 @llvm.vscale.i64() #2

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) #3

attributes #0 = { "target-features"="+sve" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-features"="+sve" }
attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!0 = distinct !{!0, !1, !2}
!1 = !{!"llvm.loop.isvectorized", i32 1}
!2 = !{!"llvm.loop.unroll.runtime.disable"}
!3 = distinct !{!3, !2, !1}

LV seems to cost the hoisted call using the fixed-vector cost (3) instead of the scalable-vector cost (invalid). No crash is observed yet due to this bug, but #145545 exhibits a crash.

CC: @davemgreen, @fhahn, @lukel97.

Metadata

Metadata

Assignees

Type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions