Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -5511,6 +5511,15 @@ multiclass IntegerToFPSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, SDPa
let Inst{31} = 1; // 64-bit FPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
}

def : Pat<(f16 (node (i32 (extractelt (v4i32 V128:$Rn), (i64 0))))),
(!cast<Instruction>(NAME # HSr) (EXTRACT_SUBREG $Rn, ssub))>;
def : Pat<(f64 (node (i32 (extractelt (v4i32 V128:$Rn), (i64 0))))),
(!cast<Instruction>(NAME # DSr) (EXTRACT_SUBREG $Rn, ssub))>;
def : Pat<(f16 (node (i64 (extractelt (v2i64 V128:$Rn), (i64 0))))),
(!cast<Instruction>(NAME # HDr) (EXTRACT_SUBREG $Rn, dsub))>;
def : Pat<(f32 (node (i64 (extractelt (v2i64 V128:$Rn), (i64 0))))),
(!cast<Instruction>(NAME # SDr) (EXTRACT_SUBREG $Rn, dsub))>;
}

//---
Expand Down Expand Up @@ -13270,4 +13279,3 @@ multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
let Predicates = [HasNEON, HasF8F32MM];
}
}

4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -5068,8 +5068,8 @@ defm SCVTF : IntegerToFP<0b00, 0b010, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;

let Predicates = [HasNEON, HasFPRCVT] in {
defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf">;
defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf">;
defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf", any_uint_to_fp>;
}

def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
Expand Down
283 changes: 283 additions & 0 deletions llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mattr=+neon,+fullfp16,+fprcvt -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mattr=+neon -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FPRCVT

target triple = "aarch64-unknown-linux-gnu"


; To demonstrate what we have implemented, we'll want a scalar integer value in a SIMD/FP register.
; A common case for this setup is when using the result of an integer reduction intrinsic.

; SCVTF

define half @scvtf_f16i32(<4 x i32> %x) {
; CHECK-LABEL: scvtf_f16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: scvtf h0, s0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: scvtf_f16i32:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: scvtf s0, s0
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to have tests using the extract vector without the .neon.saddv.
Something like:
define half @scvtf_f16i32_v(<4 x i32> %x) {
%extract = extractelement <4 x i32> %x, i64 0
%conv = sitofp i32 %extract to half
ret half %conv
}

I believe atm we cannot do anything for this:
define half @scvtf_f16i32_s(i32 %x) {
%conv = sitofp i32 %x to half
ret half %conv
}
Is it possible to add patterns for that too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok I will replace the .neon.saddv lines in the tests, I agree that it will make them clearer.

When I add these patterns, it still uses the old versions of scvtf, as it prioritises them (I think the priority is just the order in which they appear in the file for now):
def : Pat<(f16 (op (i32 FPR32:$Rn))),
(!cast<Instruction>(NAME # HSr) $Rn)>;
note: op resolves to any_{u,s}int_to_fp

; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <4 x i32> %x, i64 0
%conv = sitofp i32 %extract to half
ret half %conv
}

; Negative test for signed i32 -> half: the source element is lane 1 rather
; than lane 0, so both with and without +fprcvt the expected code moves the
; lane to a GPR (`mov w8, v0.s[1]`) and converts from that GPR.
define half @scvtf_f16i32_neg(<4 x i32> %x) {
; CHECK-LABEL: scvtf_f16i32_neg:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: scvtf h0, w8
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: scvtf_f16i32_neg:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1]
; CHECK-NO-FPRCVT-NEXT: scvtf s0, w8
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <4 x i32> %x, i64 1
%conv = sitofp i32 %extract to half
ret half %conv
}

; Signed i32 -> double where the source is lane 0 of a <4 x i32>.
; With +fprcvt the expected output is the single FPR-to-FPR `scvtf d0, s0`;
; without it the value is first moved to a GPR (`fmov w8, s0`) and converted
; from there.
define double @scvtf_f64i32(<4 x i32> %x) {
; CHECK-LABEL: scvtf_f64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: scvtf d0, s0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: fmov w8, s0
; CHECK-NO-FPRCVT-NEXT: scvtf d0, w8
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <4 x i32> %x, i64 0
%conv = sitofp i32 %extract to double
ret double %conv
}

; Negative test for signed i32 -> double: the source element is lane 1 rather
; than lane 0, so the expected code is identical with and without +fprcvt:
; `mov w8, v0.s[1]` followed by the GPR-source `scvtf d0, w8`.
define double @scvtf_f64i32_neg(<4 x i32> %x) {
; CHECK-LABEL: scvtf_f64i32_neg:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32_neg:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1]
; CHECK-NO-FPRCVT-NEXT: scvtf d0, w8
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <4 x i32> %x, i64 1
%conv = sitofp i32 %extract to double
ret double %conv
}

; Signed i64 -> half where the source is lane 0 of a <2 x i64>.
; With +fprcvt the expected output is the single `scvtf h0, d0`; without it
; the value goes through a GPR (`fmov x8, d0`), is converted to single
; precision, and is then narrowed with `fcvt h0, s0`.
define half @scvtf_f16i64(<2 x i64> %x) {
; CHECK-LABEL: scvtf_f16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: scvtf h0, d0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: scvtf_f16i64:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: fmov x8, d0
; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <2 x i64> %x, i64 0
%conv = sitofp i64 %extract to half
ret half %conv
}

; Negative test for signed i64 -> half: the source element is lane 1 rather
; than lane 0, so both with and without +fprcvt the expected code moves the
; lane to a GPR (`mov x8, v0.d[1]`) and converts from that GPR.
define half @scvtf_f16i64_neg(<2 x i64> %x) {
; CHECK-LABEL: scvtf_f16i64_neg:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: scvtf h0, x8
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: scvtf_f16i64_neg:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1]
; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <2 x i64> %x, i64 1
%conv = sitofp i64 %extract to half
ret half %conv
}

; Signed i64 -> float where the source is lane 0 of a <2 x i64>.
; With +fprcvt the expected output is the single FPR-to-FPR `scvtf s0, d0`;
; without it the value is first moved to a GPR (`fmov x8, d0`) and converted
; from there.
define float @scvtf_f32i64(<2 x i64> %x) {
; CHECK-LABEL: scvtf_f32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: scvtf s0, d0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: scvtf_f32i64:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: fmov x8, d0
; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <2 x i64> %x, i64 0
%conv = sitofp i64 %extract to float
ret float %conv
}

; Negative test for signed i64 -> float: the source element is lane 1 rather
; than lane 0, so the expected code is identical with and without +fprcvt:
; `mov x8, v0.d[1]` followed by the GPR-source `scvtf s0, x8`.
define float @scvtf_f32i64_neg(<2 x i64> %x) {
; CHECK-LABEL: scvtf_f32i64_neg:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: scvtf s0, x8
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: scvtf_f32i64_neg:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1]
; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <2 x i64> %x, i64 1
%conv = sitofp i64 %extract to float
ret float %conv
}

; UCVTF

; Unsigned i32 -> half where the source is lane 0 of a <4 x i32>.
; With +fprcvt (and +fullfp16) the expected output is the single
; `ucvtf h0, s0`; without them it is `ucvtf s0, s0` followed by a narrowing
; `fcvt h0, s0`.
define half @ucvtf_f16i32(<4 x i32> %x) {
; CHECK-LABEL: ucvtf_f16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ucvtf h0, s0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i32:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, s0
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <4 x i32> %x, i64 0
%conv = uitofp i32 %extract to half
ret half %conv
}

; Negative test for unsigned i32 -> half: the source element is lane 1 rather
; than lane 0, so both with and without +fprcvt the expected code moves the
; lane to a GPR (`mov w8, v0.s[1]`) and converts from that GPR.
define half @ucvtf_f16i32_neg(<4 x i32> %x) {
; CHECK-LABEL: ucvtf_f16i32_neg:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: ucvtf h0, w8
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i32_neg:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1]
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, w8
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <4 x i32> %x, i64 1
%conv = uitofp i32 %extract to half
ret half %conv
}

; Unsigned i32 -> double where the source is lane 0 of a <4 x i32>.
; With +fprcvt the expected output is the single FPR-to-FPR `ucvtf d0, s0`;
; without it the value is first moved to a GPR (`fmov w8, s0`) and converted
; from there.
define double @ucvtf_f64i32(<4 x i32> %x) {
; CHECK-LABEL: ucvtf_f64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ucvtf d0, s0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: fmov w8, s0
; CHECK-NO-FPRCVT-NEXT: ucvtf d0, w8
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <4 x i32> %x, i64 0
%conv = uitofp i32 %extract to double
ret double %conv
}

; Negative test for unsigned i32 -> double: the source element is lane 1
; rather than lane 0, so the expected code is identical with and without
; +fprcvt: `mov w8, v0.s[1]` followed by the GPR-source `ucvtf d0, w8`.
define double @ucvtf_f64i32_neg(<4 x i32> %x) {
; CHECK-LABEL: ucvtf_f64i32_neg:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: ucvtf d0, w8
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32_neg:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1]
; CHECK-NO-FPRCVT-NEXT: ucvtf d0, w8
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <4 x i32> %x, i64 1
%conv = uitofp i32 %extract to double
ret double %conv
}

; Unsigned i64 -> half where the source is lane 0 of a <2 x i64>.
; With +fprcvt the expected output is the single `ucvtf h0, d0`; without it
; the value goes through a GPR (`fmov x8, d0`), is converted to single
; precision, and is then narrowed with `fcvt h0, s0`.
define half @ucvtf_f16i64(<2 x i64> %x) {
; CHECK-LABEL: ucvtf_f16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ucvtf h0, d0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i64:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: fmov x8, d0
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <2 x i64> %x, i64 0
%conv = uitofp i64 %extract to half
ret half %conv
}

; Negative test for unsigned i64 -> half: the source element is lane 1 rather
; than lane 0, so both with and without +fprcvt the expected code moves the
; lane to a GPR (`mov x8, v0.d[1]`) and converts from that GPR.
define half @ucvtf_f16i64_neg(<2 x i64> %x) {
; CHECK-LABEL: ucvtf_f16i64_neg:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: ucvtf h0, x8
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i64_neg:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1]
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <2 x i64> %x, i64 1
%conv = uitofp i64 %extract to half
ret half %conv
}

; Unsigned i64 -> float where the source is lane 0 of a <2 x i64>.
; With +fprcvt the expected output is the single FPR-to-FPR `ucvtf s0, d0`;
; without it the value is first moved to a GPR (`fmov x8, d0`) and converted
; from there.
define float @ucvtf_f32i64(<2 x i64> %x) {
; CHECK-LABEL: ucvtf_f32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ucvtf s0, d0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: ucvtf_f32i64:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: fmov x8, d0
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <2 x i64> %x, i64 0
%conv = uitofp i64 %extract to float
ret float %conv
}

; Negative test for unsigned i64 -> float: the source element is lane 1 rather
; than lane 0, so the expected code is identical with and without +fprcvt:
; `mov x8, v0.d[1]` followed by the GPR-source `ucvtf s0, x8`.
define float @ucvtf_f32i64_neg(<2 x i64> %x) {
; CHECK-LABEL: ucvtf_f32i64_neg:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: ucvtf s0, x8
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: ucvtf_f32i64_neg:
; CHECK-NO-FPRCVT: // %bb.0:
; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1]
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8
; CHECK-NO-FPRCVT-NEXT: ret
%extract = extractelement <2 x i64> %x, i64 1
%conv = uitofp i64 %extract to float
ret float %conv
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we also add tests without the extract? Like this one:

define <1 x half> @scvtf_f16i32_1(<1 x i32> %x) {
 %conv = sitofp <1 x i32> %x to <1 x half>
 ret <1 x half> %conv
}

I think this will force the compiler to use the FPR register, so that it can test
[(set (dvt dstType:$Rd), (node srcType:$Rn))]>, from line 5387, which could now also be triggered when using any_sint_to_fp.

Just in case:
I created these tests locally and noticed that converting between 32 bits and 64 bits does not use the new SCVTF as expected. But I don't think you need to fix that in this patch.
For instance:

define <1 x float> @scvtf_f32i64(<1 x i64> %x) {
 %conv = sitofp <1 x i64> %x to <1 x float>
 ret <1 x float> %conv
}

This is currently using:

 scvtf v0.2d, v0.2d
fcvtn v0.2s, v0.2d

but it should be:
scvtf s0, d0.