-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AArch64] Codegen for new SCVTF/UCVTF variants (FEAT_FPRCVT) #123767
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,283 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc -mattr=+neon,+fullfp16,+fprcvt -verify-machineinstrs %s -o - | FileCheck %s | ||
| ; RUN: llc -mattr=+neon -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FPRCVT | ||
|
|
||
| target triple = "aarch64-unknown-linux-gnu" | ||
|
|
||
|
|
||
| ; To demonstrate what we have implemented, we'll want a scalar integer value in a SIMD/FP register. | ||
| ; A common case for this setup is when using the result of an integer reduction intrinsic. | ||
| ; The tests below instead obtain the integer with extractelement on a vector argument. | ||
|
|
||
| ; SCVTF | ||
|
|
||
| ; Signed i32 (lane 0 of the vector argument) to half. | ||
| ; CHECK expects a single "scvtf h0, s0"; CHECK-NO-FPRCVT expects "scvtf s0, s0" followed by "fcvt h0, s0". | ||
| define half @scvtf_f16i32(<4 x i32> %x) { | ||
| ; CHECK-LABEL: scvtf_f16i32: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: scvtf h0, s0 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: scvtf_f16i32: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: scvtf s0, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <4 x i32> %x, i64 0 | ||
| %conv = sitofp i32 %extract to half | ||
| ret half %conv | ||
| } | ||
|
|
||
| ; Signed i32 (lane 1 of the vector argument) to half. | ||
| ; Both check prefixes expect the lane to be moved to w8 first and a GPR-source scvtf to be used. | ||
| define half @scvtf_f16i32_neg(<4 x i32> %x) { | ||
| ; CHECK-LABEL: scvtf_f16i32_neg: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mov w8, v0.s[1] | ||
| ; CHECK-NEXT: scvtf h0, w8 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: scvtf_f16i32_neg: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1] | ||
| ; CHECK-NO-FPRCVT-NEXT: scvtf s0, w8 | ||
| ; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <4 x i32> %x, i64 1 | ||
| %conv = sitofp i32 %extract to half | ||
| ret half %conv | ||
| } | ||
|
|
||
| ; Signed i32 (lane 0 of the vector argument) to double. | ||
| ; CHECK expects a single "scvtf d0, s0"; CHECK-NO-FPRCVT expects "fmov w8, s0" followed by "scvtf d0, w8". | ||
| define double @scvtf_f64i32(<4 x i32> %x) { | ||
| ; CHECK-LABEL: scvtf_f64i32: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: scvtf d0, s0 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: fmov w8, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: scvtf d0, w8 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <4 x i32> %x, i64 0 | ||
| %conv = sitofp i32 %extract to double | ||
| ret double %conv | ||
| } | ||
|
|
||
| ; Signed i32 (lane 1 of the vector argument) to double. | ||
| ; Both check prefixes expect the same code: move the lane to w8, then "scvtf d0, w8". | ||
| define double @scvtf_f64i32_neg(<4 x i32> %x) { | ||
| ; CHECK-LABEL: scvtf_f64i32_neg: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mov w8, v0.s[1] | ||
| ; CHECK-NEXT: scvtf d0, w8 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32_neg: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1] | ||
| ; CHECK-NO-FPRCVT-NEXT: scvtf d0, w8 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <4 x i32> %x, i64 1 | ||
| %conv = sitofp i32 %extract to double | ||
| ret double %conv | ||
| } | ||
|
|
||
| ; Signed i64 (lane 0 of the vector argument) to half. | ||
| ; CHECK expects a single "scvtf h0, d0"; CHECK-NO-FPRCVT expects fmov to x8, "scvtf s0, x8", then "fcvt h0, s0". | ||
| define half @scvtf_f16i64(<2 x i64> %x) { | ||
| ; CHECK-LABEL: scvtf_f16i64: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: scvtf h0, d0 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: scvtf_f16i64: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: fmov x8, d0 | ||
| ; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8 | ||
| ; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <2 x i64> %x, i64 0 | ||
| %conv = sitofp i64 %extract to half | ||
| ret half %conv | ||
| } | ||
|
|
||
| ; Signed i64 (lane 1 of the vector argument) to half. | ||
| ; Both check prefixes expect the lane to be moved to x8 first and a GPR-source scvtf to be used. | ||
| define half @scvtf_f16i64_neg(<2 x i64> %x) { | ||
| ; CHECK-LABEL: scvtf_f16i64_neg: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mov x8, v0.d[1] | ||
| ; CHECK-NEXT: scvtf h0, x8 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: scvtf_f16i64_neg: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1] | ||
| ; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8 | ||
| ; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <2 x i64> %x, i64 1 | ||
| %conv = sitofp i64 %extract to half | ||
| ret half %conv | ||
| } | ||
|
|
||
| ; Signed i64 (lane 0 of the vector argument) to float. | ||
| ; CHECK expects a single "scvtf s0, d0"; CHECK-NO-FPRCVT expects "fmov x8, d0" followed by "scvtf s0, x8". | ||
| define float @scvtf_f32i64(<2 x i64> %x) { | ||
| ; CHECK-LABEL: scvtf_f32i64: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: scvtf s0, d0 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: scvtf_f32i64: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: fmov x8, d0 | ||
| ; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <2 x i64> %x, i64 0 | ||
| %conv = sitofp i64 %extract to float | ||
| ret float %conv | ||
| } | ||
|
|
||
| ; Signed i64 (lane 1 of the vector argument) to float. | ||
| ; Both check prefixes expect the same code: move the lane to x8, then "scvtf s0, x8". | ||
| define float @scvtf_f32i64_neg(<2 x i64> %x) { | ||
| ; CHECK-LABEL: scvtf_f32i64_neg: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mov x8, v0.d[1] | ||
| ; CHECK-NEXT: scvtf s0, x8 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: scvtf_f32i64_neg: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1] | ||
| ; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <2 x i64> %x, i64 1 | ||
| %conv = sitofp i64 %extract to float | ||
| ret float %conv | ||
| } | ||
|
|
||
| ; UCVTF | ||
|
|
||
| ; Unsigned i32 (lane 0 of the vector argument) to half. | ||
| ; CHECK expects a single "ucvtf h0, s0"; CHECK-NO-FPRCVT expects "ucvtf s0, s0" followed by "fcvt h0, s0". | ||
| define half @ucvtf_f16i32(<4 x i32> %x) { | ||
| ; CHECK-LABEL: ucvtf_f16i32: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ucvtf h0, s0 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i32: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: ucvtf s0, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <4 x i32> %x, i64 0 | ||
| %conv = uitofp i32 %extract to half | ||
| ret half %conv | ||
| } | ||
|
|
||
| ; Unsigned i32 (lane 1 of the vector argument) to half. | ||
| ; Both check prefixes expect the lane to be moved to w8 first and a GPR-source ucvtf to be used. | ||
| define half @ucvtf_f16i32_neg(<4 x i32> %x) { | ||
| ; CHECK-LABEL: ucvtf_f16i32_neg: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mov w8, v0.s[1] | ||
| ; CHECK-NEXT: ucvtf h0, w8 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i32_neg: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1] | ||
| ; CHECK-NO-FPRCVT-NEXT: ucvtf s0, w8 | ||
| ; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <4 x i32> %x, i64 1 | ||
| %conv = uitofp i32 %extract to half | ||
| ret half %conv | ||
| } | ||
|
|
||
| ; Unsigned i32 (lane 0 of the vector argument) to double. | ||
| ; CHECK expects a single "ucvtf d0, s0"; CHECK-NO-FPRCVT expects "fmov w8, s0" followed by "ucvtf d0, w8". | ||
| define double @ucvtf_f64i32(<4 x i32> %x) { | ||
| ; CHECK-LABEL: ucvtf_f64i32: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ucvtf d0, s0 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: fmov w8, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: ucvtf d0, w8 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <4 x i32> %x, i64 0 | ||
| %conv = uitofp i32 %extract to double | ||
| ret double %conv | ||
| } | ||
|
|
||
| ; Unsigned i32 (lane 1 of the vector argument) to double. | ||
| ; Both check prefixes expect the same code: move the lane to w8, then "ucvtf d0, w8". | ||
| define double @ucvtf_f64i32_neg(<4 x i32> %x) { | ||
| ; CHECK-LABEL: ucvtf_f64i32_neg: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mov w8, v0.s[1] | ||
| ; CHECK-NEXT: ucvtf d0, w8 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32_neg: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1] | ||
| ; CHECK-NO-FPRCVT-NEXT: ucvtf d0, w8 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <4 x i32> %x, i64 1 | ||
| %conv = uitofp i32 %extract to double | ||
| ret double %conv | ||
| } | ||
|
|
||
| ; Unsigned i64 (lane 0 of the vector argument) to half. | ||
| ; CHECK expects a single "ucvtf h0, d0"; CHECK-NO-FPRCVT expects fmov to x8, "ucvtf s0, x8", then "fcvt h0, s0". | ||
| define half @ucvtf_f16i64(<2 x i64> %x) { | ||
| ; CHECK-LABEL: ucvtf_f16i64: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ucvtf h0, d0 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i64: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: fmov x8, d0 | ||
| ; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8 | ||
| ; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <2 x i64> %x, i64 0 | ||
| %conv = uitofp i64 %extract to half | ||
| ret half %conv | ||
| } | ||
|
|
||
| ; Unsigned i64 (lane 1 of the vector argument) to half. | ||
| ; Both check prefixes expect the lane to be moved to x8 first and a GPR-source ucvtf to be used. | ||
| define half @ucvtf_f16i64_neg(<2 x i64> %x) { | ||
| ; CHECK-LABEL: ucvtf_f16i64_neg: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mov x8, v0.d[1] | ||
| ; CHECK-NEXT: ucvtf h0, x8 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i64_neg: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1] | ||
| ; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8 | ||
| ; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <2 x i64> %x, i64 1 | ||
| %conv = uitofp i64 %extract to half | ||
| ret half %conv | ||
| } | ||
|
|
||
| ; Unsigned i64 (lane 0 of the vector argument) to float. | ||
| ; CHECK expects a single "ucvtf s0, d0"; CHECK-NO-FPRCVT expects "fmov x8, d0" followed by "ucvtf s0, x8". | ||
| define float @ucvtf_f32i64(<2 x i64> %x) { | ||
| ; CHECK-LABEL: ucvtf_f32i64: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ucvtf s0, d0 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: ucvtf_f32i64: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: fmov x8, d0 | ||
| ; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <2 x i64> %x, i64 0 | ||
| %conv = uitofp i64 %extract to float | ||
| ret float %conv | ||
| } | ||
|
|
||
| ; Unsigned i64 (lane 1 of the vector argument) to float. | ||
| ; Both check prefixes expect the same code: move the lane to x8, then "ucvtf s0, x8". | ||
| define float @ucvtf_f32i64_neg(<2 x i64> %x) { | ||
| ; CHECK-LABEL: ucvtf_f32i64_neg: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mov x8, v0.d[1] | ||
| ; CHECK-NEXT: ucvtf s0, x8 | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; CHECK-NO-FPRCVT-LABEL: ucvtf_f32i64_neg: | ||
| ; CHECK-NO-FPRCVT: // %bb.0: | ||
| ; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1] | ||
| ; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8 | ||
| ; CHECK-NO-FPRCVT-NEXT: ret | ||
| %extract = extractelement <2 x i64> %x, i64 1 | ||
| %conv = uitofp i64 %extract to float | ||
| ret float %conv | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we also add tests without the extract. Like this one: I think this will force the compiler to use the FPR register and then it can test Just in case: Is using: but it should be : |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be nice to have tests using the extract vector without the .neon.saddv.
Something like:
define half @scvtf_f16i32_v(<4 x i32> %x) {
%extract = extractelement <4 x i32> %x, i64 0
%conv = sitofp i32 %extract to half
ret half %conv
}
I believe atm we cannot do anything for this:
define half @scvtf_f16i32_s(i32 %x) {
%conv = sitofp i32 %x to half
ret half %conv
}
Is it possible to add patterns for that too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok I will replace the .neon.saddv lines in the tests, I agree that it will make them clearer.
When I add these patterns it still uses the old versions of scvtf, as it prioritises them (I think the priority is just the order they appear in the file for now):
def : Pat<(f16 (op (i32 FPR32:$Rn))),
(!cast<Instruction>(NAME # HSr) $Rn)>;
note: op resolves to any_{u,s}int_to_fp