Skip to content

Commit 248ad71

Browse files
authored
[AArch64] Correct SCVTF/UCVTF instructions for vector input (#152974)
This pull request improves support for scalar floating-point conversions from integer vectors on AArch64, specifically for the `scvtf` and `ucvtf` instructions. It fixes pattern matching so that single-element conversions from vectors now generate the expected scalar instructions, and adds a new test to verify correct behavior when extracting a lane from a widened vector.

**Pattern matching and code generation improvements:**

* Added new patterns in `AArch64InstrInfo.td` to correctly match conversions from `v2i32` to `v1f64` using `scvtf` and `ucvtf`. The patterns match lane 0 of a `v2i32` that is sign-extended (for `scvtf`) or zero-extended (for `ucvtf`) to `v2i64` and then converted to `v2f64`, ensuring the scalar instructions (`scvtf d0, s0` and `ucvtf d0, s0`) are generated when extracting a single lane.

**Test updates and additions:**

* Updated `scvtf_f64i32_simple` and `ucvtf_f64i32_simple` tests in `fprcvt-cvtf.ll` to reflect the correct generation of scalar instructions, removing previous comments about incorrect codegen and showing the expected output.
* Added a new test `uitofp_sext_v2i32_extract_lane0` to verify correct code generation when extracting a lane from a widened vector and converting to double. Because this test pairs a sign extension with an unsigned conversion, it must not match the new patterns; the expected output therefore keeps the vector `sshll` + `ucvtf` sequence both with and without FPRCVT.
1 parent 1a746b6 commit 248ad71

File tree

2 files changed

+28
-14
lines changed

2 files changed

+28
-14
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5409,6 +5409,11 @@ defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;
54095409
let Predicates = [HasNEON, HasFPRCVT] in {
54105410
defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf", any_sint_to_fp>;
54115411
defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf", any_uint_to_fp>;
5412+
5413+
def : Pat<(v1f64 (extract_subvector (v2f64 (sint_to_fp (v2i64 (sext (v2i32 V64:$Rn))))), (i64 0))),
5414+
(SCVTFDSr (EXTRACT_SUBREG V64:$Rn, ssub))>;
5415+
def : Pat<(v1f64 (extract_subvector (v2f64 (uint_to_fp (v2i64 (zext (v2i32 V64:$Rn))))), (i64 0))),
5416+
(UCVTFDSr (EXTRACT_SUBREG V64:$Rn, ssub))>;
54125417
}
54135418

54145419
def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),

llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -94,16 +94,10 @@ define double @scvtf_f64i32_neg(<4 x i32> %x) {
9494
ret double %conv
9595
}
9696

97-
; This test does not give the intended result of scvtf d0, s0
98-
; This is due to the input being loaded as a 2 item vector and
99-
; therefore using vector inputs that do not match the pattern
100-
; This test will be fixed in a future revision
10197
define <1 x double> @scvtf_f64i32_simple(<1 x i32> %x) {
10298
; CHECK-LABEL: scvtf_f64i32_simple:
10399
; CHECK: // %bb.0:
104-
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
105-
; CHECK-NEXT: scvtf v0.2d, v0.2d
106-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
100+
; CHECK-NEXT: scvtf d0, s0
107101
; CHECK-NEXT: ret
108102
;
109103
; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32_simple:
@@ -315,16 +309,10 @@ define double @ucvtf_f64i32_neg(<4 x i32> %x) {
315309
ret double %conv
316310
}
317311

318-
; This test does not give the intended result of ucvtf d0, s0
319-
; This is due to the input being loaded as a 2 item vector and
320-
; therefore using vector inputs that do not match the pattern
321-
; This test will be fixed in a future revision
322312
define <1 x double> @ucvtf_f64i32_simple(<1 x i32> %x) {
323313
; CHECK-LABEL: ucvtf_f64i32_simple:
324314
; CHECK: // %bb.0:
325-
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
326-
; CHECK-NEXT: ucvtf v0.2d, v0.2d
327-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
315+
; CHECK-NEXT: ucvtf d0, s0
328316
; CHECK-NEXT: ret
329317
;
330318
; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32_simple:
@@ -449,3 +437,24 @@ define <1 x float> @ucvtf_f32i64_simple(<1 x i64> %x) {
449437
%conv = uitofp <1 x i64> %x to <1 x float>
450438
ret <1 x float> %conv
451439
}
440+
441+
define <1 x double> @uitofp_sext_v2i32_extract_lane0(<2 x i32> %x) {
442+
; CHECK-LABEL: uitofp_sext_v2i32_extract_lane0:
443+
; CHECK: // %bb.0:
444+
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
445+
; CHECK-NEXT: ucvtf v0.2d, v0.2d
446+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
447+
; CHECK-NEXT: ret
448+
;
449+
; CHECK-NO-FPRCVT-LABEL: uitofp_sext_v2i32_extract_lane0:
450+
; CHECK-NO-FPRCVT: // %bb.0:
451+
; CHECK-NO-FPRCVT-NEXT: sshll v0.2d, v0.2s, #0
452+
; CHECK-NO-FPRCVT-NEXT: ucvtf v0.2d, v0.2d
453+
; CHECK-NO-FPRCVT-NEXT: // kill: def $d0 killed $d0 killed $q0
454+
; CHECK-NO-FPRCVT-NEXT: ret
455+
%wide = sext <2 x i32> %x to <2 x i64>
456+
%fpv2 = uitofp <2 x i64> %wide to <2 x double>
457+
%lane0 = shufflevector <2 x double> %fpv2, <2 x double> poison, <1 x i32> zeroinitializer
458+
ret <1 x double> %lane0
459+
}
460+

0 commit comments

Comments
 (0)