Skip to content

Commit b4edd82

Browse files
authored
[AArch64] Remove redundant FMOV for zero-extended i32/i16 loads to f64 (llvm#146920)
Previously, separate load, zext, and FMOV instructions were emitted. This patch adds new TableGen patterns to avoid the unnecessary FMOV. A test is included in test/CodeGen/AArch64/load_u64_from_u32.ll.
1 parent 75e60e7 commit b4edd82

File tree

2 files changed

+102
-0
lines changed

2 files changed

+102
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3934,6 +3934,26 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
39343934
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
39353935
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
39363936

3937+
// Load zero-extended i32, bitcast to f64.
// Selects a single LDRSui and places its result in the ssub sub-register of
// the f64 value via SUBREG_TO_REG, rather than a load + zext followed by a
// separate FMOV.
3938+
def : Pat <(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
3939+
(SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
3940+
3941+
// Load zero-extended i16, bitcast to f64.
// Selects a single LDRHui and places its result in the hsub sub-register of
// the f64 value via SUBREG_TO_REG, rather than a load + zext followed by a
// separate FMOV.
// The address must be matched with am_indexed16: the halfword load scales its
// immediate by 2 (uimm12s2), so matching with am_indexed32 (scale 4) would
// encode a wrongly scaled offset for any non-zero displacement.
3942+
def : Pat <(f64 (bitconvert (i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
3943+
(SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
3944+
3945+
// Load zero-extended i8, bitcast to f64.
// Selects a single LDRBui and places its result in the bsub sub-register of
// the f64 value via SUBREG_TO_REG, rather than a load + zext followed by a
// separate FMOV.
// The address must be matched with am_indexed8: the byte load uses an unscaled
// immediate (uimm12s1), so matching with am_indexed32 (scale 4) would encode a
// wrongly scaled offset for any non-zero displacement.
3946+
def : Pat <(f64 (bitconvert (i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
3947+
(SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
3948+
3949+
// Load zero-extended i16, bitcast to f32.
// Selects a single LDRHui and places its result in the hsub sub-register of
// the f32 value via SUBREG_TO_REG, rather than a load + zext followed by a
// separate FMOV.
3950+
def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
3951+
(SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
3952+
3953+
// Load zero-extended i8, bitcast to f32.
// Selects a single LDRBui and places its result in the bsub sub-register of
// the f32 value via SUBREG_TO_REG, rather than a load + zext followed by a
// separate FMOV.
// The address must be matched with am_indexed8: the byte load uses an unscaled
// immediate (uimm12s1), so matching with am_indexed16 (scale 2) would encode a
// wrongly scaled offset for any non-zero displacement.
3954+
def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
3955+
(SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
3956+
39373957
// Pre-fetch.
39383958
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
39393959
[(AArch64Prefetch timm:$Rt,
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
3+
4+
; Load an i32, zero-extend it to i64, and bitcast the result to double (f64).
; Expected codegen is a single `ldr s0, [x0]` followed by `ret`, with no
; separate fmov.
; NOTE(review): only a zero offset from %n is exercised here.
5+
define double @_Z9load_u64_from_u32_testPj(ptr %n){
6+
; CHECK-LABEL: _Z9load_u64_from_u32_testPj:
7+
; CHECK: // %bb.0: // %entry
8+
; CHECK-NEXT: ldr s0, [x0]
9+
; CHECK-NEXT: ret
10+
entry:
11+
%0 = load i32, ptr %n, align 4
12+
%conv = zext i32 %0 to i64
13+
%1 = bitcast i64 %conv to double
14+
ret double %1
15+
}
16+
17+
; Load an i16, zero-extend it to i64, and bitcast the result to double (f64).
; Expected codegen is a single `ldr h0, [x0]` followed by `ret`, with no
; separate fmov.
; NOTE(review): only a zero offset from %n is exercised here, so the scaling
; of a non-zero immediate offset is not covered by this test.
18+
define double @_Z9load_u64_from_u16_testPj(ptr %n){
19+
; CHECK-LABEL: _Z9load_u64_from_u16_testPj:
20+
; CHECK: // %bb.0: // %entry
21+
; CHECK-NEXT: ldr h0, [x0]
22+
; CHECK-NEXT: ret
23+
entry:
24+
%0 = load i16, ptr %n, align 2
25+
%conv = zext i16 %0 to i64
26+
%1 = bitcast i64 %conv to double
27+
ret double %1
28+
}
29+
30+
; Load an i8, zero-extend it to i64, and bitcast the result to double (f64).
; Expected codegen is a single `ldr b0, [x0]` followed by `ret`, with no
; separate fmov.
; NOTE(review): only a zero offset from %n is exercised here, so the scaling
; of a non-zero immediate offset is not covered by this test.
31+
define double @_Z16load_u64_from_u8Ph(ptr %n){
32+
; CHECK-LABEL: _Z16load_u64_from_u8Ph:
33+
; CHECK: // %bb.0: // %entry
34+
; CHECK-NEXT: ldr b0, [x0]
35+
; CHECK-NEXT: ret
36+
entry:
37+
%0 = load i8, ptr %n, align 1
38+
%conv = zext i8 %0 to i64
39+
%1 = bitcast i64 %conv to double
40+
ret double %1
41+
}
42+
43+
; Load an i16, zero-extend it to i32, and bitcast the result to float (f32).
; Expected codegen is a single `ldr h0, [x0]` followed by `ret`, with no
; separate fmov.
; NOTE(review): only a zero offset from %n is exercised here.
44+
define float @_Z17load_u32_from_u16Pt(ptr %n){
45+
; CHECK-LABEL: _Z17load_u32_from_u16Pt:
46+
; CHECK: // %bb.0: // %entry
47+
; CHECK-NEXT: ldr h0, [x0]
48+
; CHECK-NEXT: ret
49+
entry:
50+
%0 = load i16, ptr %n, align 2
51+
%conv = zext i16 %0 to i32
52+
%1 = bitcast i32 %conv to float
53+
ret float %1
54+
}
55+
56+
; Load an i8, zero-extend it to i32, and bitcast the result to float (f32).
; Expected codegen is a single `ldr b0, [x0]` followed by `ret`, with no
; separate fmov.
; NOTE(review): only a zero offset from %n is exercised here, so the scaling
; of a non-zero immediate offset is not covered by this test.
57+
define float @_Z16load_u32_from_u8Ph(ptr %n){
58+
; CHECK-LABEL: _Z16load_u32_from_u8Ph:
59+
; CHECK: // %bb.0: // %entry
60+
; CHECK-NEXT: ldr b0, [x0]
61+
; CHECK-NEXT: ret
62+
entry:
63+
%0 = load i8, ptr %n, align 1
64+
%conv = zext i8 %0 to i32
65+
%1 = bitcast i32 %conv to float
66+
ret float %1
67+
}
68+
69+
; Load an i8, zero-extend it to i16, and bitcast the result to half (f16).
; Expected codegen is a single `ldr b0, [x0]`, a register-kill annotation
; comment, and `ret`.
; NOTE(review): presumably this case is selected by a pattern other than the
; f64/f32 ones listed for this change -- confirm which pattern covers it.
70+
define half @_Z16load_u16_from_u8Ph(ptr %n){
71+
; CHECK-LABEL: _Z16load_u16_from_u8Ph:
72+
; CHECK: // %bb.0: // %entry
73+
; CHECK-NEXT: ldr b0, [x0]
74+
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
75+
; CHECK-NEXT: ret
76+
entry:
77+
%0 = load i8, ptr %n, align 1
78+
%conv = zext i8 %0 to i16
79+
%1 = bitcast i16 %conv to half
80+
ret half %1
81+
}
82+

0 commit comments

Comments
 (0)