Skip to content

Commit d371cba

Browse files
paulwalker-arm authored and aokblast committed
[LLVM][CodeGen][AArch64] Fix global-isel for LD1R. (llvm#164418)
LD1Rv8b only supports a base register, but the DAG is matched using am_indexed8, and the offset it finds is silently dropped. I've also fixed a couple of immediate operand type inconsistencies. These don't manifest as bugs because their incorrect scaling is overridden by the complex pattern and the MachineInstr, both of which are correct, and thus there's nothing to test.
1 parent 35bf9c0 commit d371cba

File tree

3 files changed

+15
-25
lines changed

3 files changed

+15
-25
lines changed

llvm/lib/Target/AArch64/AArch64InstrAtomics.td

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,8 @@ def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_nonext_32>
136136
(ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend))))),
137137
(LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
138138
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_nonext_32>
139-
(am_indexed32 GPR64sp:$Rn, uimm12s8:$offset))))),
140-
(LDRSui GPR64sp:$Rn, uimm12s8:$offset)>;
139+
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
140+
(LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
141141
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_nonext_32>
142142
(am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
143143
(LDURSi GPR64sp:$Rn, simm9:$offset)>;
@@ -236,11 +236,11 @@ def : Pat<(relaxed_store<atomic_store_32>
236236
def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val),
237237
(STLRX GPR64:$val, GPR64sp:$ptr)>;
238238
def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
239-
ro_Wextend16:$extend),
239+
ro_Wextend64:$extend),
240240
GPR64:$val),
241241
(STRXroW GPR64:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
242242
def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
243-
ro_Xextend16:$extend),
243+
ro_Xextend64:$extend),
244244
GPR64:$val),
245245
(STRXroX GPR64:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
246246
def : Pat<(relaxed_store<atomic_store_64>
@@ -276,8 +276,8 @@ def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
276276
(i64 (bitconvert (f64 FPR64Op:$val)))),
277277
(STRDroX FPR64Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
278278
def : Pat<(relaxed_store<atomic_store_64>
279-
(am_indexed64 GPR64sp:$Rn, uimm12s4:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
280-
(STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
279+
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
280+
(STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s8:$offset)>;
281281
def : Pat<(relaxed_store<atomic_store_64>
282282
(am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
283283
(STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ let AddedComplexity = 19 in {
507507
defm : VecROStoreLane64_0Pat<ro32, store, v2i32, i32, ssub, STRSroW, STRSroX>;
508508
}
509509

510-
def : Pat<(v8i8 (AArch64dup (i8 (load (am_indexed8 GPR64sp:$Rn))))),
510+
def : Pat<(v8i8 (AArch64dup (i8 (load GPR64sp:$Rn)))),
511511
(LD1Rv8b GPR64sp:$Rn)>;
512512
def : Pat<(v16i8 (AArch64dup (i8 (load GPR64sp:$Rn)))),
513513
(LD1Rv16b GPR64sp:$Rn)>;

llvm/test/CodeGen/AArch64/arm64-ld1.ll

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1654,24 +1654,14 @@ define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(ptr %addr) {
16541654
}
16551655

16561656
define <8 x i8> @dup_ld1_from_stack(ptr %__ret) {
1657-
; CHECK-SD-LABEL: dup_ld1_from_stack:
1658-
; CHECK-SD: // %bb.0: // %entry
1659-
; CHECK-SD-NEXT: sub sp, sp, #16
1660-
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
1661-
; CHECK-SD-NEXT: add x8, sp, #15
1662-
; CHECK-SD-NEXT: ld1r.8b { v0 }, [x8]
1663-
; CHECK-SD-NEXT: add sp, sp, #16
1664-
; CHECK-SD-NEXT: ret
1665-
;
1666-
; CHECK-GI-LABEL: dup_ld1_from_stack:
1667-
; CHECK-GI: // %bb.0: // %entry
1668-
; CHECK-GI-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
1669-
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
1670-
; CHECK-GI-NEXT: .cfi_offset w29, -16
1671-
; CHECK-GI-NEXT: add x8, sp, #15
1672-
; CHECK-GI-NEXT: ld1r.8b { v0 }, [x8]
1673-
; CHECK-GI-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
1674-
; CHECK-GI-NEXT: ret
1657+
; CHECK-LABEL: dup_ld1_from_stack:
1658+
; CHECK: // %bb.0: // %entry
1659+
; CHECK-NEXT: sub sp, sp, #16
1660+
; CHECK-NEXT: .cfi_def_cfa_offset 16
1661+
; CHECK-NEXT: add x8, sp, #15
1662+
; CHECK-NEXT: ld1r.8b { v0 }, [x8]
1663+
; CHECK-NEXT: add sp, sp, #16
1664+
; CHECK-NEXT: ret
16751665
entry:
16761666
%item = alloca i8, align 1
16771667
%0 = load i8, ptr %item, align 1

0 commit comments

Comments
 (0)