Skip to content

Commit b9107bf

Browse files
authored
[RISCV] Support zilsd-4byte-align for i64 load/store in SelectionDAG. (#169182)
I think we need to keep the SelectionDAG code for volatile load/store, so we should support 4-byte alignment when possible.
1 parent d5f3ab8 commit b9107bf

File tree

4 files changed

+101
-5
lines changed

4 files changed

+101
-5
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8404,7 +8404,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
84048404
if (Store->isTruncatingStore())
84058405
return SDValue();
84068406

8407-
if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
8407+
if (Store->getAlign() < Subtarget.getZilsdAlign())
84088408
return SDValue();
84098409

84108410
SDLoc DL(Op);
@@ -14803,7 +14803,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
1480314803
assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
1480414804
"Unexpected custom legalisation");
1480514805

14806-
if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14806+
if (Ld->getAlign() < Subtarget.getZilsdAlign())
1480714807
return;
1480814808

1480914809
SDLoc DL(N);

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,13 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
237237

238238
return 0;
239239
}
240+
241+
Align getZilsdAlign() const {
242+
return Align(enableUnalignedScalarMem() ? 1
243+
: allowZilsd4ByteAlign() ? 4
244+
: 8);
245+
}
246+
240247
unsigned getELen() const {
241248
assert(hasVInstructions() && "Expected V extension");
242249
return hasVInstructionsI64() ? 64 : 32;

llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,7 @@ bool RISCVPreAllocZilsdOpt::runOnMachineFunction(MachineFunction &MF) {
146146

147147
// Check alignment: default is 8-byte, but allow 4-byte with tune feature
148148
// If unaligned scalar memory is enabled, allow any alignment
149-
RequiredAlign = STI->enableUnalignedScalarMem() ? Align(1)
150-
: STI->allowZilsd4ByteAlign() ? Align(4)
151-
: Align(8);
149+
RequiredAlign = STI->getZilsdAlign();
152150
bool Modified = false;
153151
for (auto &MBB : MF) {
154152
Modified |= rescheduleLoadStoreInstrs(&MBB);

llvm/test/CodeGen/RISCV/zilsd.ll

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
; RUN: | FileCheck -check-prefixes=CHECK,SLOW %s
44
; RUN: llc -mtriple=riscv32 -mattr=+zilsd,+unaligned-scalar-mem -verify-machineinstrs < %s \
55
; RUN: | FileCheck -check-prefixes=CHECK,FAST %s
6+
; RUN: llc -mtriple=riscv32 -mattr=+zilsd,+zilsd-4byte-align -verify-machineinstrs < %s \
7+
; RUN: | FileCheck -check-prefixes=CHECK,4BYTEALIGN %s
68

79
define i64 @load(ptr %a) nounwind {
810
; CHECK-LABEL: load:
@@ -17,6 +19,28 @@ define i64 @load(ptr %a) nounwind {
1719
ret i64 %2
1820
}
1921

22+
define i64 @load_align4(ptr %a) nounwind {
23+
; SLOW-LABEL: load_align4:
24+
; SLOW: # %bb.0:
25+
; SLOW-NEXT: lw a2, 80(a0)
26+
; SLOW-NEXT: lw a1, 84(a0)
27+
; SLOW-NEXT: mv a0, a2
28+
; SLOW-NEXT: ret
29+
;
30+
; FAST-LABEL: load_align4:
31+
; FAST: # %bb.0:
32+
; FAST-NEXT: ld a0, 80(a0)
33+
; FAST-NEXT: ret
34+
;
35+
; 4BYTEALIGN-LABEL: load_align4:
36+
; 4BYTEALIGN: # %bb.0:
37+
; 4BYTEALIGN-NEXT: ld a0, 80(a0)
38+
; 4BYTEALIGN-NEXT: ret
39+
%1 = getelementptr i64, ptr %a, i32 10
40+
%2 = load i64, ptr %1, align 4
41+
ret i64 %2
42+
}
43+
2044
define void @store(ptr %a, i64 %b) nounwind {
2145
; CHECK-LABEL: store:
2246
; CHECK: # %bb.0:
@@ -31,6 +55,31 @@ define void @store(ptr %a, i64 %b) nounwind {
3155
ret void
3256
}
3357

58+
define void @store_align4(ptr %a, i64 %b) nounwind {
59+
; SLOW-LABEL: store_align4:
60+
; SLOW: # %bb.0:
61+
; SLOW-NEXT: sw a1, 88(a0)
62+
; SLOW-NEXT: sw a2, 92(a0)
63+
; SLOW-NEXT: ret
64+
;
65+
; FAST-LABEL: store_align4:
66+
; FAST: # %bb.0:
67+
; FAST-NEXT: mv a3, a2
68+
; FAST-NEXT: mv a2, a1
69+
; FAST-NEXT: sd a2, 88(a0)
70+
; FAST-NEXT: ret
71+
;
72+
; 4BYTEALIGN-LABEL: store_align4:
73+
; 4BYTEALIGN: # %bb.0:
74+
; 4BYTEALIGN-NEXT: mv a3, a2
75+
; 4BYTEALIGN-NEXT: mv a2, a1
76+
; 4BYTEALIGN-NEXT: sd a2, 88(a0)
77+
; 4BYTEALIGN-NEXT: ret
78+
%1 = getelementptr i64, ptr %a, i32 11
79+
store i64 %b, ptr %1, align 4
80+
ret void
81+
}
82+
3483
define i64 @load_unaligned(ptr %p) {
3584
; SLOW-LABEL: load_unaligned:
3685
; SLOW: # %bb.0:
@@ -60,6 +109,30 @@ define i64 @load_unaligned(ptr %p) {
60109
; FAST: # %bb.0:
61110
; FAST-NEXT: ld a0, 0(a0)
62111
; FAST-NEXT: ret
112+
;
113+
; 4BYTEALIGN-LABEL: load_unaligned:
114+
; 4BYTEALIGN: # %bb.0:
115+
; 4BYTEALIGN-NEXT: lbu a1, 1(a0)
116+
; 4BYTEALIGN-NEXT: lbu a2, 2(a0)
117+
; 4BYTEALIGN-NEXT: lbu a3, 3(a0)
118+
; 4BYTEALIGN-NEXT: lbu a4, 0(a0)
119+
; 4BYTEALIGN-NEXT: slli a1, a1, 8
120+
; 4BYTEALIGN-NEXT: slli a2, a2, 16
121+
; 4BYTEALIGN-NEXT: slli a3, a3, 24
122+
; 4BYTEALIGN-NEXT: or a1, a1, a4
123+
; 4BYTEALIGN-NEXT: or a2, a3, a2
124+
; 4BYTEALIGN-NEXT: lbu a3, 5(a0)
125+
; 4BYTEALIGN-NEXT: lbu a4, 4(a0)
126+
; 4BYTEALIGN-NEXT: lbu a5, 6(a0)
127+
; 4BYTEALIGN-NEXT: lbu a0, 7(a0)
128+
; 4BYTEALIGN-NEXT: slli a3, a3, 8
129+
; 4BYTEALIGN-NEXT: or a3, a3, a4
130+
; 4BYTEALIGN-NEXT: slli a5, a5, 16
131+
; 4BYTEALIGN-NEXT: slli a0, a0, 24
132+
; 4BYTEALIGN-NEXT: or a5, a0, a5
133+
; 4BYTEALIGN-NEXT: or a0, a2, a1
134+
; 4BYTEALIGN-NEXT: or a1, a5, a3
135+
; 4BYTEALIGN-NEXT: ret
63136
%res = load i64, ptr %p, align 1
64137
ret i64 %res
65138
}
@@ -89,6 +162,24 @@ define void @store_unaligned(ptr %p, i64 %v) {
89162
; FAST-NEXT: mv a2, a1
90163
; FAST-NEXT: sd a2, 0(a0)
91164
; FAST-NEXT: ret
165+
;
166+
; 4BYTEALIGN-LABEL: store_unaligned:
167+
; 4BYTEALIGN: # %bb.0:
168+
; 4BYTEALIGN-NEXT: srli a3, a2, 24
169+
; 4BYTEALIGN-NEXT: srli a4, a2, 16
170+
; 4BYTEALIGN-NEXT: srli a5, a2, 8
171+
; 4BYTEALIGN-NEXT: srli a6, a1, 24
172+
; 4BYTEALIGN-NEXT: srli a7, a1, 16
173+
; 4BYTEALIGN-NEXT: sb a2, 4(a0)
174+
; 4BYTEALIGN-NEXT: sb a5, 5(a0)
175+
; 4BYTEALIGN-NEXT: sb a4, 6(a0)
176+
; 4BYTEALIGN-NEXT: sb a3, 7(a0)
177+
; 4BYTEALIGN-NEXT: srli a2, a1, 8
178+
; 4BYTEALIGN-NEXT: sb a1, 0(a0)
179+
; 4BYTEALIGN-NEXT: sb a2, 1(a0)
180+
; 4BYTEALIGN-NEXT: sb a7, 2(a0)
181+
; 4BYTEALIGN-NEXT: sb a6, 3(a0)
182+
; 4BYTEALIGN-NEXT: ret
92183
store i64 %v, ptr %p, align 1
93184
ret void
94185
}

0 commit comments

Comments
 (0)