Skip to content

Commit 1709d18

Browse files
committed
fixup: Improve the alignment check
1 parent 1371e44 commit 1709d18

File tree

2 files changed

+23
-18
lines changed

2 files changed

+23
-18
lines changed

llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
142142
return false;
143143
}
144144

145+
// Merge two adjacent load/store instructions into a paired instruction
146+
// (LDP/SDP/SWP/LWP) if the effective address is 16-byte aligned. This function
147+
// selects the appropriate paired opcode, verifies that the memory operand (or
148+
// fixed-stack slot) is 16-byte aligned, and checks that the offset is valid. If
149+
// all conditions are met, it builds and inserts the paired instruction.
145150
bool RISCVLoadStoreOpt::tryConvertToLdStPair(
146151
MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
147152
unsigned PairOpc;
@@ -166,16 +171,22 @@ bool RISCVLoadStoreOpt::tryConvertToLdStPair(
166171
MachineFunction *MF = First->getMF();
167172
const MachineMemOperand *MMO = *First->memoperands_begin();
168173
Align MMOAlign = MMO->getAlign();
169-
if (const PseudoSourceValue *Source = MMO->getPseudoValue())
174+
175+
// Per the ABI, the stack pointer shall be 128-bit (16-byte) aligned.
176+
if (const PseudoSourceValue *Source = MMO->getPseudoValue()) {
170177
if (Source->kind() == PseudoSourceValue::FixedStack)
171178
MMOAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
179+
}
172180

173-
if (MMOAlign < Align(MMO->getSize().getValue() * 2))
181+
// Only pair if alignment is exactly 16 bytes.
182+
if (MMOAlign != 16)
174183
return false;
184+
175185
int64_t Offset = First->getOperand(2).getImm();
176186
if (!isUInt<7>(Offset) ||
177187
!isAligned(Align(MMO->getSize().getValue()), Offset))
178188
return false;
189+
179190
MachineInstrBuilder MIB = BuildMI(
180191
*MF,
181192
First->getDebugLoc().get() ? First->getDebugLoc() : Second->getDebugLoc(),

llvm/test/CodeGen/RISCV/load-store-pair.ll

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,8 @@ define dso_local void @testi(i8** nocapture noundef readonly %a) local_unnamed_a
199199
; RV64I_PAIR-NEXT: .cfi_offset s3, -16
200200
; RV64I_PAIR-NEXT: .cfi_offset s4, -24
201201
; RV64I_PAIR-NEXT: .cfi_offset s5, -32
202-
; RV64I_PAIR-NEXT: ld s3, 0(a0)
203-
; RV64I_PAIR-NEXT: ld s2, 8(a0)
204-
; RV64I_PAIR-NEXT: ld s5, 16(a0)
205-
; RV64I_PAIR-NEXT: ld s4, 24(a0)
202+
; RV64I_PAIR-NEXT: mips.ldp s3, s2, 0(a0)
203+
; RV64I_PAIR-NEXT: mips.ldp s5, s4, 16(a0)
206204
; RV64I_PAIR-NEXT: #APP
207205
; RV64I_PAIR-NEXT: #NO_APP
208206
; RV64I_PAIR-NEXT: mips.ldp s3, s2, 16(sp) # 16-byte Folded Reload
@@ -225,10 +223,8 @@ define dso_local void @testi(i8** nocapture noundef readonly %a) local_unnamed_a
225223
; RV64P_8700-NEXT: .cfi_offset s3, -16
226224
; RV64P_8700-NEXT: .cfi_offset s4, -24
227225
; RV64P_8700-NEXT: .cfi_offset s5, -32
228-
; RV64P_8700-NEXT: ld s3, 0(a0)
229-
; RV64P_8700-NEXT: ld s2, 8(a0)
230-
; RV64P_8700-NEXT: ld s5, 16(a0)
231-
; RV64P_8700-NEXT: ld s4, 24(a0)
226+
; RV64P_8700-NEXT: mips.ldp s3, s2, 0(a0)
227+
; RV64P_8700-NEXT: mips.ldp s5, s4, 16(a0)
232228
; RV64P_8700-NEXT: #APP
233229
; RV64P_8700-NEXT: #NO_APP
234230
; RV64P_8700-NEXT: mips.ldp s3, s2, 16(sp) # 16-byte Folded Reload
@@ -251,10 +247,8 @@ define dso_local void @testi(i8** nocapture noundef readonly %a) local_unnamed_a
251247
; RV64D_PAIR-NEXT: .cfi_offset s3, -16
252248
; RV64D_PAIR-NEXT: .cfi_offset s4, -24
253249
; RV64D_PAIR-NEXT: .cfi_offset s5, -32
254-
; RV64D_PAIR-NEXT: ld s3, 0(a0)
255-
; RV64D_PAIR-NEXT: ld s2, 8(a0)
256-
; RV64D_PAIR-NEXT: ld s5, 16(a0)
257-
; RV64D_PAIR-NEXT: ld s4, 24(a0)
250+
; RV64D_PAIR-NEXT: mips.ldp s3, s2, 0(a0)
251+
; RV64D_PAIR-NEXT: mips.ldp s5, s4, 16(a0)
258252
; RV64D_PAIR-NEXT: #APP
259253
; RV64D_PAIR-NEXT: #NO_APP
260254
; RV64D_PAIR-NEXT: mips.ldp s3, s2, 16(sp) # 16-byte Folded Reload
@@ -298,12 +292,12 @@ define dso_local void @testi(i8** nocapture noundef readonly %a) local_unnamed_a
298292
; RV64D_NOPAIR-NEXT: ret
299293
entry:
300294
%arrayidx = getelementptr inbounds i8*, i8** %a, i64 1
301-
%0 = load i8*, i8** %arrayidx, align 8
302-
%1 = load i8*, i8** %a, align 8
295+
%0 = load i8*, i8** %arrayidx, align 16
296+
%1 = load i8*, i8** %a, align 16
303297
%arrayidx2 = getelementptr inbounds i8*, i8** %a, i64 3
304-
%2 = load i8*, i8** %arrayidx2, align 8
298+
%2 = load i8*, i8** %arrayidx2, align 16
305299
%arrayidx3 = getelementptr inbounds i8*, i8** %a, i64 2
306-
%3 = load i8*, i8** %arrayidx3, align 8
300+
%3 = load i8*, i8** %arrayidx3, align 16
307301
tail call void asm sideeffect "", "{x18},{x19},{x20},{x21}"(i8* %0, i8* %1, i8* %2, i8* %3)
308302
ret void
309303
}

0 commit comments

Comments
 (0)