Skip to content

Commit b0a3668

Browse files
committed
[llvm] Optimize misaligned accesses with early profitable splitting
Misaligned load/store splitting remains in DAG legalization for correctness, but its naive halving approach misses optimization opportunities when pointer alignment information is available. Additionally, MachineMemOperand alignment is weakened by commonAlignment(getBaseAlign(), getOffset()). This change introduces a new IR-stage transformation that splits misaligned accesses only when doing so is profitable. Key aspects: (1) It is not a replacement for DAG legalization; DAG splitting is preserved. (2) It uses precise pointer alignment knowledge to generate optimal splits. (3) It is strictly profit-driven: it is applied only when splitting reduces cost. (4) It preserves the maximum alignment through the pipeline via accurate MMOs. Fixes #143215
1 parent 831fcb5 commit b0a3668

File tree

7 files changed

+487
-77
lines changed

7 files changed

+487
-77
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,8 @@ class CodeGenPrepare {
436436
bool optimizeExt(Instruction *&I);
437437
bool optimizeExtUses(Instruction *I);
438438
bool optimizeLoadExt(LoadInst *Load);
439+
bool optimizeStoreMisalign(StoreInst *ST);
440+
bool optimizeLoadMisalign(LoadInst *ST);
439441
bool optimizeShiftInst(BinaryOperator *BO);
440442
bool optimizeFunnelShift(IntrinsicInst *Fsh);
441443
bool optimizeSelectInst(SelectInst *SI);
@@ -7353,6 +7355,138 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
73537355
return true;
73547356
}
73557357

7358+
/// Decide whether \p I is a load or store that should be split into smaller,
/// better-aligned accesses before instruction selection.
///
/// The access is accepted only when all of the following hold:
///  - it is a non-atomic load or store of a sized, non-scalable, non-pointer
///    type,
///  - its alignment covers less than half of the value's bit width (wider
///    alignments are left to DAG legalization),
///  - the value type maps to a simple machine type for which the target does
///    not allow a misaligned access at this alignment, and
///  - known-bits analysis shows that the address advanced by the access
///    alignment has more known trailing zero bits than the alignment itself,
///    i.e. a later chunk can use a wider aligned access.
///
/// \param I   Instruction to inspect; anything but a load/store is rejected.
/// \param DL  Data layout used for type and pointer sizes.
/// \param TLI Target lowering queried for value types and misaligned-access
///            support.
/// \param DT  Dominator tree forwarded to the known-bits query.
/// \returns true if the callers should split the access.
static bool isOptimizeMisalignCandidate(Instruction *I, const DataLayout *DL,
                                        const TargetLowering *TLI,
                                        const DominatorTree *DT) {
  Value *Ptr = nullptr;
  Type *ValTy = nullptr;
  Align Alignment;
  if (auto *SI = dyn_cast<StoreInst>(I)) {
    Ptr = SI->getPointerOperand();
    ValTy = SI->getValueOperand()->getType();
    Alignment = SI->getAlign();
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    Ptr = LI->getPointerOperand();
    ValTy = LI->getType();
    Alignment = LI->getAlign();
  } else {
    return false;
  }

  // Splitting an atomic access into several narrower accesses would no longer
  // be a single atomic operation.
  if (I->isAtomic())
    return false;

  if (ValTy->isScalableTy() || !ValTy->isSized())
    return false;

  // The callers split the value through an integer of the same width that is
  // obtained with a bitcast; pointers cannot be bitcast to integers.
  if (ValTy->isPtrOrPtrVectorTy())
    return false;

  unsigned BitWidth = DL->getTypeSizeInBits(ValTy);

  // DAG legalization can handle this situation well
  if (Alignment.value() * 8 >= BitWidth / 2)
    return false;

  Type *PtrTy = Ptr->getType();
  EVT ValVT = TLI->getValueType(*DL, ValTy, true);
  if (!ValVT.isSimple() || ValVT == MVT::Other ||
      TLI->allowsMisalignedMemoryAccesses(
          ValVT, PtrTy->getPointerAddressSpace(), Alignment))
    return false;

  KnownBits Known = computeKnownBits(Ptr, *DL, nullptr, I, DT);
  if (Known.isUnknown())
    return false;

  unsigned PtrWidth = DL->getPointerTypeSizeInBits(PtrTy);
  KnownBits AlignKnown =
      KnownBits::makeConstant(APInt(PtrWidth, Alignment.value()));

  // Only proceed when (Ptr + Alignment) is provably more aligned than the
  // access alignment itself; otherwise splitting gains nothing.
  return KnownBits::add(Known, AlignKnown).countMinTrailingZeros() >
         AlignKnown.countMinTrailingZeros();
}
7398+
7399+
/// Replace a misaligned store with a sequence of narrower stores whose sizes
/// follow the alignment known at each successive address.
///
/// The stored value is viewed as an integer of the same bit width. Each
/// iteration stores at most `8 * Alignment` bits, advances the pointer by the
/// number of bytes written and recomputes the alignment of the new address.
/// On little-endian targets chunks are taken from the low bits; on big-endian
/// targets they are taken from the high bits.
///
/// \param SI Store to split; erased on success.
/// \returns true if \p SI was replaced, false if it was left untouched.
bool CodeGenPrepare::optimizeStoreMisalign(StoreInst *SI) {
  // A pointer (or vector of pointers) cannot be bitcast to an integer, so the
  // integer-based splitting below cannot express such a store.
  if (SI->getValueOperand()->getType()->isPtrOrPtrVectorTy())
    return false;

  if (!isOptimizeMisalignCandidate(SI, DL, TLI, DT.get()))
    return false;

  IRBuilder<> Builder(SI);
  LLVMContext &Ctx = SI->getContext();
  Value *Val = SI->getValueOperand();
  unsigned BitWidth = DL->getTypeSizeInBits(Val->getType());
  if (!Val->getType()->isIntegerTy())
    Val = Builder.CreateBitCast(Val, Type::getIntNTy(Ctx, BitWidth));

  bool IsLE = DL->isLittleEndian();
  bool IsVolatile = SI->isVolatile();
  Align Alignment = SI->getAlign();
  Value *Ptr = SI->getPointerOperand();
  unsigned RemainingBits = BitWidth;
  Type *Int8Ty = Type::getInt8Ty(Ctx);
  Type *Int32Ty = Type::getInt32Ty(Ctx);

  while (RemainingBits > 0) {
    // Never write more than what is left, nor more than the current address
    // is known to be aligned for.
    unsigned ChunkBits =
        std::min<uint64_t>(RemainingBits, 8 * Alignment.value());
    Type *ChunkTy = Type::getIntNTy(Ctx, ChunkBits);

    // The next chunk in memory order lives in the low bits (little endian)
    // or in the high bits (big endian) of the not-yet-stored value.
    Value *ChunkVal;
    if (IsLE) {
      ChunkVal = Builder.CreateTrunc(Val, ChunkTy);
    } else {
      Value *ShiftR = Builder.CreateLShr(Val, BitWidth - ChunkBits);
      ChunkVal = Builder.CreateTrunc(ShiftR, ChunkTy);
    }
    Builder.CreateAlignedStore(ChunkVal, Ptr, Alignment, IsVolatile);
    RemainingBits -= ChunkBits;
    if (RemainingBits == 0)
      break;

    // Drop the bits just stored and step to the following address.
    Val = IsLE ? Builder.CreateLShr(Val, ChunkBits)
               : Builder.CreateShl(Val, ChunkBits);
    Ptr = Builder.CreateGEP(Int8Ty, Ptr,
                            ConstantInt::get(Int32Ty, ChunkBits / 8));
    Alignment = getKnownAlignment(Ptr, *DL);
  }

  SI->eraseFromParent();
  return true;
}
7444+
7445+
/// Replace a misaligned load with a sequence of narrower loads whose sizes
/// follow the alignment known at each successive address, and reassemble the
/// original value from the pieces.
///
/// Each iteration loads at most `8 * Alignment` bits, zero-extends the chunk
/// to the full width, shifts it into position and ORs it into the
/// accumulator. On little-endian targets the first chunk supplies the low
/// bits; on big-endian targets it supplies the high bits.
///
/// \param LI Load to split; its uses are rewritten and it is erased on
///           success.
/// \returns true if \p LI was replaced, false if it was left untouched.
bool CodeGenPrepare::optimizeLoadMisalign(LoadInst *LI) {
  Type *ValTy = LI->getType();

  // The reassembled integer is bitcast back to the loaded type; a pointer (or
  // vector of pointers) cannot be produced from an integer with a bitcast.
  if (ValTy->isPtrOrPtrVectorTy())
    return false;

  if (!isOptimizeMisalignCandidate(LI, DL, TLI, DT.get()))
    return false;

  IRBuilder<> Builder(LI);
  LLVMContext &Ctx = LI->getContext();

  unsigned BitWidth = DL->getTypeSizeInBits(ValTy);
  bool IsLE = DL->isLittleEndian();
  bool IsVolatile = LI->isVolatile();
  Align Alignment = LI->getAlign();
  Value *Ptr = LI->getPointerOperand();
  unsigned RemainingBits = BitWidth;
  Type *IntTy = Type::getIntNTy(Ctx, BitWidth);
  Type *Int8Ty = Type::getInt8Ty(Ctx);
  Type *Int32Ty = Type::getInt32Ty(Ctx);
  Value *Val = ConstantInt::get(IntTy, 0);

  while (RemainingBits > 0) {
    // Never read more than what is left, nor more than the current address
    // is known to be aligned for.
    unsigned ChunkBits =
        std::min<uint64_t>(RemainingBits, 8 * Alignment.value());
    Type *ChunkTy = Type::getIntNTy(Ctx, ChunkBits);
    Value *ChunkVal = Builder.CreateZExt(
        Builder.CreateAlignedLoad(ChunkTy, Ptr, Alignment, IsVolatile), IntTy);

    // Move the chunk to its position in the full-width value. Little endian:
    // above the bits already loaded. Big endian: just below the bits already
    // loaded, leaving room for the chunks still to come. The freshly loaded
    // chunk is what must be shifted, not the accumulator.
    unsigned ShiftAmt =
        IsLE ? BitWidth - RemainingBits : RemainingBits - ChunkBits;
    ChunkVal = Builder.CreateShl(ChunkVal, ShiftAmt);

    Val = Builder.CreateOr(Val, ChunkVal);
    RemainingBits -= ChunkBits;
    if (RemainingBits == 0)
      break;

    // Step to the address following the bytes just read.
    Ptr = Builder.CreateGEP(Int8Ty, Ptr,
                            ConstantInt::get(Int32Ty, ChunkBits / 8));
    Alignment = getKnownAlignment(Ptr, *DL);
  }

  if (!ValTy->isIntegerTy())
    Val = Builder.CreateBitCast(Val, ValTy);
  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();
  return true;
}
7489+
73567490
/// Check if V (an operand of a select instruction) is an expensive instruction
73577491
/// that is only used once.
73587492
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
@@ -8750,6 +8884,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
87508884
return true;
87518885

87528886
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8887+
if (optimizeLoadMisalign(LI))
8888+
return true;
87538889
LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
87548890
bool Modified = optimizeLoadExt(LI);
87558891
unsigned AS = LI->getPointerAddressSpace();
@@ -8760,6 +8896,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
87608896
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
87618897
if (splitMergedValStore(*SI, *DL, *TLI))
87628898
return true;
8899+
if (optimizeStoreMisalign(SI))
8900+
return true;
87638901
SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
87648902
unsigned AS = SI->getPointerAddressSpace();
87658903
return optimizeMemoryInst(I, SI->getOperand(1),

llvm/test/CodeGen/AMDGPU/ds_read2.ll

Lines changed: 24 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1451,63 +1451,41 @@ define amdgpu_ps <2 x float> @ds_read_interp_read(i32 inreg %prims, ptr addrspac
14511451
define amdgpu_kernel void @read2_v2i32_align1_odd_offset(ptr addrspace(1) %out) {
14521452
; CI-LABEL: read2_v2i32_align1_odd_offset:
14531453
; CI: ; %bb.0: ; %entry
1454-
; CI-NEXT: v_mov_b32_e32 v0, 0
1454+
; CI-NEXT: v_mov_b32_e32 v1, 0
14551455
; CI-NEXT: s_mov_b32 m0, -1
1456-
; CI-NEXT: ds_read_u8 v1, v0 offset:70
1457-
; CI-NEXT: ds_read_u8 v2, v0 offset:72
1458-
; CI-NEXT: ds_read_u8 v3, v0 offset:71
1459-
; CI-NEXT: ds_read_u8 v4, v0 offset:69
1460-
; CI-NEXT: ds_read_u8 v5, v0 offset:68
1461-
; CI-NEXT: s_waitcnt lgkmcnt(4)
1462-
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
1463-
; CI-NEXT: s_waitcnt lgkmcnt(3)
1464-
; CI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
1465-
; CI-NEXT: s_waitcnt lgkmcnt(2)
1466-
; CI-NEXT: v_or_b32_e32 v2, v2, v3
1467-
; CI-NEXT: s_waitcnt lgkmcnt(1)
1468-
; CI-NEXT: v_or_b32_e32 v1, v1, v4
1469-
; CI-NEXT: ds_read_u8 v4, v0 offset:66
1470-
; CI-NEXT: ds_read_u8 v6, v0 offset:67
1471-
; CI-NEXT: ds_read_u8 v0, v0 offset:65
1472-
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1456+
; CI-NEXT: ds_read_u8 v2, v1 offset:65
1457+
; CI-NEXT: ds_read_u16 v3, v1 offset:66
1458+
; CI-NEXT: ds_read_b32 v0, v1 offset:68
1459+
; CI-NEXT: ds_read_u8 v4, v1 offset:72
14731460
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1474-
; CI-NEXT: v_or_b32_e32 v1, v2, v1
1475-
; CI-NEXT: s_waitcnt lgkmcnt(0)
1476-
; CI-NEXT: v_lshlrev_b32_e32 v2, 8, v4
1477-
; CI-NEXT: v_or_b32_e32 v0, v2, v0
1478-
; CI-NEXT: v_lshlrev_b32_e32 v2, 8, v5
1479-
; CI-NEXT: v_or_b32_e32 v2, v2, v6
1480-
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
14811461
; CI-NEXT: s_mov_b32 s3, 0xf000
1462+
; CI-NEXT: s_waitcnt lgkmcnt(0)
1463+
; CI-NEXT: v_lshlrev_b32_e32 v3, 8, v3
1464+
; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 24
1465+
; CI-NEXT: v_or_b32_e32 v2, v2, v3
1466+
; CI-NEXT: v_or_b32_e32 v0, v0, v2
1467+
; CI-NEXT: v_lshlrev_b32_e32 v2, 24, v4
14821468
; CI-NEXT: s_mov_b32 s2, -1
1483-
; CI-NEXT: v_or_b32_e32 v0, v2, v0
1469+
; CI-NEXT: v_or_b32_e32 v1, v1, v2
14841470
; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
14851471
; CI-NEXT: s_endpgm
14861472
;
14871473
; GFX9-ALIGNED-LABEL: read2_v2i32_align1_odd_offset:
14881474
; GFX9-ALIGNED: ; %bb.0: ; %entry
1489-
; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v2, 0
1490-
; GFX9-ALIGNED-NEXT: ds_read_u8 v0, v2 offset:65
1491-
; GFX9-ALIGNED-NEXT: ds_read_u8 v3, v2 offset:66
1492-
; GFX9-ALIGNED-NEXT: ds_read_u8 v4, v2 offset:67
1493-
; GFX9-ALIGNED-NEXT: ds_read_u8 v5, v2 offset:68
1494-
; GFX9-ALIGNED-NEXT: ds_read_u8 v1, v2 offset:70
1495-
; GFX9-ALIGNED-NEXT: ds_read_u8 v6, v2 offset:69
1496-
; GFX9-ALIGNED-NEXT: ds_read_u8 v7, v2 offset:72
1497-
; GFX9-ALIGNED-NEXT: ds_read_u8 v8, v2 offset:71
1475+
; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v1, 0
1476+
; GFX9-ALIGNED-NEXT: ds_read_u16 v2, v1 offset:66
1477+
; GFX9-ALIGNED-NEXT: ds_read_b32 v0, v1 offset:68
1478+
; GFX9-ALIGNED-NEXT: ds_read_u8 v4, v1 offset:65
1479+
; GFX9-ALIGNED-NEXT: ds_read_u8 v5, v1 offset:72
14981480
; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14991481
; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0)
1500-
; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v1
1501-
; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v3, 8, v3
1502-
; GFX9-ALIGNED-NEXT: v_or_b32_e32 v1, v1, v6
1503-
; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v6, 8, v7
1504-
; GFX9-ALIGNED-NEXT: v_or_b32_e32 v0, v3, v0
1505-
; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v3, 8, v5
1506-
; GFX9-ALIGNED-NEXT: v_or_b32_sdwa v6, v6, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1507-
; GFX9-ALIGNED-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1508-
; GFX9-ALIGNED-NEXT: v_or_b32_e32 v1, v6, v1
1509-
; GFX9-ALIGNED-NEXT: v_or_b32_e32 v0, v3, v0
1510-
; GFX9-ALIGNED-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1482+
; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v6, 8, v2
1483+
; GFX9-ALIGNED-NEXT: v_lshlrev_b64 v[2:3], 24, v[0:1]
1484+
; GFX9-ALIGNED-NEXT: v_or_b32_sdwa v0, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1485+
; GFX9-ALIGNED-NEXT: v_or_b32_e32 v2, v0, v2
1486+
; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 24, v5
1487+
; GFX9-ALIGNED-NEXT: v_or_b32_e32 v3, v3, v0
1488+
; GFX9-ALIGNED-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
15111489
; GFX9-ALIGNED-NEXT: s_endpgm
15121490
;
15131491
; GFX9-UNALIGNED-LABEL: read2_v2i32_align1_odd_offset:

llvm/test/CodeGen/AMDGPU/ds_write2.ll

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,31 +1009,21 @@ define amdgpu_kernel void @write2_v2i32_align1_odd_offset() {
10091009
; CI-NEXT: v_mov_b32_e32 v1, 0
10101010
; CI-NEXT: s_mov_b32 m0, -1
10111011
; CI-NEXT: ds_write_b8 v1, v0 offset:65
1012-
; CI-NEXT: v_mov_b32_e32 v0, 1
1013-
; CI-NEXT: ds_write_b8 v1, v0 offset:70
1014-
; CI-NEXT: v_mov_b32_e32 v0, 0xc8
1015-
; CI-NEXT: ds_write_b8 v1, v0 offset:69
1016-
; CI-NEXT: ds_write_b8 v1, v1 offset:68
1017-
; CI-NEXT: ds_write_b8 v1, v1 offset:67
1018-
; CI-NEXT: ds_write_b8 v1, v1 offset:66
1012+
; CI-NEXT: ds_write_b16 v1, v1 offset:66
1013+
; CI-NEXT: v_mov_b32_e32 v0, 0x1c800
1014+
; CI-NEXT: ds_write_b32 v1, v0 offset:68
10191015
; CI-NEXT: ds_write_b8 v1, v1 offset:72
1020-
; CI-NEXT: ds_write_b8 v1, v1 offset:71
10211016
; CI-NEXT: s_endpgm
10221017
;
10231018
; GFX9-ALIGNED-LABEL: write2_v2i32_align1_odd_offset:
10241019
; GFX9-ALIGNED: ; %bb.0: ; %entry
10251020
; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v0, 0x7b
10261021
; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v1, 0
10271022
; GFX9-ALIGNED-NEXT: ds_write_b8 v1, v0 offset:65
1028-
; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v0, 1
1029-
; GFX9-ALIGNED-NEXT: ds_write_b8 v1, v0 offset:70
1030-
; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v0, 0xc8
1031-
; GFX9-ALIGNED-NEXT: ds_write_b8 v1, v0 offset:69
1032-
; GFX9-ALIGNED-NEXT: ds_write_b8 v1, v1 offset:68
1033-
; GFX9-ALIGNED-NEXT: ds_write_b8 v1, v1 offset:67
1034-
; GFX9-ALIGNED-NEXT: ds_write_b8 v1, v1 offset:66
1023+
; GFX9-ALIGNED-NEXT: ds_write_b16 v1, v1 offset:66
1024+
; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v0, 0x1c800
1025+
; GFX9-ALIGNED-NEXT: ds_write_b32 v1, v0 offset:68
10351026
; GFX9-ALIGNED-NEXT: ds_write_b8 v1, v1 offset:72
1036-
; GFX9-ALIGNED-NEXT: ds_write_b8 v1, v1 offset:71
10371027
; GFX9-ALIGNED-NEXT: s_endpgm
10381028
;
10391029
; GFX9-UNALIGNED-LABEL: write2_v2i32_align1_odd_offset:

llvm/test/CodeGen/AVR/calling-conv/c/basic.ll

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,15 @@ define void @ret_void_args_i8_i32(i8 %a, i32 %b) {
1212
; CHECK: sts 4, r24
1313
store volatile i8 %a, ptr inttoptr (i64 4 to ptr)
1414

15-
; CHECK-NEXT: sts 8, r23
16-
; CHECK-NEXT: sts 7, r22
17-
; CHECK-NEXT: sts 6, r21
1815
; CHECK-NEXT: sts 5, r20
16+
17+
; redundant instructions, should be deleted
18+
; CHECK-NEXT: mov r24, r21
19+
; CHECK-NEXT: mov r25, r22
20+
21+
; CHECK-NEXT: sts 7, r25
22+
; CHECK-NEXT: sts 6, r24
23+
; CHECK-NEXT: sts 8, r23
1924
store volatile i32 %b, ptr inttoptr (i64 5 to ptr)
2025
ret void
2126
}

llvm/test/CodeGen/AVR/calling-conv/c/basic_aggr.ll

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,15 @@ start:
77
%0 = extractvalue { i8, i32 } %a, 0
88
store volatile i8 %0, ptr inttoptr (i64 4 to ptr)
99

10-
; CHECK-NEXT: sts 8, r24
11-
; CHECK-NEXT: sts 7, r23
12-
; CHECK-NEXT: sts 6, r22
1310
; CHECK-NEXT: sts 5, r21
11+
12+
; redundant instructions, should be deleted
13+
; CHECK-NEXT: mov r18, r22
14+
; CHECK-NEXT: mov r19, r23
15+
16+
; CHECK-NEXT: sts 7, r19
17+
; CHECK-NEXT: sts 6, r18
18+
; CHECK-NEXT: sts 8, r24
1419
%1 = extractvalue { i8, i32 } %a, 1
1520
store volatile i32 %1, ptr inttoptr (i64 5 to ptr)
1621
ret void
@@ -62,17 +67,22 @@ start:
6267
%0 = extractvalue { i8, i32 } %a, 0
6368
store volatile i8 %0, ptr inttoptr (i64 4 to ptr)
6469

65-
; CHECK-NEXT: sts 8, r24
66-
; CHECK-NEXT: sts 7, r23
67-
; CHECK-NEXT: sts 6, r22
6870
; CHECK-NEXT: sts 5, r21
71+
72+
; redundant instructions, should be deleted
73+
; CHECK-NEXT: mov r20, r22
74+
; CHECK-NEXT: mov r21, r23
75+
76+
; CHECK-NEXT: sts 7, r21
77+
; CHECK-NEXT: sts 6, r20
78+
; CHECK-NEXT: sts 8, r24
6979
%1 = extractvalue { i8, i32 } %a, 1
7080
store volatile i32 %1, ptr inttoptr (i64 5 to ptr)
7181

72-
; CHECK-NEXT: sts 9, r17
73-
; CHECK-NEXT: sts 8, r16
74-
; CHECK-NEXT: sts 7, r15
75-
; CHECK-NEXT: sts 6, r14
82+
; CHECK-NEXT: sts 9, r17
83+
; CHECK-NEXT: sts 8, r16
84+
; CHECK-NEXT: sts 7, r15
85+
; CHECK-NEXT: sts 6, r14
7686
%2 = extractvalue { i32, i8 } %b, 0
7787
store volatile i32 %2, ptr inttoptr (i64 6 to ptr)
7888

llvm/test/CodeGen/XCore/unaligned_load.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,11 @@ entry:
2424

2525
; Constant offset from word aligned base.
2626
; CHECK-LABEL: align3:
27-
; CHECK: ldw {{r[0-9]+}}, dp
28-
; CHECK: ldw {{r[0-9]+}}, dp
27+
; CHECK: ldaw {{r[0-9]+}}, dp
28+
; CHECK: ld8u
29+
; CHECK: ld16s
30+
; CHECK: or
31+
; CHECK: ld8u
2932
; CHECK: or
3033
define i32 @align3() nounwind {
3134
entry:

0 commit comments

Comments
 (0)