Skip to content

Commit 03e07d5

Browse files
committed
Use the normal createMemSetLoop helper for memset.pattern
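As pointed out in the review, after the various changes to memset.pattern semantics since the first version, the dedicated createMemSetPatternLoop helper and the generic createMemSetLoop helper are now effectively identical, so the dedicated helper is removed and expandMemSetAsLoop always uses createMemSetLoop.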
1 parent c19adc1 commit 03e07d5

File tree

4 files changed

+200
-216
lines changed

4 files changed

+200
-216
lines changed

llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -824,48 +824,6 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
824824
}
825825
}
826826

827-
static void createMemSetPatternLoop(Instruction *InsertBefore, Value *DstAddr,
828-
Value *Count, Value *SetValue,
829-
Align DstAlign, bool IsVolatile) {
830-
BasicBlock *OrigBB = InsertBefore->getParent();
831-
Function *F = OrigBB->getParent();
832-
const DataLayout &DL = F->getDataLayout();
833-
834-
Type *TypeOfCount = Count->getType();
835-
836-
BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
837-
BasicBlock *LoopBB =
838-
BasicBlock::Create(F->getContext(), "storeloop", F, NewBB);
839-
IRBuilder<> Builder(OrigBB->getTerminator());
840-
841-
Builder.CreateCondBr(
842-
Builder.CreateICmpEQ(ConstantInt::get(TypeOfCount, 0), Count), NewBB,
843-
LoopBB);
844-
OrigBB->getTerminator()->eraseFromParent();
845-
846-
IRBuilder<> LoopBuilder(LoopBB);
847-
PHINode *CurrentDst = LoopBuilder.CreatePHI(DstAddr->getType(), 0);
848-
CurrentDst->addIncoming(DstAddr, OrigBB);
849-
PHINode *LoopCount = LoopBuilder.CreatePHI(TypeOfCount, 0);
850-
LoopCount->addIncoming(Count, OrigBB);
851-
852-
unsigned PatSize = DL.getTypeStoreSize(SetValue->getType());
853-
Align PatAlign(commonAlignment(DstAlign, PatSize));
854-
LoopBuilder.CreateAlignedStore(SetValue, CurrentDst, PatAlign, IsVolatile);
855-
856-
Value *NextDst = LoopBuilder.CreateInBoundsGEP(
857-
SetValue->getType(), CurrentDst, ConstantInt::get(TypeOfCount, 1));
858-
CurrentDst->addIncoming(NextDst, LoopBB);
859-
860-
Value *NewLoopCount =
861-
LoopBuilder.CreateSub(LoopCount, ConstantInt::get(TypeOfCount, 1));
862-
LoopCount->addIncoming(NewLoopCount, LoopBB);
863-
864-
LoopBuilder.CreateCondBr(
865-
LoopBuilder.CreateICmpNE(NewLoopCount, ConstantInt::get(TypeOfCount, 0)),
866-
LoopBB, NewBB);
867-
}
868-
869827
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
870828
Value *CopyLen, Value *SetValue, Align DstAlign,
871829
bool IsVolatile) {
@@ -1004,16 +962,6 @@ bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
1004962
}
1005963

1006964
void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
1007-
if (isa<MemSetPatternInst>(Memset)) {
1008-
return createMemSetPatternLoop(
1009-
/* InsertBefore */ Memset,
1010-
/* DstAddr */ Memset->getRawDest(),
1011-
/* Count */ Memset->getLength(),
1012-
/* SetValue */ Memset->getValue(),
1013-
/* Alignment */ Memset->getDestAlign().valueOrOne(),
1014-
Memset->isVolatile());
1015-
}
1016-
1017965
createMemSetLoop(/* InsertBefore */ Memset,
1018966
/* DstAddr */ Memset->getRawDest(),
1019967
/* CopyLen */ Memset->getLength(),

llvm/test/CodeGen/RISCV/memset-pattern.ll

Lines changed: 144 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -13,27 +13,33 @@
1313

1414
define void @memset_1(ptr %a, i128 %value) nounwind {
1515
; RV32-BOTH-LABEL: memset_1:
16-
; RV32-BOTH: # %bb.0: # %storeloop.preheader
16+
; RV32-BOTH: # %bb.0: # %loadstoreloop.preheader
1717
; RV32-BOTH-NEXT: lw a2, 0(a1)
1818
; RV32-BOTH-NEXT: lw a3, 4(a1)
1919
; RV32-BOTH-NEXT: lw a4, 8(a1)
2020
; RV32-BOTH-NEXT: lw a1, 12(a1)
21-
; RV32-BOTH-NEXT: addi a5, a0, 16
22-
; RV32-BOTH-NEXT: .LBB0_1: # %storeloop
21+
; RV32-BOTH-NEXT: li a5, 0
22+
; RV32-BOTH-NEXT: li a6, 0
23+
; RV32-BOTH-NEXT: .LBB0_1: # %loadstoreloop
2324
; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
24-
; RV32-BOTH-NEXT: sw a2, 0(a0)
25-
; RV32-BOTH-NEXT: sw a3, 4(a0)
26-
; RV32-BOTH-NEXT: sw a4, 8(a0)
27-
; RV32-BOTH-NEXT: sw a1, 12(a0)
28-
; RV32-BOTH-NEXT: addi a0, a0, 16
29-
; RV32-BOTH-NEXT: bne a0, a5, .LBB0_1
25+
; RV32-BOTH-NEXT: slli a7, a5, 4
26+
; RV32-BOTH-NEXT: add a7, a0, a7
27+
; RV32-BOTH-NEXT: addi a5, a5, 1
28+
; RV32-BOTH-NEXT: seqz t0, a5
29+
; RV32-BOTH-NEXT: add a6, a6, t0
30+
; RV32-BOTH-NEXT: or t0, a5, a6
31+
; RV32-BOTH-NEXT: sw a2, 0(a7)
32+
; RV32-BOTH-NEXT: sw a3, 4(a7)
33+
; RV32-BOTH-NEXT: sw a4, 8(a7)
34+
; RV32-BOTH-NEXT: sw a1, 12(a7)
35+
; RV32-BOTH-NEXT: beqz t0, .LBB0_1
3036
; RV32-BOTH-NEXT: # %bb.2: # %split
3137
; RV32-BOTH-NEXT: ret
3238
;
3339
; RV64-BOTH-LABEL: memset_1:
34-
; RV64-BOTH: # %bb.0: # %storeloop.preheader
40+
; RV64-BOTH: # %bb.0: # %loadstoreloop.preheader
3541
; RV64-BOTH-NEXT: addi a3, a0, 16
36-
; RV64-BOTH-NEXT: .LBB0_1: # %storeloop
42+
; RV64-BOTH-NEXT: .LBB0_1: # %loadstoreloop
3743
; RV64-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
3844
; RV64-BOTH-NEXT: sd a1, 0(a0)
3945
; RV64-BOTH-NEXT: sd a2, 8(a0)
@@ -47,57 +53,69 @@ define void @memset_1(ptr %a, i128 %value) nounwind {
4753

4854
define void @memset_1_noalign(ptr %a, i128 %value) nounwind {
4955
; RV32-LABEL: memset_1_noalign:
50-
; RV32: # %bb.0: # %storeloop.preheader
51-
; RV32-NEXT: addi sp, sp, -16
52-
; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
53-
; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
54-
; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
55-
; RV32-NEXT: lw a2, 0(a1)
56-
; RV32-NEXT: lw a3, 4(a1)
57-
; RV32-NEXT: lw a4, 8(a1)
56+
; RV32: # %bb.0: # %loadstoreloop.preheader
57+
; RV32-NEXT: addi sp, sp, -32
58+
; RV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill
59+
; RV32-NEXT: sw s1, 24(sp) # 4-byte Folded Spill
60+
; RV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
61+
; RV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
62+
; RV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill
63+
; RV32-NEXT: sw s5, 8(sp) # 4-byte Folded Spill
64+
; RV32-NEXT: li a2, 0
65+
; RV32-NEXT: li a3, 0
66+
; RV32-NEXT: lw a4, 4(a1)
67+
; RV32-NEXT: lw a5, 0(a1)
68+
; RV32-NEXT: lw a6, 8(a1)
5869
; RV32-NEXT: lw a1, 12(a1)
59-
; RV32-NEXT: addi a5, a0, 16
60-
; RV32-NEXT: srli a6, a2, 24
61-
; RV32-NEXT: srli a7, a2, 16
62-
; RV32-NEXT: srli t0, a2, 8
63-
; RV32-NEXT: srli t1, a3, 24
64-
; RV32-NEXT: srli t2, a3, 16
65-
; RV32-NEXT: srli t3, a3, 8
66-
; RV32-NEXT: srli t4, a4, 24
67-
; RV32-NEXT: srli t5, a4, 16
68-
; RV32-NEXT: srli t6, a4, 8
69-
; RV32-NEXT: srli s0, a1, 24
70-
; RV32-NEXT: srli s1, a1, 16
71-
; RV32-NEXT: srli s2, a1, 8
72-
; RV32-NEXT: .LBB1_1: # %storeloop
70+
; RV32-NEXT: srli a7, a4, 24
71+
; RV32-NEXT: srli t0, a4, 16
72+
; RV32-NEXT: srli t1, a4, 8
73+
; RV32-NEXT: srli t2, a5, 24
74+
; RV32-NEXT: srli t3, a5, 16
75+
; RV32-NEXT: srli t4, a5, 8
76+
; RV32-NEXT: srli t5, a6, 24
77+
; RV32-NEXT: srli t6, a6, 16
78+
; RV32-NEXT: srli s0, a6, 8
79+
; RV32-NEXT: srli s1, a1, 24
80+
; RV32-NEXT: srli s2, a1, 16
81+
; RV32-NEXT: srli s3, a1, 8
82+
; RV32-NEXT: .LBB1_1: # %loadstoreloop
7383
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
74-
; RV32-NEXT: sb a2, 0(a0)
75-
; RV32-NEXT: sb t0, 1(a0)
76-
; RV32-NEXT: sb a7, 2(a0)
77-
; RV32-NEXT: sb a6, 3(a0)
78-
; RV32-NEXT: sb a3, 4(a0)
79-
; RV32-NEXT: sb t3, 5(a0)
80-
; RV32-NEXT: sb t2, 6(a0)
81-
; RV32-NEXT: sb t1, 7(a0)
82-
; RV32-NEXT: sb a4, 8(a0)
83-
; RV32-NEXT: sb t6, 9(a0)
84-
; RV32-NEXT: sb t5, 10(a0)
85-
; RV32-NEXT: sb t4, 11(a0)
86-
; RV32-NEXT: sb a1, 12(a0)
87-
; RV32-NEXT: sb s2, 13(a0)
88-
; RV32-NEXT: sb s1, 14(a0)
89-
; RV32-NEXT: sb s0, 15(a0)
90-
; RV32-NEXT: addi a0, a0, 16
91-
; RV32-NEXT: bne a0, a5, .LBB1_1
84+
; RV32-NEXT: slli s4, a2, 4
85+
; RV32-NEXT: add s4, a0, s4
86+
; RV32-NEXT: sb a4, 4(s4)
87+
; RV32-NEXT: sb t1, 5(s4)
88+
; RV32-NEXT: sb t0, 6(s4)
89+
; RV32-NEXT: sb a7, 7(s4)
90+
; RV32-NEXT: sb a5, 0(s4)
91+
; RV32-NEXT: sb t4, 1(s4)
92+
; RV32-NEXT: sb t3, 2(s4)
93+
; RV32-NEXT: sb t2, 3(s4)
94+
; RV32-NEXT: sb a6, 8(s4)
95+
; RV32-NEXT: sb s0, 9(s4)
96+
; RV32-NEXT: sb t6, 10(s4)
97+
; RV32-NEXT: sb t5, 11(s4)
98+
; RV32-NEXT: addi a2, a2, 1
99+
; RV32-NEXT: seqz s5, a2
100+
; RV32-NEXT: add a3, a3, s5
101+
; RV32-NEXT: or s5, a2, a3
102+
; RV32-NEXT: sb a1, 12(s4)
103+
; RV32-NEXT: sb s3, 13(s4)
104+
; RV32-NEXT: sb s2, 14(s4)
105+
; RV32-NEXT: sb s1, 15(s4)
106+
; RV32-NEXT: beqz s5, .LBB1_1
92107
; RV32-NEXT: # %bb.2: # %split
93-
; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
94-
; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
95-
; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
96-
; RV32-NEXT: addi sp, sp, 16
108+
; RV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
109+
; RV32-NEXT: lw s1, 24(sp) # 4-byte Folded Reload
110+
; RV32-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
111+
; RV32-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
112+
; RV32-NEXT: lw s4, 12(sp) # 4-byte Folded Reload
113+
; RV32-NEXT: lw s5, 8(sp) # 4-byte Folded Reload
114+
; RV32-NEXT: addi sp, sp, 32
97115
; RV32-NEXT: ret
98116
;
99117
; RV64-LABEL: memset_1_noalign:
100-
; RV64: # %bb.0: # %storeloop.preheader
118+
; RV64: # %bb.0: # %loadstoreloop.preheader
101119
; RV64-NEXT: addi sp, sp, -32
102120
; RV64-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
103121
; RV64-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
@@ -117,7 +135,7 @@ define void @memset_1_noalign(ptr %a, i128 %value) nounwind {
117135
; RV64-NEXT: srli s0, a2, 24
118136
; RV64-NEXT: srli s1, a2, 16
119137
; RV64-NEXT: srli s2, a2, 8
120-
; RV64-NEXT: .LBB1_1: # %storeloop
138+
; RV64-NEXT: .LBB1_1: # %loadstoreloop
121139
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
122140
; RV64-NEXT: sb a7, 4(a0)
123141
; RV64-NEXT: sb a6, 5(a0)
@@ -145,27 +163,33 @@ define void @memset_1_noalign(ptr %a, i128 %value) nounwind {
145163
; RV64-NEXT: ret
146164
;
147165
; RV32-FAST-LABEL: memset_1_noalign:
148-
; RV32-FAST: # %bb.0: # %storeloop.preheader
166+
; RV32-FAST: # %bb.0: # %loadstoreloop.preheader
149167
; RV32-FAST-NEXT: lw a2, 0(a1)
150168
; RV32-FAST-NEXT: lw a3, 4(a1)
151169
; RV32-FAST-NEXT: lw a4, 8(a1)
152170
; RV32-FAST-NEXT: lw a1, 12(a1)
153-
; RV32-FAST-NEXT: addi a5, a0, 16
154-
; RV32-FAST-NEXT: .LBB1_1: # %storeloop
171+
; RV32-FAST-NEXT: li a5, 0
172+
; RV32-FAST-NEXT: li a6, 0
173+
; RV32-FAST-NEXT: .LBB1_1: # %loadstoreloop
155174
; RV32-FAST-NEXT: # =>This Inner Loop Header: Depth=1
156-
; RV32-FAST-NEXT: sw a2, 0(a0)
157-
; RV32-FAST-NEXT: sw a3, 4(a0)
158-
; RV32-FAST-NEXT: sw a4, 8(a0)
159-
; RV32-FAST-NEXT: sw a1, 12(a0)
160-
; RV32-FAST-NEXT: addi a0, a0, 16
161-
; RV32-FAST-NEXT: bne a0, a5, .LBB1_1
175+
; RV32-FAST-NEXT: slli a7, a5, 4
176+
; RV32-FAST-NEXT: add a7, a0, a7
177+
; RV32-FAST-NEXT: addi a5, a5, 1
178+
; RV32-FAST-NEXT: seqz t0, a5
179+
; RV32-FAST-NEXT: add a6, a6, t0
180+
; RV32-FAST-NEXT: or t0, a5, a6
181+
; RV32-FAST-NEXT: sw a2, 0(a7)
182+
; RV32-FAST-NEXT: sw a3, 4(a7)
183+
; RV32-FAST-NEXT: sw a4, 8(a7)
184+
; RV32-FAST-NEXT: sw a1, 12(a7)
185+
; RV32-FAST-NEXT: beqz t0, .LBB1_1
162186
; RV32-FAST-NEXT: # %bb.2: # %split
163187
; RV32-FAST-NEXT: ret
164188
;
165189
; RV64-FAST-LABEL: memset_1_noalign:
166-
; RV64-FAST: # %bb.0: # %storeloop.preheader
190+
; RV64-FAST: # %bb.0: # %loadstoreloop.preheader
167191
; RV64-FAST-NEXT: addi a3, a0, 16
168-
; RV64-FAST-NEXT: .LBB1_1: # %storeloop
192+
; RV64-FAST-NEXT: .LBB1_1: # %loadstoreloop
169193
; RV64-FAST-NEXT: # =>This Inner Loop Header: Depth=1
170194
; RV64-FAST-NEXT: sd a1, 0(a0)
171195
; RV64-FAST-NEXT: sd a2, 8(a0)
@@ -179,27 +203,35 @@ define void @memset_1_noalign(ptr %a, i128 %value) nounwind {
179203

180204
define void @memset_4(ptr %a, i128 %value) nounwind {
181205
; RV32-BOTH-LABEL: memset_4:
182-
; RV32-BOTH: # %bb.0: # %storeloop.preheader
206+
; RV32-BOTH: # %bb.0: # %loadstoreloop.preheader
183207
; RV32-BOTH-NEXT: lw a2, 0(a1)
184208
; RV32-BOTH-NEXT: lw a3, 4(a1)
185209
; RV32-BOTH-NEXT: lw a4, 8(a1)
186210
; RV32-BOTH-NEXT: lw a1, 12(a1)
187-
; RV32-BOTH-NEXT: addi a5, a0, 64
188-
; RV32-BOTH-NEXT: .LBB2_1: # %storeloop
211+
; RV32-BOTH-NEXT: li a5, 0
212+
; RV32-BOTH-NEXT: li a6, 0
213+
; RV32-BOTH-NEXT: .LBB2_1: # %loadstoreloop
189214
; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
190-
; RV32-BOTH-NEXT: sw a2, 0(a0)
191-
; RV32-BOTH-NEXT: sw a3, 4(a0)
192-
; RV32-BOTH-NEXT: sw a4, 8(a0)
193-
; RV32-BOTH-NEXT: sw a1, 12(a0)
194-
; RV32-BOTH-NEXT: addi a0, a0, 16
195-
; RV32-BOTH-NEXT: bne a0, a5, .LBB2_1
215+
; RV32-BOTH-NEXT: slli a7, a5, 4
216+
; RV32-BOTH-NEXT: add a7, a0, a7
217+
; RV32-BOTH-NEXT: addi a5, a5, 1
218+
; RV32-BOTH-NEXT: seqz t0, a5
219+
; RV32-BOTH-NEXT: add a6, a6, t0
220+
; RV32-BOTH-NEXT: seqz t0, a6
221+
; RV32-BOTH-NEXT: sltiu t1, a5, 4
222+
; RV32-BOTH-NEXT: and t0, t0, t1
223+
; RV32-BOTH-NEXT: sw a2, 0(a7)
224+
; RV32-BOTH-NEXT: sw a3, 4(a7)
225+
; RV32-BOTH-NEXT: sw a4, 8(a7)
226+
; RV32-BOTH-NEXT: sw a1, 12(a7)
227+
; RV32-BOTH-NEXT: bnez t0, .LBB2_1
196228
; RV32-BOTH-NEXT: # %bb.2: # %split
197229
; RV32-BOTH-NEXT: ret
198230
;
199231
; RV64-BOTH-LABEL: memset_4:
200-
; RV64-BOTH: # %bb.0: # %storeloop.preheader
232+
; RV64-BOTH: # %bb.0: # %loadstoreloop.preheader
201233
; RV64-BOTH-NEXT: addi a3, a0, 64
202-
; RV64-BOTH-NEXT: .LBB2_1: # %storeloop
234+
; RV64-BOTH-NEXT: .LBB2_1: # %loadstoreloop
203235
; RV64-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
204236
; RV64-BOTH-NEXT: sd a1, 0(a0)
205237
; RV64-BOTH-NEXT: sd a2, 8(a0)
@@ -214,38 +246,50 @@ define void @memset_4(ptr %a, i128 %value) nounwind {
214246
define void @memset_x(ptr %a, i128 %value, i64 %x) nounwind {
215247
; RV32-BOTH-LABEL: memset_x:
216248
; RV32-BOTH: # %bb.0:
217-
; RV32-BOTH-NEXT: or a3, a2, a3
218-
; RV32-BOTH-NEXT: beqz a3, .LBB3_3
219-
; RV32-BOTH-NEXT: # %bb.1: # %storeloop.preheader
220-
; RV32-BOTH-NEXT: lw a3, 0(a1)
221-
; RV32-BOTH-NEXT: lw a4, 4(a1)
222-
; RV32-BOTH-NEXT: lw a5, 8(a1)
249+
; RV32-BOTH-NEXT: or a4, a2, a3
250+
; RV32-BOTH-NEXT: beqz a4, .LBB3_5
251+
; RV32-BOTH-NEXT: # %bb.1: # %loadstoreloop.preheader
252+
; RV32-BOTH-NEXT: lw a4, 0(a1)
253+
; RV32-BOTH-NEXT: lw a5, 4(a1)
254+
; RV32-BOTH-NEXT: lw a6, 8(a1)
223255
; RV32-BOTH-NEXT: lw a1, 12(a1)
224-
; RV32-BOTH-NEXT: slli a2, a2, 4
225-
; RV32-BOTH-NEXT: add a2, a0, a2
226-
; RV32-BOTH-NEXT: .LBB3_2: # %storeloop
256+
; RV32-BOTH-NEXT: li a7, 0
257+
; RV32-BOTH-NEXT: li t0, 0
258+
; RV32-BOTH-NEXT: j .LBB3_3
259+
; RV32-BOTH-NEXT: .LBB3_2: # %loadstoreloop
260+
; RV32-BOTH-NEXT: # in Loop: Header=BB3_3 Depth=1
261+
; RV32-BOTH-NEXT: sltu t1, t0, a3
262+
; RV32-BOTH-NEXT: beqz t1, .LBB3_5
263+
; RV32-BOTH-NEXT: .LBB3_3: # %loadstoreloop
227264
; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
228-
; RV32-BOTH-NEXT: sw a3, 0(a0)
229-
; RV32-BOTH-NEXT: sw a4, 4(a0)
230-
; RV32-BOTH-NEXT: sw a5, 8(a0)
231-
; RV32-BOTH-NEXT: sw a1, 12(a0)
232-
; RV32-BOTH-NEXT: addi a0, a0, 16
233-
; RV32-BOTH-NEXT: bne a0, a2, .LBB3_2
234-
; RV32-BOTH-NEXT: .LBB3_3: # %split
265+
; RV32-BOTH-NEXT: slli t1, a7, 4
266+
; RV32-BOTH-NEXT: add t1, a0, t1
267+
; RV32-BOTH-NEXT: addi a7, a7, 1
268+
; RV32-BOTH-NEXT: seqz t2, a7
269+
; RV32-BOTH-NEXT: add t0, t0, t2
270+
; RV32-BOTH-NEXT: sw a4, 0(t1)
271+
; RV32-BOTH-NEXT: sw a5, 4(t1)
272+
; RV32-BOTH-NEXT: sw a6, 8(t1)
273+
; RV32-BOTH-NEXT: sw a1, 12(t1)
274+
; RV32-BOTH-NEXT: bne t0, a3, .LBB3_2
275+
; RV32-BOTH-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
276+
; RV32-BOTH-NEXT: sltu t1, a7, a2
277+
; RV32-BOTH-NEXT: bnez t1, .LBB3_3
278+
; RV32-BOTH-NEXT: .LBB3_5: # %split
235279
; RV32-BOTH-NEXT: ret
236280
;
237281
; RV64-BOTH-LABEL: memset_x:
238282
; RV64-BOTH: # %bb.0:
239283
; RV64-BOTH-NEXT: beqz a3, .LBB3_3
240-
; RV64-BOTH-NEXT: # %bb.1: # %storeloop.preheader
241-
; RV64-BOTH-NEXT: slli a3, a3, 4
242-
; RV64-BOTH-NEXT: add a3, a0, a3
243-
; RV64-BOTH-NEXT: .LBB3_2: # %storeloop
284+
; RV64-BOTH-NEXT: # %bb.1: # %loadstoreloop.preheader
285+
; RV64-BOTH-NEXT: li a4, 0
286+
; RV64-BOTH-NEXT: .LBB3_2: # %loadstoreloop
244287
; RV64-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
245288
; RV64-BOTH-NEXT: sd a1, 0(a0)
246289
; RV64-BOTH-NEXT: sd a2, 8(a0)
290+
; RV64-BOTH-NEXT: addi a4, a4, 1
247291
; RV64-BOTH-NEXT: addi a0, a0, 16
248-
; RV64-BOTH-NEXT: bne a0, a3, .LBB3_2
292+
; RV64-BOTH-NEXT: bltu a4, a3, .LBB3_2
249293
; RV64-BOTH-NEXT: .LBB3_3: # %split
250294
; RV64-BOTH-NEXT: ret
251295
tail call void @llvm.memset.pattern(ptr align 8 %a, i128 %value, i64 %x, i1 0)

0 commit comments

Comments
 (0)