Skip to content

Commit 2203ed1

Browse files
committed
[ExpandMemCmp][AArch64][PowerPC][RISCV][X86] Use llvm.ucmp instead of (sub (zext (icmp ugt)), (zext (icmp ult))).
AArch64 looks like an improvement. Not sure about PowerPC. RISC-V is neutral. X86 trades a dependency-breaking xor before a seta for a movsx after an sbbb. Depending on how the result is used, this movsx might go away.
1 parent f03b100 commit 2203ed1

File tree

18 files changed

+133
-189
lines changed

18 files changed

+133
-189
lines changed

llvm/lib/CodeGen/ExpandMemCmp.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -696,17 +696,9 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
696696
}
697697
}
698698

699-
// The result of memcmp is negative, zero, or positive, so produce that by
700-
// subtracting 2 extended compare bits: sub (ugt, ult).
701-
// If a target prefers to use selects to get -1/0/1, they should be able
702-
// to transform this later. The inverse transform (going from selects to math)
703-
// may not be possible in the DAG because the selects got converted into
704-
// branches before we got there.
705-
Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
706-
Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
707-
Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
708-
Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
709-
return Builder.CreateSub(ZextUGT, ZextULT);
699+
// The result of memcmp is negative, zero, or positive.
700+
return Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::ucmp,
701+
{Loads.Lhs, Loads.Rhs});
710702
}
711703

712704
// This function expands the memcmp call into an inline expansion and returns

llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,9 +313,8 @@ define void @one_dimensional_with_store(ptr %a, ptr %b, ptr %c, i32 %N) {
313313
; CHECK-NEXT: rev w9, w9
314314
; CHECK-NEXT: cmp w9, w10
315315
; CHECK-NEXT: cset w9, hi
316-
; CHECK-NEXT: cset w10, lo
316+
; CHECK-NEXT: csinv w9, w9, wzr, hs
317317
; CHECK-NEXT: subs x8, x8, #1
318-
; CHECK-NEXT: sub w9, w9, w10
319318
; CHECK-NEXT: strb w9, [x2], #1
320319
; CHECK-NEXT: b.ne .LBB4_1
321320
; CHECK-NEXT: // %bb.2: // %for.exit

llvm/test/CodeGen/AArch64/memcmp.ll

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,7 @@ define i32 @length3(ptr %X, ptr %Y) nounwind {
162162
; CHECK-NEXT: rev w9, w9
163163
; CHECK-NEXT: cmp w8, w9
164164
; CHECK-NEXT: cset w8, hi
165-
; CHECK-NEXT: cset w9, lo
166-
; CHECK-NEXT: sub w0, w8, w9
165+
; CHECK-NEXT: csinv w0, w8, wzr, hs
167166
; CHECK-NEXT: ret
168167
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
169168
ret i32 %m
@@ -194,8 +193,7 @@ define i32 @length4(ptr %X, ptr %Y) nounwind {
194193
; CHECK-NEXT: rev w9, w9
195194
; CHECK-NEXT: cmp w8, w9
196195
; CHECK-NEXT: cset w8, hi
197-
; CHECK-NEXT: cset w9, lo
198-
; CHECK-NEXT: sub w0, w8, w9
196+
; CHECK-NEXT: csinv w0, w8, wzr, hs
199197
; CHECK-NEXT: ret
200198
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
201199
ret i32 %m
@@ -286,8 +284,7 @@ define i32 @length5(ptr %X, ptr %Y) nounwind {
286284
; CHECK-NEXT: rev x9, x9
287285
; CHECK-NEXT: cmp x8, x9
288286
; CHECK-NEXT: cset w8, hi
289-
; CHECK-NEXT: cset w9, lo
290-
; CHECK-NEXT: sub w0, w8, w9
287+
; CHECK-NEXT: csinv w0, w8, wzr, hs
291288
; CHECK-NEXT: ret
292289
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
293290
ret i32 %m
@@ -341,8 +338,7 @@ define i32 @length6(ptr %X, ptr %Y) nounwind {
341338
; CHECK-NEXT: rev x9, x9
342339
; CHECK-NEXT: cmp x8, x9
343340
; CHECK-NEXT: cset w8, hi
344-
; CHECK-NEXT: cset w9, lo
345-
; CHECK-NEXT: sub w0, w8, w9
341+
; CHECK-NEXT: csinv w0, w8, wzr, hs
346342
; CHECK-NEXT: ret
347343
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind
348344
ret i32 %m
@@ -450,8 +446,7 @@ define i32 @length8(ptr %X, ptr %Y) nounwind {
450446
; CHECK-NEXT: rev x9, x9
451447
; CHECK-NEXT: cmp x8, x9
452448
; CHECK-NEXT: cset w8, hi
453-
; CHECK-NEXT: cset w9, lo
454-
; CHECK-NEXT: sub w0, w8, w9
449+
; CHECK-NEXT: csinv w0, w8, wzr, hs
455450
; CHECK-NEXT: ret
456451
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
457452
ret i32 %m

llvm/test/CodeGen/PowerPC/memcmp.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,12 @@ define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture reado
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: ldbrx 3, 0, 3
88
; CHECK-NEXT: ldbrx 4, 0, 4
9-
; CHECK-NEXT: subc 5, 4, 3
10-
; CHECK-NEXT: subfe 5, 4, 4
11-
; CHECK-NEXT: subc 4, 3, 4
12-
; CHECK-NEXT: subfe 3, 3, 3
13-
; CHECK-NEXT: neg 5, 5
9+
; CHECK-NEXT: cmpld 3, 4
10+
; CHECK-NEXT: subc 3, 4, 3
11+
; CHECK-NEXT: subfe 3, 4, 4
12+
; CHECK-NEXT: li 4, -1
1413
; CHECK-NEXT: neg 3, 3
15-
; CHECK-NEXT: sub 3, 5, 3
14+
; CHECK-NEXT: isellt 3, 4, 3
1615
; CHECK-NEXT: extsw 3, 3
1716
; CHECK-NEXT: blr
1817
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8)
@@ -24,12 +23,11 @@ define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture reado
2423
; CHECK: # %bb.0:
2524
; CHECK-NEXT: lwbrx 3, 0, 3
2625
; CHECK-NEXT: lwbrx 4, 0, 4
26+
; CHECK-NEXT: cmplw 3, 4
2727
; CHECK-NEXT: sub 5, 4, 3
28-
; CHECK-NEXT: sub 3, 3, 4
28+
; CHECK-NEXT: li 3, -1
2929
; CHECK-NEXT: rldicl 5, 5, 1, 63
30-
; CHECK-NEXT: rldicl 3, 3, 1, 63
31-
; CHECK-NEXT: sub 3, 5, 3
32-
; CHECK-NEXT: extsw 3, 3
30+
; CHECK-NEXT: isellt 3, 3, 5
3331
; CHECK-NEXT: blr
3432
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)
3533
ret i32 %call

llvm/test/CodeGen/PowerPC/memcmpIR.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -59,22 +59,14 @@ define signext i32 @test2(ptr nocapture readonly %buffer1, ptr nocapture readonl
5959
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
6060
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
6161
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
62-
; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[BSWAP1]], [[BSWAP2]]
63-
; CHECK-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[BSWAP1]], [[BSWAP2]]
64-
; CHECK-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
65-
; CHECK-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
66-
; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
67-
; CHECK-NEXT: ret i32 [[SUB]]
62+
; CHECK-NEXT: [[UCMP:%[0-9]+]] = call i32 @llvm.ucmp.i32.i32(i32 [[BSWAP1]], i32 [[BSWAP2]])
63+
; CHECK-NEXT: ret i32 [[UCMP]]
6864

6965
; CHECK-BE-LABEL: @test2(
7066
; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, ptr
7167
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
72-
; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[LOAD1]], [[LOAD2]]
73-
; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]]
74-
; CHECK-BE-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
75-
; CHECK-BE-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
76-
; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
77-
; CHECK-BE-NEXT: ret i32 [[SUB]]
68+
; CHECK-BE-NEXT: [[UCMP:%[0-9]+]] = call i32 @llvm.ucmp.i32.i32(i32 [[LOAD1]], i32 [[LOAD2]])
69+
; CHECK-BE-NEXT: ret i32 [[UCMP]]
7870

7971
entry:
8072
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)

llvm/test/CodeGen/RISCV/memcmp-optsize.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2658,9 +2658,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
26582658
; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 0(a1)
26592659
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0
26602660
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1
2661-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a1, a0
2662-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a0, a1
2663-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a2, a0
2661+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a0, a1
2662+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a1, a0
2663+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a0, a2
26642664
; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
26652665
;
26662666
; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_4:
@@ -2671,9 +2671,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
26712671
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
26722672
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32
26732673
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a1, a1, 32
2674-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
2675-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
2676-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
2674+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
2675+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
2676+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
26772677
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
26782678
;
26792679
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_4:
@@ -2682,9 +2682,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
26822682
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 0(a1)
26832683
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0
26842684
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1
2685-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a1, a0
2686-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a0, a1
2687-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a2, a0
2685+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a0, a1
2686+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a1, a0
2687+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a0, a2
26882688
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
26892689
;
26902690
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_4:
@@ -2695,9 +2695,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
26952695
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
26962696
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a0, a0, 32
26972697
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a1, a1, 32
2698-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
2699-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
2700-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
2698+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
2699+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
2700+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
27012701
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
27022702
;
27032703
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_4:
@@ -3500,9 +3500,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
35003500
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ld a1, 0(a1)
35013501
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0
35023502
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
3503-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
3504-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
3505-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
3503+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
3504+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
3505+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
35063506
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
35073507
;
35083508
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_8:
@@ -3533,9 +3533,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
35333533
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ld a1, 0(a1)
35343534
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0
35353535
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
3536-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
3537-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
3538-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
3536+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
3537+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
3538+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
35393539
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
35403540
;
35413541
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_8:

llvm/test/CodeGen/RISCV/memcmp.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3354,9 +3354,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
33543354
; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 0(a1)
33553355
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0
33563356
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1
3357-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a1, a0
3358-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a0, a1
3359-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a2, a0
3357+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a0, a1
3358+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a1, a0
3359+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a0, a2
33603360
; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
33613361
;
33623362
; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_4:
@@ -3367,9 +3367,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
33673367
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
33683368
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32
33693369
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a1, a1, 32
3370-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
3371-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
3372-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
3370+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
3371+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
3372+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
33733373
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
33743374
;
33753375
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_4:
@@ -3378,9 +3378,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
33783378
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 0(a1)
33793379
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0
33803380
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1
3381-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a1, a0
3382-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a0, a1
3383-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a2, a0
3381+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a0, a1
3382+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a1, a0
3383+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a0, a2
33843384
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
33853385
;
33863386
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_4:
@@ -3391,9 +3391,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
33913391
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
33923392
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a0, a0, 32
33933393
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a1, a1, 32
3394-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
3395-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
3396-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
3394+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
3395+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
3396+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
33973397
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
33983398
;
33993399
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_4:
@@ -4196,9 +4196,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind {
41964196
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ld a1, 0(a1)
41974197
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0
41984198
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
4199-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
4200-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
4201-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
4199+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
4200+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
4201+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
42024202
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
42034203
;
42044204
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_8:
@@ -4229,9 +4229,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind {
42294229
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ld a1, 0(a1)
42304230
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0
42314231
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
4232-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
4233-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
4234-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
4232+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
4233+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
4234+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
42354235
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
42364236
;
42374237
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_8:

llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -193,13 +193,13 @@ define i32 @length4(ptr %X, ptr %Y) nounwind {
193193
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
194194
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
195195
; X86-NEXT: movl (%ecx), %ecx
196-
; X86-NEXT: movl (%eax), %edx
196+
; X86-NEXT: movl (%eax), %eax
197197
; X86-NEXT: bswapl %ecx
198-
; X86-NEXT: bswapl %edx
199-
; X86-NEXT: xorl %eax, %eax
200-
; X86-NEXT: cmpl %edx, %ecx
198+
; X86-NEXT: bswapl %eax
199+
; X86-NEXT: cmpl %eax, %ecx
201200
; X86-NEXT: seta %al
202-
; X86-NEXT: sbbl $0, %eax
201+
; X86-NEXT: sbbb $0, %al
202+
; X86-NEXT: movsbl %al, %eax
203203
; X86-NEXT: retl
204204
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
205205
ret i32 %m

llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -179,14 +179,14 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
179179
define i32 @length4(ptr %X, ptr %Y) nounwind {
180180
; X64-LABEL: length4:
181181
; X64: # %bb.0:
182-
; X64-NEXT: movl (%rdi), %ecx
183-
; X64-NEXT: movl (%rsi), %edx
182+
; X64-NEXT: movl (%rdi), %eax
183+
; X64-NEXT: movl (%rsi), %ecx
184+
; X64-NEXT: bswapl %eax
184185
; X64-NEXT: bswapl %ecx
185-
; X64-NEXT: bswapl %edx
186-
; X64-NEXT: xorl %eax, %eax
187-
; X64-NEXT: cmpl %edx, %ecx
186+
; X64-NEXT: cmpl %ecx, %eax
188187
; X64-NEXT: seta %al
189-
; X64-NEXT: sbbl $0, %eax
188+
; X64-NEXT: sbbb $0, %al
189+
; X64-NEXT: movsbl %al, %eax
190190
; X64-NEXT: retq
191191
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
192192
ret i32 %m
@@ -391,14 +391,14 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
391391
define i32 @length8(ptr %X, ptr %Y) nounwind {
392392
; X64-LABEL: length8:
393393
; X64: # %bb.0:
394-
; X64-NEXT: movq (%rdi), %rcx
395-
; X64-NEXT: movq (%rsi), %rdx
394+
; X64-NEXT: movq (%rdi), %rax
395+
; X64-NEXT: movq (%rsi), %rcx
396+
; X64-NEXT: bswapq %rax
396397
; X64-NEXT: bswapq %rcx
397-
; X64-NEXT: bswapq %rdx
398-
; X64-NEXT: xorl %eax, %eax
399-
; X64-NEXT: cmpq %rdx, %rcx
398+
; X64-NEXT: cmpq %rcx, %rax
400399
; X64-NEXT: seta %al
401-
; X64-NEXT: sbbl $0, %eax
400+
; X64-NEXT: sbbb $0, %al
401+
; X64-NEXT: movsbl %al, %eax
402402
; X64-NEXT: retq
403403
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
404404
ret i32 %m

llvm/test/CodeGen/X86/memcmp-optsize-x32.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,13 @@ define i32 @length4(ptr %X, ptr %Y) nounwind optsize {
122122
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
123123
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
124124
; X86-NEXT: movl (%ecx), %ecx
125-
; X86-NEXT: movl (%eax), %edx
125+
; X86-NEXT: movl (%eax), %eax
126126
; X86-NEXT: bswapl %ecx
127-
; X86-NEXT: bswapl %edx
128-
; X86-NEXT: xorl %eax, %eax
129-
; X86-NEXT: cmpl %edx, %ecx
127+
; X86-NEXT: bswapl %eax
128+
; X86-NEXT: cmpl %eax, %ecx
130129
; X86-NEXT: seta %al
131-
; X86-NEXT: sbbl $0, %eax
130+
; X86-NEXT: sbbb $0, %al
131+
; X86-NEXT: movsbl %al, %eax
132132
; X86-NEXT: retl
133133
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
134134
ret i32 %m

0 commit comments

Comments
 (0)