-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[X86] Match SETCC_CARRY in addition of SUB when trying to reconstruct LEA #126551
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-x86 Author: None (deadalnix) ChangesAs per title. This is not a huge deal at the moment, but becomes one when matching node in topological order in the DAGCombiner, and is generally more stable than the existing. Patch is 40.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126551.diff 12 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 34ac4262beb85d3..601f51ec9b8a4fb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51985,21 +51985,27 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
return R;
// (0 - SetCC) | C -> (zext (not SetCC)) * (C + 1) - 1 if we can get a LEA out of it.
- if ((VT == MVT::i32 || VT == MVT::i64) &&
- N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
- isNullConstant(N0.getOperand(0))) {
- SDValue Cond = N0.getOperand(1);
- if (Cond.getOpcode() == ISD::ZERO_EXTEND && Cond.hasOneUse())
- Cond = Cond.getOperand(0);
-
- if (Cond.getOpcode() == X86ISD::SETCC && Cond.hasOneUse()) {
- if (auto *CN = dyn_cast<ConstantSDNode>(N1)) {
- uint64_t Val = CN->getZExtValue();
- if (Val == 1 || Val == 2 || Val == 3 || Val == 4 || Val == 7 || Val == 8) {
- X86::CondCode CCode = (X86::CondCode)Cond.getConstantOperandVal(0);
- CCode = X86::GetOppositeBranchCondition(CCode);
- SDValue NotCond = getSETCC(CCode, Cond.getOperand(1), SDLoc(Cond), DAG);
+ if (VT == MVT::i32 || VT == MVT::i64) {
+ if (auto *CN = dyn_cast<ConstantSDNode>(N1)) {
+ unsigned Val = CN->getZExtValue();
+ if (Val == 1 || Val == 2 || Val == 3 || Val == 4 || Val == 7 || Val == 8) {
+ SDValue NotCond;
+ if (N0.getOpcode() == X86ISD::SETCC_CARRY && N0.hasOneUse() && N0.getOperand(1).hasOneUse()) {
+ X86::CondCode OldCC = (X86::CondCode)N0.getConstantOperandVal(0);
+ X86::CondCode NewCC = X86::GetOppositeBranchCondition(OldCC);
+ NotCond = getSETCC(NewCC, N0.getOperand(1), SDLoc(N0), DAG);
+ } else if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isNullConstant(N0.getOperand(0))) {
+ SDValue Cond = N0.getOperand(1);
+ if (Cond.getOpcode() == ISD::ZERO_EXTEND && Cond.hasOneUse())
+ Cond = Cond.getOperand(0);
+ if (Cond.getOpcode() == X86ISD::SETCC && Cond.hasOneUse()) {
+ X86::CondCode OldCC = (X86::CondCode)Cond.getConstantOperandVal(0);
+ X86::CondCode NewCC = X86::GetOppositeBranchCondition(OldCC);
+ NotCond = getSETCC(NewCC, Cond.getOperand(1), SDLoc(Cond), DAG);
+ }
+ }
+ if (NotCond) {
SDValue R = DAG.getZExtOrTrunc(NotCond, dl, VT);
R = DAG.getNode(ISD::MUL, dl, VT, R, DAG.getConstant(Val + 1, dl, VT));
R = DAG.getNode(ISD::SUB, dl, VT, R, DAG.getConstant(1, dl, VT));
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
index 62935f7e372b3ab..7d1422d3c961eb7 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
@@ -159,10 +159,9 @@ define i32 @length3(ptr %X, ptr %Y) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: retl
; X86-NEXT: .LBB9_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
@@ -284,10 +283,9 @@ define i32 @length5(ptr %X, ptr %Y) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: retl
; X86-NEXT: .LBB16_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
@@ -330,10 +328,9 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB18_2
; X86-NEXT: .LBB18_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB18_2: # %endblock
; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
@@ -367,8 +364,8 @@ define i32 @length7(ptr %X, ptr %Y) nounwind {
; X86-NEXT: .LBB19_2: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB19_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
@@ -416,8 +413,8 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
; X86-NEXT: .LBB21_2: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB21_3: # %endblock
; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
@@ -451,8 +448,8 @@ define i32 @length8(ptr %X, ptr %Y) nounwind {
; X86-NEXT: .LBB22_2: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB22_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
@@ -613,8 +610,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind {
; X86-NEXT: .LBB29_3: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB29_4: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
@@ -738,8 +735,8 @@ define i32 @length16(ptr %X, ptr %Y) nounwind {
; X86-NEXT: .LBB33_4: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB33_5: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
@@ -852,8 +849,8 @@ define i1 @length16_lt(ptr %x, ptr %y) nounwind {
; X86-NEXT: .LBB35_4: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB35_5: # %endblock
; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
@@ -901,8 +898,8 @@ define i1 @length16_gt(ptr %x, ptr %y) nounwind {
; X86-NEXT: .LBB36_4: # %res_block
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpl %ecx, %eax
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: orl $1, %edx
+; X86-NEXT: setae %dl
+; X86-NEXT: leal -1(%edx,%edx), %edx
; X86-NEXT: .LBB36_5: # %endblock
; X86-NEXT: testl %edx, %edx
; X86-NEXT: setg %al
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
index 9bbd335a903be91..3a3824a4ffe83e6 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -139,11 +139,11 @@ define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
define i32 @length3(ptr %X, ptr %Y) nounwind {
; X64-LABEL: length3:
; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %ecx
-; X64-NEXT: movzwl (%rsi), %edx
+; X64-NEXT: movzwl (%rdi), %eax
+; X64-NEXT: movzwl (%rsi), %ecx
+; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
-; X64-NEXT: rolw $8, %dx
-; X64-NEXT: cmpw %dx, %cx
+; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB9_3
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
@@ -151,10 +151,9 @@ define i32 @length3(ptr %X, ptr %Y) nounwind {
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpw %dx, %cx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
ret i32 %m
@@ -248,11 +247,11 @@ define i1 @length4_eq_const(ptr %X) nounwind {
define i32 @length5(ptr %X, ptr %Y) nounwind {
; X64-LABEL: length5:
; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl (%rsi), %ecx
+; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
+; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB16_3
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
@@ -260,10 +259,9 @@ define i32 @length5(ptr %X, ptr %Y) nounwind {
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB16_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
ret i32 %m
@@ -288,11 +286,11 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
; X64-LABEL: length5_lt:
; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl (%rsi), %ecx
+; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
+; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB18_3
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
@@ -302,10 +300,9 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
; X64-NEXT: .LBB18_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
@@ -334,8 +331,8 @@ define i32 @length7(ptr %X, ptr %Y) nounwind {
; X64-NEXT: .LBB19_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB19_3: # %endblock
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
@@ -377,8 +374,8 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
; X64-NEXT: .LBB21_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB21_3: # %endblock
; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -510,8 +507,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind {
; X64-NEXT: .LBB29_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB29_3: # %endblock
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
@@ -585,8 +582,8 @@ define i32 @length16(ptr %X, ptr %Y) nounwind {
; X64-NEXT: .LBB33_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB33_3: # %endblock
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
@@ -655,8 +652,8 @@ define i1 @length16_lt(ptr %x, ptr %y) nounwind {
; X64-NEXT: .LBB35_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB35_3: # %endblock
; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -686,8 +683,8 @@ define i1 @length16_gt(ptr %x, ptr %y) nounwind {
; X64-NEXT: .LBB36_2: # %res_block
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbl %edx, %edx
-; X64-NEXT: orl $1, %edx
+; X64-NEXT: setae %dl
+; X64-NEXT: leal -1(%rdx,%rdx), %edx
; X64-NEXT: .LBB36_3: # %endblock
; X64-NEXT: testl %edx, %edx
; X64-NEXT: setg %al
@@ -766,8 +763,8 @@ define i32 @length24(ptr %X, ptr %Y) nounwind {
; X64-NEXT: .LBB38_3: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB38_4: # %endblock
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
@@ -858,8 +855,8 @@ define i1 @length24_lt(ptr %x, ptr %y) nounwind {
; X64-NEXT: .LBB40_3: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB40_4: # %endblock
; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -896,8 +893,8 @@ define i1 @length24_gt(ptr %x, ptr %y) nounwind {
; X64-NEXT: .LBB41_3: # %res_block
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbl %edx, %edx
-; X64-NEXT: orl $1, %edx
+; X64-NEXT: setae %dl
+; X64-NEXT: leal -1(%rdx,%rdx), %edx
; X64-NEXT: .LBB41_4: # %endblock
; X64-NEXT: testl %edx, %edx
; X64-NEXT: setg %al
@@ -993,8 +990,8 @@ define i32 @length31(ptr %X, ptr %Y) nounwind {
; X64-NEXT: .LBB43_4: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB43_5: # %endblock
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind
@@ -1091,8 +1088,8 @@ define i1 @length31_lt(ptr %x, ptr %y) nounwind {
; X64-NEXT: .LBB45_4: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB45_5: # %endblock
; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -1136,8 +1133,8 @@ define i1 @length31_gt(ptr %x, ptr %y) nounwind {
; X64-NEXT: .LBB46_4: # %res_block
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbl %edx, %edx
-; X64-NEXT: orl $1, %edx
+; X64-NEXT: setae %dl
+; X64-NEXT: leal -1(%rdx,%rdx), %edx
; X64-NEXT: .LBB46_5: # %endblock
; X64-NEXT: testl %edx, %edx
; X64-NEXT: setg %al
@@ -1289,8 +1286,8 @@ define i32 @length32(ptr %X, ptr %Y) nounwind {
; X64-NEXT: .LBB49_4: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB49_5: # %endblock
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
@@ -1402,8 +1399,8 @@ define i1 @length32_lt(ptr %x, ptr %y) nounwind {
; X64-NEXT: .LBB51_4: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB51_5: # %endblock
; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -1447,8 +1444,8 @@ define i1 @length32_gt(ptr %x, ptr %y) nounwind {
; X64-NEXT: .LBB52_4: # %res_block
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbl %edx, %edx
-; X64-NEXT: orl $1, %edx
+; X64-NEXT: setae %dl
+; X64-NEXT: leal -1(%rdx,%rdx), %edx
; X64-NEXT: .LBB52_5: # %endblock
; X64-NEXT: testl %edx, %edx
; X64-NEXT: setg %al
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
index 3a16ab656b11fa2..09f02c3f56346cb 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
@@ -87,10 +87,9 @@ define i32 @length3(ptr %X, ptr %Y) nounwind optsize {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB4_2
; X86-NEXT: .LBB4_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB4_2: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
@@ -178,10 +177,9 @@ define i32 @length5(ptr %X, ptr %Y) nounwind optsize {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB9_2
; X86-NEXT: .LBB9_3: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB9_2: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
@@ -230,8 +228,8 @@ define i32 @length8(ptr %X, ptr %Y) nounwind optsize {
; X86-NEXT: .LBB11_2: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl $1, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB11_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
index 0f817b2c727c337..4fe67fa0883de30 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll
@@ -67,11 +67,11 @@ define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind optsize {
define i32 @length3(ptr %X, ptr %Y) nounwind optsize {
; X64-LABEL: length3:
; X64: # %bb.0:
-; X64-NEXT: movzwl (%rdi), %ecx
-; X64-NEXT: movzwl (%rsi), %edx
+; X64-NEXT: movzwl (%rdi), %eax
+; X64-NEXT: movzwl (%rsi), %ecx
+; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
-; X64-NEXT: rolw $8, %dx
-; X64-NEXT: cmpw %dx, %cx
+; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_3
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
@@ -79,10 +79,9 @@ define i32 @length3(ptr %X, ptr %Y) nounwind optsize {
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB4_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpw %dx, %cx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
ret i32 %m
@@ -146,11 +145,11 @@ define i1 @length4_eq_const(ptr %X) nounwind optsize {
define i32 @length5(ptr %X, ptr %Y) nounwind optsize {
; X64-LABEL: length5:
; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl (%rsi), %ecx
+; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: cmpl %edx, %ecx
+; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_3
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
@@ -158,10 +157,9 @@ define i32 @length5(ptr %X, ptr %Y) nounwind optsize {
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_3: # %res_block
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl $1, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: m...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
387ac43 to
f26bba2
Compare
|
Fixed the formatting. |
|
Thanks @RKSimon . Any feedback on this @phoebewang ? Thanks. |
| SDValue NotCond = getSETCC(CCode, Cond.getOperand(1), SDLoc(Cond), DAG); | ||
| if (VT == MVT::i32 || VT == MVT::i64) { | ||
| if (auto *CN = dyn_cast<ConstantSDNode>(N1)) { | ||
| unsigned Val = CN->getZExtValue(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why change it to unsigned?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's an error on my part.
| @@ -51985,21 +51985,30 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, | |||
| return R; | |||
|
|
|||
| // (0 - SetCC) | C -> (zext (not SetCC)) * (C + 1) - 1 if we can get a LEA out of it. | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update comments for SETCC_CARRY?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure.
| if (N0.getOpcode() == X86ISD::SETCC_CARRY && N0.hasOneUse() && | ||
| N0.getOperand(1).hasOneUse()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
N0.hasOneUse() && N0.getOperand(1).hasOneUse() can be hoisted out?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we really check N0.getOperand(1) before checking the opcode of N0? I don't think that would be correct. But I can lift N0.hasOneUse().
f26bba2 to
93fe9a4
Compare
deadalnix
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comment added, condition hoisted and erroneous unsigned fixed.
| SDValue NotCond = getSETCC(CCode, Cond.getOperand(1), SDLoc(Cond), DAG); | ||
| if (VT == MVT::i32 || VT == MVT::i64) { | ||
| if (auto *CN = dyn_cast<ConstantSDNode>(N1)) { | ||
| unsigned Val = CN->getZExtValue(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's an error on my part.
| if (N0.getOpcode() == X86ISD::SETCC_CARRY && N0.hasOneUse() && | ||
| N0.getOperand(1).hasOneUse()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we really check N0.getOperand(1) before checking the opcode of N0? I don't think that would be correct. But I can lift N0.hasOneUse().
| @@ -51985,21 +51985,30 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, | |||
| return R; | |||
|
|
|||
| // (0 - SetCC) | C -> (zext (not SetCC)) * (C + 1) - 1 if we can get a LEA out of it. | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure.
phoebewang
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
| uint64_t Val = CN->getZExtValue(); | ||
| if ((Val == 1 || Val == 2 || Val == 3 || Val == 4 || Val == 7 || | ||
| Val == 8) && | ||
| N0.hasOneUse()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: this can be moved to line 520196.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I meant move N0.hasOneUse() to line 52016..
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I missed that, updating again.
93fe9a4 to
8de5865
Compare
|
What's the process to merge into main these days? |
I can merge for you if no objections in 24h. |
Its easiest just to use the squash and merge button below once the CI is green. |
8de5865 to
fd7f43c
Compare
Right, you can merge it by yourself. I thought you don't have the write access to LLVM. |
The reason I'm asking is because I don't have the merge button on my side. Who should I ping about this? |
I see. I merged it for you. |
|
@deadalnix best to create an issue labeled with infra:commit-access-request |
As per title.
This is not a huge deal at the moment, but becomes one when matching node in topological order in the DAGCombiner, and is generally more stable than the existing.