From dcc1099af82e9b6d328e9690b5b6492edaa0356a Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Thu, 19 Dec 2024 17:45:38 +0800 Subject: [PATCH 1/5] [X86] Support peephole optimization with CCMP instruction This extends `optimizeCompareInstr` to re-use previous CCMP results if the previous comparison was with an immediate that was 1 bigger or smaller. Example: ``` CCMP x, 13, 2, 5 ... CCMP x, 12, 2, 5 ; can be removed if we change the SETg SETg ... ; x > 12 changed to SETge (x >= 13) & remove the 2nd CCMP ``` --- .../Target/X86/X86InstrConditionalCompare.td | 4 +- llvm/lib/Target/X86/X86InstrInfo.cpp | 14 + .../CodeGen/X86/apx/optimize-compare-ccmp.mir | 312 ++++++++++++++++++ 3 files changed, 328 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir diff --git a/llvm/lib/Target/X86/X86InstrConditionalCompare.td b/llvm/lib/Target/X86/X86InstrConditionalCompare.td index 35af8405f1abe..ba8cf6cc3bc67 100644 --- a/llvm/lib/Target/X86/X86InstrConditionalCompare.td +++ b/llvm/lib/Target/X86/X86InstrConditionalCompare.td @@ -36,7 +36,7 @@ class Ctest<bits<8> o, Format f, X86TypeInfo t, DAGOperand op1, DAGOperand op2>: //===----------------------------------------------------------------------===// // CCMP Instructions // -let SchedRW = [WriteALU] in { +let SchedRW = [WriteALU], isCompare = 1 in { def CCMP8rr : Ccmp<0x38, MRMDestReg, Xi8, GR8, GR8>; def CCMP16rr: Ccmp<0x39, MRMDestReg, Xi16, GR16, GR16>, PD; def CCMP32rr: Ccmp<0x39, MRMDestReg, Xi32, GR32, GR32>; @@ -55,7 +55,7 @@ let SchedRW = [WriteALU] in { def CCMP64ri32: Ccmp<0x81, MRM7r, Xi64, GR64, i64i32imm>; } -let mayLoad = 1 in { +let mayLoad = 1, isCompare = 1 in { let SchedRW = [WriteALU.Folded] in { def CCMP16mi8: Ccmp<0x83, MRM7m, Xi16, i16mem, i16i8imm>, PD; def CCMP32mi8: Ccmp<0x83, MRM7m, Xi32, i32mem, i32i8imm>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 5fe7203c052d8..0b741338934b8 100644 --- 
a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4854,6 +4854,10 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, case X86::CMP32ri: case X86::CMP16ri: case X86::CMP8ri: + case X86::CCMP64ri32: + case X86::CCMP32ri: + case X86::CCMP16ri: + case X86::CCMP8ri: SrcReg = MI.getOperand(0).getReg(); SrcReg2 = 0; if (MI.getOperand(1).isImm()) { @@ -4951,6 +4955,16 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI, } return false; } + case X86::CCMP64ri32: + case X86::CCMP32ri: + case X86::CCMP16ri: + case X86::CCMP8ri: + // The CCMP instruction should not be optimized if the scc/dfv in it is not + // same as the one in previous CCMP instruction. + if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() || + (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm())) + return false; + [[fallthrough]]; case X86::CMP64ri32: case X86::CMP32ri: case X86::CMP16ri: diff --git a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir new file mode 100644 index 0000000000000..1b5ecdfc40e8f --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir @@ -0,0 +1,312 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=x86_64-- -run-pass peephole-opt | FileCheck %s + +--- +name: opt_redundant_flags_adjusted_imm_0 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_0 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags + %0:gr64 = COPY $rsi + ; CCMP+SETCC %0 == 1 + CCMP64ri32 %0, 1, 2, 5, implicit-def $eflags, implicit $eflags + $cl = 
SETCCr 4, implicit $eflags + ; CCMP+SETCC %0 >= 2; CCMP can be removed. + CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags + ; %0 >=s 2 --> %0 >s 1 + $bl = SETCCr 13, implicit $eflags + ; %0 >=u 2 --> %0 >u 1 + $bl = SETCCr 3, implicit $eflags + ; %0 %0 <=s 1 + $bl = SETCCr 12, implicit $eflags + ; %0 %0 <=u 1 + $bl = SETCCr 2, implicit $eflags +... +--- +name: opt_redundant_flags_adjusted_imm_1 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_1 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CCMP64ri32 [[COPY]], 42, 2, 5, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: $cl = SETCCr 5, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags + %0:gr64 = COPY $rsi + ; CCMP+SETCC %0 != 42 + CCMP64ri32 %0, 42, 2, 5, implicit-def $eflags, implicit $eflags + $cl = SETCCr 5, implicit $eflags + ; CCMP+SETCC %0 >= 2; CCMP can be removed. + CCMP64ri32 %0, 41, 2, 5, implicit-def $eflags, implicit $eflags + ; %0 >s 41 --> %0 >=s 42 + $bl = SETCCr 15, implicit $eflags + ; %0 >u 41 --> %0 >=u 42 + $bl = SETCCr 7, implicit $eflags + ; %0 <=s 41 --> %0 %0 s INT16_MAX + $bl = SETCCr 15, implicit $eflags + + CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags + ; CCMP should not be removed. + CCMP16ri %0, 32767, 2, 5, implicit-def $eflags, implicit $eflags + $bl = SETCCr 15, implicit $eflags + + CCMP16ri %0, -32768, 2, 5, implicit-def $eflags, implicit $eflags + ; CCMP should not be removed. + CCMP16ri %0, 32767, 2, 5, implicit-def $eflags, implicit $eflags + $bl = SETCCr 14, implicit $eflags + + CCMP16ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags + ; CCMP should not be removed. 
+ CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags + $bl = SETCCr 4, implicit $eflags + + CCMP16ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags + ; CCMP should not be removed. + CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags + $bl = SETCCr 6, implicit $eflags +... +--- +name: opt_redundant_flags_adjusted_imm_noopt_3 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_3 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 7, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags, implicit $eflags + ; CHECK-NEXT: CCMP64ri32 [[COPY]], 2, 2, 5, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags + %0:gr64 = COPY $rsi + ; CCMP+SETCC %0 == 1 + CCMP64ri32 %0, 1, 2, 7, implicit-def $eflags, implicit $eflags + $cl = SETCCr 4, implicit $eflags, implicit $eflags + ; CCMP+SETCC %0 >= 2; CCMP should not be removed as the scc and dfv is + ; different. + CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags + $bl = SETCCr 13, implicit $eflags + $bl = SETCCr 3, implicit $eflags + $bl = SETCCr 12, implicit $eflags + $bl = SETCCr 2, implicit $eflags +... 
+--- +name: opt_redundant_flags_adjusted_imm_noopt_4 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_4 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 5, 5, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags, implicit $eflags + ; CHECK-NEXT: CCMP64ri32 [[COPY]], 2, 2, 5, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags + %0:gr64 = COPY $rsi + ; CCMP+SETCC %0 == 1 + CCMP64ri32 %0, 1, 5, 5, implicit-def $eflags, implicit $eflags + $cl = SETCCr 4, implicit $eflags, implicit $eflags + ; CCMP+SETCC %0 >= 2; CCMP should not be removed as the scc and dfv is + ; different. + CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags + $bl = SETCCr 13, implicit $eflags + $bl = SETCCr 3, implicit $eflags + $bl = SETCCr 12, implicit $eflags + $bl = SETCCr 2, implicit $eflags +... 
+--- +name: opt_adjusted_imm_multiple_blocks +body: | + ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $eax + ; CHECK-NEXT: CCMP32ri [[COPY]], 20, 2, 5, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: JCC_1 %bb.2, 15, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: RET 0 + bb.0: + %0:gr32 = COPY $eax + CCMP32ri %0, 20, 2, 5, implicit-def $eflags, implicit $eflags + JCC_1 %bb.1, 4, implicit $eflags + JMP_1 %bb.3 + + bb.1: + CCMP32ri %0, 21, 2, 5, implicit-def $eflags, implicit $eflags + JCC_1 %bb.2, 13, implicit $eflags + JMP_1 %bb.3 + + bb.2: + JMP_1 %bb.3 + + bb.3: + RET 0 +... 
+--- +name: opt_adjusted_imm_multiple_blocks_noopt +body: | + ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks_noopt + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $eax + ; CHECK-NEXT: CCMP32ri [[COPY]], 20, 2, 5, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CCMP32ri [[COPY]], 21, 2, 5, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 13, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: RET 0 + bb.0: + %0:gr32 = COPY $eax + CCMP32ri %0, 20, 2, 5, implicit-def $eflags, implicit $eflags + JCC_1 %bb.1, 4, implicit $eflags + JMP_1 %bb.3 + + bb.1: + CCMP32ri %0, 21, 2, 5, implicit-def $eflags, implicit $eflags + JCC_1 %bb.2, 13, implicit $eflags + JMP_1 %bb.3 + + bb.2: + liveins: $eflags + $al = SETCCr 4, implicit $eflags + + bb.3: + RET 0 +... From 3511b19e20303e860cc8a2cdff7b21330e1c7cb3 Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Mon, 10 Mar 2025 09:37:45 +0800 Subject: [PATCH 2/5] Add checks and LIT tests Check scc and dfv are same if FlagI is CCMP instruction and add more LIT tests. 
--- llvm/lib/Target/X86/X86InstrInfo.cpp | 12 ++- .../CodeGen/X86/apx/optimize-compare-ccmp.mir | 83 +++++++++++++++++++ 2 files changed, 91 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 0b741338934b8..6f24ff9cd32d1 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4958,13 +4958,17 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI, case X86::CCMP64ri32: case X86::CCMP32ri: case X86::CCMP16ri: - case X86::CCMP8ri: + case X86::CCMP8ri: { // The CCMP instruction should not be optimized if the scc/dfv in it is not // same as the one in previous CCMP instruction. - if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() || - (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm())) - return false; + unsigned Opcode = FlagI.getOpcode(); + if (Opcode == X86::CCMP64ri32 || Opcode == X86::CCMP32ri || + Opcode == X86::CCMP16ri || Opcode == X86::CCMP8ri) + if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() || + (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm())) + return false; [[fallthrough]]; + } case X86::CMP64ri32: case X86::CMP32ri: case X86::CMP16ri: diff --git a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir index 1b5ecdfc40e8f..96752ceafe542 100644 --- a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir +++ b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir @@ -56,6 +56,89 @@ body: | $bl = SETCCr 6, implicit $eflags ... 
--- +name: opt_redundant_flags_adjusted_imm_2 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_2 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags + %0:gr64 = COPY $rsi + %1:gr64 = MOV64ri 1 + ; CCMP+SETCC %0 == 1 + ; CCMP64rr will be optimized to CCMP64ri32 in the peephole optimization pass + CCMP64rr %0, %1, 2, 5, implicit-def $eflags, implicit $eflags + $cl = SETCCr 4, implicit $eflags + ; CCMP+SETCC %0 >= 2; CCMP can be removed. + CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags + ; %0 >=s 2 --> %0 >s 1 + $bl = SETCCr 13, implicit $eflags + ; %0 >=u 2 --> %0 >u 1 + $bl = SETCCr 3, implicit $eflags + ; %0 <s 2 --> %0 <=s 1 + $bl = SETCCr 12, implicit $eflags + ; %0 <u 2 --> %0 <=u 1 + $bl = SETCCr 2, implicit $eflags +... +--- +name: opt_redundant_flags_adjusted_imm_3 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_3 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CMP64ri32 [[COPY]], 1, implicit-def $eflags + ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags + %0:gr64 = COPY $rsi + ; CMP+SETCC %0 == 1 + CMP64ri32 %0, 1, implicit-def $eflags + $cl = SETCCr 4, implicit $eflags + ; CCMP+SETCC %0 >= 2; CCMP can be removed. 
+ CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags + ; %0 >=s 2 --> %0 >s 1 + $bl = SETCCr 13, implicit $eflags + ; %0 >=u 2 --> %0 >u 1 + $bl = SETCCr 3, implicit $eflags + ; %0 %0 <=s 1 + $bl = SETCCr 12, implicit $eflags + ; %0 %0 <=u 1 + $bl = SETCCr 2, implicit $eflags +... +--- +name: opt_redundant_flags_adjusted_imm_4 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_4 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags + %0:gr64 = COPY $rsi + ; CCMP+SETCC %0 == 1 + CCMP64ri32 %0, 1, 2, 5, implicit-def $eflags, implicit $eflags + $cl = SETCCr 4, implicit $eflags + ; CMP+SETCC %0 >= 2; CMP can be removed. + CMP64ri32 %0, 2, implicit-def $eflags + ; %0 >=s 2 --> %0 >s 1 + $bl = SETCCr 13, implicit $eflags + ; %0 >=u 2 --> %0 >u 1 + $bl = SETCCr 3, implicit $eflags + ; %0 %0 <=s 1 + $bl = SETCCr 12, implicit $eflags + ; %0 %0 <=u 1 + $bl = SETCCr 2, implicit $eflags +... 
+--- name: opt_redundant_flags_adjusted_imm_noopt_0 body: | bb.0: From ce9e3abadee22716d7b31dc601963e22b7c9b0ab Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Mon, 10 Mar 2025 23:27:27 +0800 Subject: [PATCH 3/5] Add check to ensure CCMP opcode is same --- llvm/lib/Target/X86/X86InstrInfo.cpp | 10 ++-- .../CodeGen/X86/apx/optimize-compare-ccmp.mir | 55 ++++++++++--------- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 6f24ff9cd32d1..885ebaa79535f 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4961,12 +4961,10 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI, case X86::CCMP8ri: { // The CCMP instruction should not be optimized if the scc/dfv in it is not // same as the one in previous CCMP instruction. - unsigned Opcode = FlagI.getOpcode(); - if (Opcode == X86::CCMP64ri32 || Opcode == X86::CCMP32ri || - Opcode == X86::CCMP16ri || Opcode == X86::CCMP8ri) - if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() || - (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm())) - return false; + if ((FlagI.getOpcode() != OI.getOpcode()) || + (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm()) || + (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm())) + return false; [[fallthrough]]; } case X86::CMP64ri32: diff --git a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir index 96752ceafe542..ae6741c5ee882 100644 --- a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir +++ b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir @@ -112,33 +112,6 @@ body: | $bl = SETCCr 2, implicit $eflags ... 
--- -name: opt_redundant_flags_adjusted_imm_4 -body: | - bb.0: - ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_4 - ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags - ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags - ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags - ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags - ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags - ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags - %0:gr64 = COPY $rsi - ; CCMP+SETCC %0 == 1 - CCMP64ri32 %0, 1, 2, 5, implicit-def $eflags, implicit $eflags - $cl = SETCCr 4, implicit $eflags - ; CMP+SETCC %0 >= 2; CMP can be removed. - CMP64ri32 %0, 2, implicit-def $eflags - ; %0 >=s 2 --> %0 >s 1 - $bl = SETCCr 13, implicit $eflags - ; %0 >=u 2 --> %0 >u 1 - $bl = SETCCr 3, implicit $eflags - ; %0 %0 <=s 1 - $bl = SETCCr 12, implicit $eflags - ; %0 %0 <=u 1 - $bl = SETCCr 2, implicit $eflags -... ---- name: opt_redundant_flags_adjusted_imm_noopt_0 body: | bb.0: @@ -306,6 +279,34 @@ body: | $bl = SETCCr 2, implicit $eflags ... --- +name: opt_redundant_flags_adjusted_imm_noopt_5 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_5 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags + ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags + ; CHECK-NEXT: CMP64ri32 [[COPY]], 2, implicit-def $eflags + ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags + %0:gr64 = COPY $rsi + ; CCMP+SETCC %0 == 1 + CCMP64ri32 %0, 1, 2, 5, implicit-def $eflags, implicit $eflags + $cl = SETCCr 4, implicit $eflags + ; CMP+SETCC %0 >= 2; CMP cannot be removed. 
+ CMP64ri32 %0, 2, implicit-def $eflags + ; %0 >=s 2 --> %0 >s 1 + $bl = SETCCr 13, implicit $eflags + ; %0 >=u 2 --> %0 >u 1 + $bl = SETCCr 3, implicit $eflags + ; %0 <s 2 --> %0 <=s 1 + $bl = SETCCr 12, implicit $eflags + ; %0 <u 2 --> %0 <=u 1 + $bl = SETCCr 2, implicit $eflags +... +--- name: opt_adjusted_imm_multiple_blocks body: | ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks From df7cb157c6c76df36d47e937c23c9b7b209cf3ba Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Mon, 10 Mar 2025 23:47:24 +0800 Subject: [PATCH 4/5] Add two sub-tests in ccmp LIT test to validate peephole optimization. --- llvm/test/CodeGen/X86/apx/ccmp.ll | 70 +++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/llvm/test/CodeGen/X86/apx/ccmp.ll b/llvm/test/CodeGen/X86/apx/ccmp.ll index 7bd8aeea8863b..e2e4e8df93149 100644 --- a/llvm/test/CodeGen/X86/apx/ccmp.ll +++ b/llvm/test/CodeGen/X86/apx/ccmp.ll @@ -1300,5 +1300,75 @@ if.end: ; preds = %entry, %if.then ret void } +define void @ccmp_continous_adjust_imm(i32 noundef %a, i32 noundef %b) { +; CHECK-LABEL: ccmp_continous_adjust_imm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $2, %edi # encoding: [0x83,0xff,0x02] +; CHECK-NEXT: ccmpll {dfv=} $2, %esi # encoding: [0x62,0xf4,0x04,0x0c,0x83,0xfe,0x02] +; CHECK-NEXT: jg .LBB31_1 # encoding: [0x7f,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB31_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK-NEXT: jmp foo # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB31_1: # %if.end +; CHECK-NEXT: retq # encoding: [0xc3] +; +; NDD-LABEL: ccmp_continous_adjust_imm: +; NDD: # %bb.0: # %entry +; NDD-NEXT: cmpl $2, %edi # encoding: [0x83,0xff,0x02] +; NDD-NEXT: ccmpll {dfv=} $2, %esi # encoding: [0x62,0xf4,0x04,0x0c,0x83,0xfe,0x02] +; NDD-NEXT: jg .LBB31_1 # encoding: [0x7f,A] +; NDD-NEXT: # fixup A - offset: 1, value: .LBB31_1-1, 
kind: FK_PCRel_1 +; NDD-NEXT: # %bb.2: # %if.then +; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NDD-NEXT: jmp foo # TAILCALL +; NDD-NEXT: # encoding: [0xeb,A] +; NDD-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 +; NDD-NEXT: .LBB31_1: # %if.end +; NDD-NEXT: retq # encoding: [0xc3] +entry: + %cmp = icmp slt i32 %a, 2 + %cmp1 = icmp slt i32 %b, 2 + %or.cond = and i1 %cmp, %cmp1 + %cmp3 = icmp slt i32 %b, 3 + %or.cond4 = and i1 %or.cond, %cmp3 + br i1 %or.cond4, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void (...) @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define i32 @ccmp_continous_nobranch_adjust_imm(i32 noundef %a, i32 noundef %b) { +; CHECK-LABEL: ccmp_continous_nobranch_adjust_imm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $2, %esi # encoding: [0x83,0xfe,0x02] +; CHECK-NEXT: ccmpgl {dfv=} $2, %edi # encoding: [0x62,0xf4,0x04,0x0f,0x83,0xff,0x02] +; CHECK-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0] +; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] +; CHECK-NEXT: retq # encoding: [0xc3] +; +; NDD-LABEL: ccmp_continous_nobranch_adjust_imm: +; NDD: # %bb.0: # %entry +; NDD-NEXT: cmpl $2, %esi # encoding: [0x83,0xfe,0x02] +; NDD-NEXT: ccmpgl {dfv=} $2, %edi # encoding: [0x62,0xf4,0x04,0x0f,0x83,0xff,0x02] +; NDD-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0] +; NDD-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] +; NDD-NEXT: retq # encoding: [0xc3] +entry: + %cmp = icmp sgt i32 %a, 1 + %cmp1 = icmp slt i32 %b, 2 + %cmp2 = icmp slt i32 %b, 3 + %or1 = or i1 %cmp, %cmp1 + %or2 = or i1 %or1, %cmp2 + %. = zext i1 %or2 to i32 + ret i32 %. +} + declare dso_local void @foo(...) declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone From 1d44f40afe047b6218e1572b4e29a89b2d9c1ec4 Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Tue, 11 Mar 2025 11:41:11 +0800 Subject: [PATCH 5/5] Update comments in test. 
--- llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir | 2 +- llvm/test/CodeGen/X86/optimize-compare.mir | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir index ae6741c5ee882..f766ec00cbc3c 100644 --- a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir +++ b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir @@ -44,7 +44,7 @@ body: | ; CCMP+SETCC %0 != 42 CCMP64ri32 %0, 42, 2, 5, implicit-def $eflags, implicit $eflags $cl = SETCCr 5, implicit $eflags - ; CCMP+SETCC %0 >= 2; CCMP can be removed. + ; CCMP+SETCC %0 > 41; CCMP can be removed. CCMP64ri32 %0, 41, 2, 5, implicit-def $eflags, implicit $eflags ; %0 >s 41 --> %0 >=s 42 $bl = SETCCr 15, implicit $eflags diff --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir index 36ab851a85153..789a5c0100b50 100644 --- a/llvm/test/CodeGen/X86/optimize-compare.mir +++ b/llvm/test/CodeGen/X86/optimize-compare.mir @@ -422,7 +422,7 @@ body: | ; CMP+SETCC %0 != 42 CMP64ri32 %0, 42, implicit-def $eflags $cl = SETCCr 5, implicit $eflags - ; CMP+SETCC %0 >= 2; CMP can be removed. + ; CMP+SETCC %0 > 41; CMP can be removed. CMP64ri32 %0, 41, implicit-def $eflags ; %0 >s 41 --> %0 >=s 42 $bl = SETCCr 15, implicit $eflags