From c1cb8b072a2e0051bd15868f0817c95d028d5077 Mon Sep 17 00:00:00 2001 From: Harsh Chandel Date: Thu, 16 Oct 2025 14:24:55 +0530 Subject: [PATCH 1/6] SFB with max Change-Id: I7585f98422bf4b101fd44b1b4d6bc8584ca8cb53 --- llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 2 ++ llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 1 + llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td | 1 + 3 files changed, 4 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 410561855e181..ff96ed9a6b5d0 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -127,6 +127,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, case RISCV::PseudoCCAND: case RISCV::PseudoCCOR: case RISCV::PseudoCCXOR: + case RISCV::PseudoCCMAX: case RISCV::PseudoCCADDW: case RISCV::PseudoCCSUBW: case RISCV::PseudoCCSLL: @@ -228,6 +229,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, case RISCV::PseudoCCAND: NewOpc = RISCV::AND; break; case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break; case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break; + case RISCV::PseudoCCMAX: NewOpc = RISCV::MAX; break; case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break; case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break; case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 96e1078467f19..c27a96305f1c0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1698,6 +1698,7 @@ unsigned getPredicatedOpcode(unsigned Opcode) { case RISCV::AND: return RISCV::PseudoCCAND; break; case RISCV::OR: return RISCV::PseudoCCOR; break; case RISCV::XOR: return RISCV::PseudoCCXOR; break; + case RISCV::MAX: return RISCV::PseudoCCMAX; break; case RISCV::ADDI: return RISCV::PseudoCCADDI; break; case RISCV::SLLI: return RISCV::PseudoCCSLLI; break; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td index 0114fbdc56302..5ca145d6c458c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td @@ -106,6 +106,7 @@ def PseudoCCSRA : SFBALU_rr; def PseudoCCAND : SFBALU_rr; def PseudoCCOR : SFBALU_rr; def PseudoCCXOR : SFBALU_rr; +def PseudoCCMAX : SFBALU_rr; def PseudoCCADDI : SFBALU_ri; def PseudoCCANDI : SFBALU_ri; From a28faada306dac91eb7aff718e19b8fc89c6b1ba Mon Sep 17 00:00:00 2001 From: Harsh Chandel Date: Tue, 21 Oct 2025 11:44:07 +0530 Subject: [PATCH 2/6] SFB with min, maxu and minu Change-Id: I5a2deafae906b518f3379b2c4ba625cf0a76df79 --- .../Target/RISCV/RISCVExpandPseudoInsts.cpp | 6 + llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 3 + llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td | 3 + .../RISCV/short-forward-branch-opt-min-max.ll | 539 ++++++++++++++++++ 4 files changed, 551 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index ff96ed9a6b5d0..567a8da50a1db 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -128,6 +128,9 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, case RISCV::PseudoCCOR: case RISCV::PseudoCCXOR: case RISCV::PseudoCCMAX: + case RISCV::PseudoCCMAXU: + case RISCV::PseudoCCMIN: + case RISCV::PseudoCCMINU: case RISCV::PseudoCCADDW: case RISCV::PseudoCCSUBW: case RISCV::PseudoCCSLL: @@ -230,6 +233,9 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break; case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break; case RISCV::PseudoCCMAX: NewOpc = RISCV::MAX; break; + case RISCV::PseudoCCMIN: NewOpc = RISCV::MIN; break; + case RISCV::PseudoCCMAXU: NewOpc = RISCV::MAXU; break; + case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break; case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break; case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break; case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 8b2eb3af59c4e..435df1e4b91b6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1699,6 +1699,9 @@ unsigned getPredicatedOpcode(unsigned Opcode) { case RISCV::OR: return RISCV::PseudoCCOR; break; case RISCV::XOR: return RISCV::PseudoCCXOR; break; case RISCV::MAX: return RISCV::PseudoCCMAX; break; + case RISCV::MAXU: return RISCV::PseudoCCMAXU; break; + case RISCV::MIN: return RISCV::PseudoCCMIN; break; + case RISCV::MINU: return RISCV::PseudoCCMINU; break; case RISCV::ADDI: return RISCV::PseudoCCADDI; break; case RISCV::SLLI: return RISCV::PseudoCCSLLI; break; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td index 5ca145d6c458c..5a67a5aaba293 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td @@ -107,6 +107,9 @@ def PseudoCCAND : SFBALU_rr; def PseudoCCOR : SFBALU_rr; def PseudoCCXOR : SFBALU_rr; def PseudoCCMAX : SFBALU_rr; +def PseudoCCMIN : SFBALU_rr; +def PseudoCCMAXU : SFBALU_rr; +def PseudoCCMINU : SFBALU_rr; def PseudoCCADDI : SFBALU_ri; def PseudoCCANDI : SFBALU_ri; diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll new file mode 100644 index 0000000000000..9fa4e350aced9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll @@ -0,0 +1,539 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I-NOZBB +; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I-NOZBB +; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-opt | \ +; RUN: FileCheck %s --check-prefixes=RV32I-SFB-ZBB +; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-opt | \ +; RUN: FileCheck %s --check-prefixes=RV64I-SFB-ZBB + +define i32 @select_example_smax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { +; RV32I-NOZBB-LABEL: select_example_smax: +; RV32I-NOZBB: # %bb.0: # %entry +; RV32I-NOZBB-NEXT: bge a3, a0, .LBB0_3 +; RV32I-NOZBB-NEXT: # %bb.1: # %entry +; RV32I-NOZBB-NEXT: beqz a2, .LBB0_4 +; RV32I-NOZBB-NEXT: .LBB0_2: # %entry +; RV32I-NOZBB-NEXT: ret +; RV32I-NOZBB-NEXT: .LBB0_3: # %entry +; RV32I-NOZBB-NEXT: mv a0, a3 +; RV32I-NOZBB-NEXT: bnez a2, .LBB0_2 +; RV32I-NOZBB-NEXT: .LBB0_4: # %entry +; RV32I-NOZBB-NEXT: mv a0, a1 +; RV32I-NOZBB-NEXT: ret +; +; RV64I-NOZBB-LABEL: select_example_smax: +; RV64I-NOZBB: # %bb.0: # %entry +; RV64I-NOZBB-NEXT: sext.w a0, a0 +; RV64I-NOZBB-NEXT: sext.w a3, a3 +; RV64I-NOZBB-NEXT: bge a3, a0, .LBB0_3 +; RV64I-NOZBB-NEXT: # %bb.1: # %entry +; RV64I-NOZBB-NEXT: beqz a2, .LBB0_4 +; RV64I-NOZBB-NEXT: .LBB0_2: # %entry +; RV64I-NOZBB-NEXT: ret +; RV64I-NOZBB-NEXT: .LBB0_3: # %entry +; RV64I-NOZBB-NEXT: mv a0, a3 +; RV64I-NOZBB-NEXT: bnez a2, .LBB0_2 +; RV64I-NOZBB-NEXT: .LBB0_4: # %entry +; RV64I-NOZBB-NEXT: mv a0, a1 +; RV64I-NOZBB-NEXT: ret +; +; RV32I-SFB-ZBB-LABEL: select_example_smax: +; RV32I-SFB-ZBB: # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT: beqz a2, .LBB0_2 +; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT: max a1, a0, a3 +; RV32I-SFB-ZBB-NEXT: .LBB0_2: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a1 +; RV32I-SFB-ZBB-NEXT: ret +; +; RV64I-SFB-ZBB-LABEL: select_example_smax: +; RV64I-SFB-ZBB: # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT: sext.w a3, a3 +; RV64I-SFB-ZBB-NEXT: sext.w a0, a0 +; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB0_2 +; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT: max a1, a0, a3 +; RV64I-SFB-ZBB-NEXT: .LBB0_2: # %entry +; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: ret +entry: + %res = call i32 @llvm.smax.i32(i32 %a, i32 %y) + %sel = select i1 %x, i32 %res, i32 %b + ret i32 %sel +} + +define i32 @select_example_smin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { +; RV32I-NOZBB-LABEL: select_example_smin: +; RV32I-NOZBB: # %bb.0: # %entry +; RV32I-NOZBB-NEXT: bge a0, a3, .LBB1_3 +; RV32I-NOZBB-NEXT: # %bb.1: # %entry +; RV32I-NOZBB-NEXT: beqz a2, .LBB1_4 +; RV32I-NOZBB-NEXT: .LBB1_2: # %entry +; RV32I-NOZBB-NEXT: ret +; RV32I-NOZBB-NEXT: .LBB1_3: # %entry +; RV32I-NOZBB-NEXT: mv a0, a3 +; RV32I-NOZBB-NEXT: bnez a2, .LBB1_2 +; RV32I-NOZBB-NEXT: .LBB1_4: # %entry +; RV32I-NOZBB-NEXT: mv a0, a1 +; RV32I-NOZBB-NEXT: ret +; +; RV64I-NOZBB-LABEL: select_example_smin: +; RV64I-NOZBB: # %bb.0: # %entry +; RV64I-NOZBB-NEXT: sext.w a3, a3 +; RV64I-NOZBB-NEXT: sext.w a0, a0 +; RV64I-NOZBB-NEXT: bge a0, a3, .LBB1_3 +; RV64I-NOZBB-NEXT: # %bb.1: # %entry +; RV64I-NOZBB-NEXT: beqz a2, .LBB1_4 +; RV64I-NOZBB-NEXT: .LBB1_2: # %entry +; RV64I-NOZBB-NEXT: ret +; RV64I-NOZBB-NEXT: .LBB1_3: # %entry +; RV64I-NOZBB-NEXT: mv a0, a3 +; RV64I-NOZBB-NEXT: bnez a2, .LBB1_2 +; RV64I-NOZBB-NEXT: .LBB1_4: # %entry +; RV64I-NOZBB-NEXT: mv a0, a1 +; RV64I-NOZBB-NEXT: ret +; +; RV32I-SFB-ZBB-LABEL: select_example_smin: +; RV32I-SFB-ZBB: # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT: beqz a2, .LBB1_2 +; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT: min a1, a0, a3 +; RV32I-SFB-ZBB-NEXT: .LBB1_2: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a1 +; RV32I-SFB-ZBB-NEXT: ret +; +; RV64I-SFB-ZBB-LABEL: select_example_smin: +; RV64I-SFB-ZBB: # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT: sext.w a3, a3 +; RV64I-SFB-ZBB-NEXT: sext.w a0, a0 +; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB1_2 +; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT: min a1, a0, a3 +; RV64I-SFB-ZBB-NEXT: .LBB1_2: # %entry +; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: ret +entry: + %res = call i32 @llvm.smin.i32(i32 %a, i32 %y) + %sel = select i1 %x, i32 %res, i32 %b + ret i32 %sel +} + +define i32 @select_example_umax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { +; RV32I-NOZBB-LABEL: select_example_umax: +; RV32I-NOZBB: # %bb.0: # %entry +; RV32I-NOZBB-NEXT: bgeu a3, a0, .LBB2_3 +; RV32I-NOZBB-NEXT: # %bb.1: # %entry +; RV32I-NOZBB-NEXT: beqz a2, .LBB2_4 +; RV32I-NOZBB-NEXT: .LBB2_2: # %entry +; RV32I-NOZBB-NEXT: ret +; RV32I-NOZBB-NEXT: .LBB2_3: # %entry +; RV32I-NOZBB-NEXT: mv a0, a3 +; RV32I-NOZBB-NEXT: bnez a2, .LBB2_2 +; RV32I-NOZBB-NEXT: .LBB2_4: # %entry +; RV32I-NOZBB-NEXT: mv a0, a1 +; RV32I-NOZBB-NEXT: ret +; +; RV64I-NOZBB-LABEL: select_example_umax: +; RV64I-NOZBB: # %bb.0: # %entry +; RV64I-NOZBB-NEXT: sext.w a0, a0 +; RV64I-NOZBB-NEXT: sext.w a3, a3 +; RV64I-NOZBB-NEXT: bgeu a3, a0, .LBB2_3 +; RV64I-NOZBB-NEXT: # %bb.1: # %entry +; RV64I-NOZBB-NEXT: beqz a2, .LBB2_4 +; RV64I-NOZBB-NEXT: .LBB2_2: # %entry +; RV64I-NOZBB-NEXT: ret +; RV64I-NOZBB-NEXT: .LBB2_3: # %entry +; RV64I-NOZBB-NEXT: mv a0, a3 +; RV64I-NOZBB-NEXT: bnez a2, .LBB2_2 +; RV64I-NOZBB-NEXT: .LBB2_4: # %entry +; RV64I-NOZBB-NEXT: mv a0, a1 +; RV64I-NOZBB-NEXT: ret +; +; RV32I-SFB-ZBB-LABEL: select_example_umax: +; RV32I-SFB-ZBB: # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT: beqz a2, .LBB2_2 +; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT: maxu a1, a0, a3 +; RV32I-SFB-ZBB-NEXT: .LBB2_2: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a1 +; RV32I-SFB-ZBB-NEXT: ret +; +; RV64I-SFB-ZBB-LABEL: select_example_umax: +; RV64I-SFB-ZBB: # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT: sext.w a3, a3 +; RV64I-SFB-ZBB-NEXT: sext.w a0, a0 +; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB2_2 +; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT: maxu a1, a0, a3 +; RV64I-SFB-ZBB-NEXT: .LBB2_2: # %entry +; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: ret +entry: + %res = call i32 @llvm.umax.i32(i32 %a, i32 %y) + %sel = select i1 %x, i32 %res, i32 %b + ret i32 %sel +} + +define i32 @select_example_umin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { +; RV32I-NOZBB-LABEL: select_example_umin: +; RV32I-NOZBB: # %bb.0: # %entry +; RV32I-NOZBB-NEXT: bgeu a0, a3, .LBB3_3 +; RV32I-NOZBB-NEXT: # %bb.1: # %entry +; RV32I-NOZBB-NEXT: beqz a2, .LBB3_4 +; RV32I-NOZBB-NEXT: .LBB3_2: # %entry +; RV32I-NOZBB-NEXT: ret +; RV32I-NOZBB-NEXT: .LBB3_3: # %entry +; RV32I-NOZBB-NEXT: mv a0, a3 +; RV32I-NOZBB-NEXT: bnez a2, .LBB3_2 +; RV32I-NOZBB-NEXT: .LBB3_4: # %entry +; RV32I-NOZBB-NEXT: mv a0, a1 +; RV32I-NOZBB-NEXT: ret +; +; RV64I-NOZBB-LABEL: select_example_umin: +; RV64I-NOZBB: # %bb.0: # %entry +; RV64I-NOZBB-NEXT: sext.w a3, a3 +; RV64I-NOZBB-NEXT: sext.w a0, a0 +; RV64I-NOZBB-NEXT: bgeu a0, a3, .LBB3_3 +; RV64I-NOZBB-NEXT: # %bb.1: # %entry +; RV64I-NOZBB-NEXT: beqz a2, .LBB3_4 +; RV64I-NOZBB-NEXT: .LBB3_2: # %entry +; RV64I-NOZBB-NEXT: ret +; RV64I-NOZBB-NEXT: .LBB3_3: # %entry +; RV64I-NOZBB-NEXT: mv a0, a3 +; RV64I-NOZBB-NEXT: bnez a2, .LBB3_2 +; RV64I-NOZBB-NEXT: .LBB3_4: # %entry +; RV64I-NOZBB-NEXT: mv a0, a1 +; RV64I-NOZBB-NEXT: ret +; +; RV32I-SFB-ZBB-LABEL: select_example_umin: +; RV32I-SFB-ZBB: # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT: beqz a2, .LBB3_2 +; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT: minu a1, a0, a3 +; RV32I-SFB-ZBB-NEXT: .LBB3_2: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a1 +; RV32I-SFB-ZBB-NEXT: ret +; +; RV64I-SFB-ZBB-LABEL: select_example_umin: +; RV64I-SFB-ZBB: # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT: sext.w a3, a3 +; RV64I-SFB-ZBB-NEXT: sext.w a0, a0 +; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB3_2 +; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT: minu a1, a0, a3 +; RV64I-SFB-ZBB-NEXT: .LBB3_2: # %entry +; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: ret +entry: + %res = call i32 @llvm.umin.i32(i32 %a, i32 %y) + %sel = select i1 %x, i32 %res, i32 %b + ret i32 %sel +} + +define i64 @select_example_smax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { +; RV32I-NOZBB-LABEL: select_example_smax_1: +; RV32I-NOZBB: # %bb.0: # %entry +; RV32I-NOZBB-NEXT: beq a1, a6, .LBB4_2 +; RV32I-NOZBB-NEXT: # %bb.1: # %entry +; RV32I-NOZBB-NEXT: slt a7, a6, a1 +; RV32I-NOZBB-NEXT: beqz a7, .LBB4_3 +; RV32I-NOZBB-NEXT: j .LBB4_4 +; RV32I-NOZBB-NEXT: .LBB4_2: +; RV32I-NOZBB-NEXT: sltu a7, a5, a0 +; RV32I-NOZBB-NEXT: bnez a7, .LBB4_4 +; RV32I-NOZBB-NEXT: .LBB4_3: # %entry +; RV32I-NOZBB-NEXT: mv a1, a6 +; RV32I-NOZBB-NEXT: mv a0, a5 +; RV32I-NOZBB-NEXT: .LBB4_4: # %entry +; RV32I-NOZBB-NEXT: beqz a4, .LBB4_6 +; RV32I-NOZBB-NEXT: # %bb.5: # %entry +; RV32I-NOZBB-NEXT: ret +; RV32I-NOZBB-NEXT: .LBB4_6: # %entry +; RV32I-NOZBB-NEXT: mv a0, a2 +; RV32I-NOZBB-NEXT: mv a1, a3 +; RV32I-NOZBB-NEXT: ret +; +; RV64I-NOZBB-LABEL: select_example_smax_1: +; RV64I-NOZBB: # %bb.0: # %entry +; RV64I-NOZBB-NEXT: bge a3, a0, .LBB4_3 +; RV64I-NOZBB-NEXT: # %bb.1: # %entry +; RV64I-NOZBB-NEXT: beqz a2, .LBB4_4 +; RV64I-NOZBB-NEXT: .LBB4_2: # %entry +; RV64I-NOZBB-NEXT: ret +; RV64I-NOZBB-NEXT: .LBB4_3: # %entry +; RV64I-NOZBB-NEXT: mv a0, a3 +; RV64I-NOZBB-NEXT: bnez a2, .LBB4_2 +; RV64I-NOZBB-NEXT: .LBB4_4: # %entry +; RV64I-NOZBB-NEXT: mv a0, a1 +; RV64I-NOZBB-NEXT: ret +; +; RV32I-SFB-ZBB-LABEL: select_example_smax_1: +; RV32I-SFB-ZBB: # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT: sltu a7, a5, a0 +; RV32I-SFB-ZBB-NEXT: slt t0, a6, a1 +; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB4_2 +; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT: mv t0, a7 +; RV32I-SFB-ZBB-NEXT: .LBB4_2: # %entry +; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB4_4 +; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry +; RV32I-SFB-ZBB-NEXT: mv a1, a6 +; RV32I-SFB-ZBB-NEXT: .LBB4_4: # %entry +; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB4_6 +; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a5 +; RV32I-SFB-ZBB-NEXT: .LBB4_6: # %entry +; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB4_8 +; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a2 +; RV32I-SFB-ZBB-NEXT: .LBB4_8: # %entry +; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB4_10 +; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry +; RV32I-SFB-ZBB-NEXT: mv a1, a3 +; RV32I-SFB-ZBB-NEXT: .LBB4_10: # %entry +; RV32I-SFB-ZBB-NEXT: ret +; +; RV64I-SFB-ZBB-LABEL: select_example_smax_1: +; RV64I-SFB-ZBB: # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB4_2 +; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT: max a1, a0, a3 +; RV64I-SFB-ZBB-NEXT: .LBB4_2: # %entry +; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: ret +entry: + %res = call i64 @llvm.smax.i64(i64 %a, i64 %y) + %sel = select i1 %x, i64 %res, i64 %b + ret i64 %sel +} + +define i64 @select_example_smin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { +; RV32I-NOZBB-LABEL: select_example_smin_1: +; RV32I-NOZBB: # %bb.0: # %entry +; RV32I-NOZBB-NEXT: beq a1, a6, .LBB5_2 +; RV32I-NOZBB-NEXT: # %bb.1: # %entry +; RV32I-NOZBB-NEXT: slt a7, a1, a6 +; RV32I-NOZBB-NEXT: beqz a7, .LBB5_3 +; RV32I-NOZBB-NEXT: j .LBB5_4 +; RV32I-NOZBB-NEXT: .LBB5_2: +; RV32I-NOZBB-NEXT: sltu a7, a0, a5 +; RV32I-NOZBB-NEXT: bnez a7, .LBB5_4 +; RV32I-NOZBB-NEXT: .LBB5_3: # %entry +; RV32I-NOZBB-NEXT: mv a1, a6 +; RV32I-NOZBB-NEXT: mv a0, a5 +; RV32I-NOZBB-NEXT: .LBB5_4: # %entry +; RV32I-NOZBB-NEXT: beqz a4, .LBB5_6 +; RV32I-NOZBB-NEXT: # %bb.5: # %entry +; RV32I-NOZBB-NEXT: ret +; RV32I-NOZBB-NEXT: .LBB5_6: # %entry +; RV32I-NOZBB-NEXT: mv a0, a2 +; RV32I-NOZBB-NEXT: mv a1, a3 +; RV32I-NOZBB-NEXT: ret +; +; RV64I-NOZBB-LABEL: select_example_smin_1: +; RV64I-NOZBB: # %bb.0: # %entry +; RV64I-NOZBB-NEXT: bge a0, a3, .LBB5_3 +; RV64I-NOZBB-NEXT: # %bb.1: # %entry +; RV64I-NOZBB-NEXT: beqz a2, .LBB5_4 +; RV64I-NOZBB-NEXT: .LBB5_2: # %entry +; RV64I-NOZBB-NEXT: ret +; RV64I-NOZBB-NEXT: .LBB5_3: # %entry +; RV64I-NOZBB-NEXT: mv a0, a3 +; RV64I-NOZBB-NEXT: bnez a2, .LBB5_2 +; RV64I-NOZBB-NEXT: .LBB5_4: # %entry +; RV64I-NOZBB-NEXT: mv a0, a1 +; RV64I-NOZBB-NEXT: ret +; +; RV32I-SFB-ZBB-LABEL: select_example_smin_1: +; RV32I-SFB-ZBB: # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT: sltu a7, a0, a5 +; RV32I-SFB-ZBB-NEXT: slt t0, a1, a6 +; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB5_2 +; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT: mv t0, a7 +; RV32I-SFB-ZBB-NEXT: .LBB5_2: # %entry +; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB5_4 +; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry +; RV32I-SFB-ZBB-NEXT: mv a1, a6 +; RV32I-SFB-ZBB-NEXT: .LBB5_4: # %entry +; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB5_6 +; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a5 +; RV32I-SFB-ZBB-NEXT: .LBB5_6: # %entry +; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB5_8 +; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a2 +; RV32I-SFB-ZBB-NEXT: .LBB5_8: # %entry +; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB5_10 +; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry +; RV32I-SFB-ZBB-NEXT: mv a1, a3 +; RV32I-SFB-ZBB-NEXT: .LBB5_10: # %entry +; RV32I-SFB-ZBB-NEXT: ret +; +; RV64I-SFB-ZBB-LABEL: select_example_smin_1: +; RV64I-SFB-ZBB: # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB5_2 +; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT: min a1, a0, a3 +; RV64I-SFB-ZBB-NEXT: .LBB5_2: # %entry +; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: ret +entry: + %res = call i64 @llvm.smin.i64(i64 %a, i64 %y) + %sel = select i1 %x, i64 %res, i64 %b + ret i64 %sel +} + +define i64 @select_example_umax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { +; RV32I-NOZBB-LABEL: select_example_umax_1: +; RV32I-NOZBB: # %bb.0: # %entry +; RV32I-NOZBB-NEXT: beq a1, a6, .LBB6_2 +; RV32I-NOZBB-NEXT: # %bb.1: # %entry +; RV32I-NOZBB-NEXT: sltu a7, a6, a1 +; RV32I-NOZBB-NEXT: beqz a7, .LBB6_3 +; RV32I-NOZBB-NEXT: j .LBB6_4 +; RV32I-NOZBB-NEXT: .LBB6_2: +; RV32I-NOZBB-NEXT: sltu a7, a5, a0 +; RV32I-NOZBB-NEXT: bnez a7, .LBB6_4 +; RV32I-NOZBB-NEXT: .LBB6_3: # %entry +; RV32I-NOZBB-NEXT: mv a1, a6 +; RV32I-NOZBB-NEXT: mv a0, a5 +; RV32I-NOZBB-NEXT: .LBB6_4: # %entry +; RV32I-NOZBB-NEXT: beqz a4, .LBB6_6 +; RV32I-NOZBB-NEXT: # %bb.5: # %entry +; RV32I-NOZBB-NEXT: ret +; RV32I-NOZBB-NEXT: .LBB6_6: # %entry +; RV32I-NOZBB-NEXT: mv a0, a2 +; RV32I-NOZBB-NEXT: mv a1, a3 +; RV32I-NOZBB-NEXT: ret +; +; RV64I-NOZBB-LABEL: select_example_umax_1: +; RV64I-NOZBB: # %bb.0: # %entry +; RV64I-NOZBB-NEXT: bgeu a3, a0, .LBB6_3 +; RV64I-NOZBB-NEXT: # %bb.1: # %entry +; RV64I-NOZBB-NEXT: beqz a2, .LBB6_4 +; RV64I-NOZBB-NEXT: .LBB6_2: # %entry +; RV64I-NOZBB-NEXT: ret +; RV64I-NOZBB-NEXT: .LBB6_3: # %entry +; RV64I-NOZBB-NEXT: mv a0, a3 +; RV64I-NOZBB-NEXT: bnez a2, .LBB6_2 +; RV64I-NOZBB-NEXT: .LBB6_4: # %entry +; RV64I-NOZBB-NEXT: mv a0, a1 +; RV64I-NOZBB-NEXT: ret +; +; RV32I-SFB-ZBB-LABEL: select_example_umax_1: +; RV32I-SFB-ZBB: # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT: sltu a7, a5, a0 +; RV32I-SFB-ZBB-NEXT: sltu t0, a6, a1 +; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB6_2 +; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT: mv t0, a7 +; RV32I-SFB-ZBB-NEXT: .LBB6_2: # %entry +; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB6_4 +; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry +; RV32I-SFB-ZBB-NEXT: mv a1, a6 +; RV32I-SFB-ZBB-NEXT: .LBB6_4: # %entry +; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB6_6 +; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a5 +; RV32I-SFB-ZBB-NEXT: .LBB6_6: # %entry +; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB6_8 +; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a2 +; RV32I-SFB-ZBB-NEXT: .LBB6_8: # %entry +; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB6_10 +; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry +; RV32I-SFB-ZBB-NEXT: mv a1, a3 +; RV32I-SFB-ZBB-NEXT: .LBB6_10: # %entry +; RV32I-SFB-ZBB-NEXT: ret +; +; RV64I-SFB-ZBB-LABEL: select_example_umax_1: +; RV64I-SFB-ZBB: # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB6_2 +; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT: maxu a1, a0, a3 +; RV64I-SFB-ZBB-NEXT: .LBB6_2: # %entry +; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: ret +entry: + %res = call i64 @llvm.umax.i64(i64 %a, i64 %y) + %sel = select i1 %x, i64 %res, i64 %b + ret i64 %sel +} + +define i64 @select_example_umin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { +; RV32I-NOZBB-LABEL: select_example_umin_1: +; RV32I-NOZBB: # %bb.0: # %entry +; RV32I-NOZBB-NEXT: beq a1, a6, .LBB7_2 +; RV32I-NOZBB-NEXT: # %bb.1: # %entry +; RV32I-NOZBB-NEXT: sltu a7, a1, a6 +; RV32I-NOZBB-NEXT: beqz a7, .LBB7_3 +; RV32I-NOZBB-NEXT: j .LBB7_4 +; RV32I-NOZBB-NEXT: .LBB7_2: +; RV32I-NOZBB-NEXT: sltu a7, a0, a5 +; RV32I-NOZBB-NEXT: bnez a7, .LBB7_4 +; RV32I-NOZBB-NEXT: .LBB7_3: # %entry +; RV32I-NOZBB-NEXT: mv a1, a6 +; RV32I-NOZBB-NEXT: mv a0, a5 +; RV32I-NOZBB-NEXT: .LBB7_4: # %entry +; RV32I-NOZBB-NEXT: beqz a4, .LBB7_6 +; RV32I-NOZBB-NEXT: # %bb.5: # %entry +; RV32I-NOZBB-NEXT: ret +; RV32I-NOZBB-NEXT: .LBB7_6: # %entry +; RV32I-NOZBB-NEXT: mv a0, a2 +; RV32I-NOZBB-NEXT: mv a1, a3 +; RV32I-NOZBB-NEXT: ret +; +; RV64I-NOZBB-LABEL: select_example_umin_1: +; RV64I-NOZBB: # %bb.0: # %entry +; RV64I-NOZBB-NEXT: bgeu a0, a3, .LBB7_3 +; RV64I-NOZBB-NEXT: # %bb.1: # %entry +; RV64I-NOZBB-NEXT: beqz a2, .LBB7_4 +; RV64I-NOZBB-NEXT: .LBB7_2: # %entry +; RV64I-NOZBB-NEXT: ret +; RV64I-NOZBB-NEXT: .LBB7_3: # %entry +; RV64I-NOZBB-NEXT: mv a0, a3 +; RV64I-NOZBB-NEXT: bnez a2, .LBB7_2 +; RV64I-NOZBB-NEXT: .LBB7_4: # %entry +; RV64I-NOZBB-NEXT: mv a0, a1 +; RV64I-NOZBB-NEXT: ret +; +; RV32I-SFB-ZBB-LABEL: select_example_umin_1: +; RV32I-SFB-ZBB: # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT: sltu a7, a0, a5 +; RV32I-SFB-ZBB-NEXT: sltu t0, a1, a6 +; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB7_2 +; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT: mv t0, a7 +; RV32I-SFB-ZBB-NEXT: .LBB7_2: # %entry +; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB7_4 +; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry +; RV32I-SFB-ZBB-NEXT: mv a1, a6 +; RV32I-SFB-ZBB-NEXT: .LBB7_4: # %entry +; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB7_6 +; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a5 +; RV32I-SFB-ZBB-NEXT: .LBB7_6: # %entry +; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB7_8 +; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry +; RV32I-SFB-ZBB-NEXT: mv a0, a2 +; RV32I-SFB-ZBB-NEXT: .LBB7_8: # %entry +; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB7_10 +; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry +; RV32I-SFB-ZBB-NEXT: mv a1, a3 +; RV32I-SFB-ZBB-NEXT: .LBB7_10: # %entry +; RV32I-SFB-ZBB-NEXT: ret +; +; RV64I-SFB-ZBB-LABEL: select_example_umin_1: +; RV64I-SFB-ZBB: # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB7_2 +; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT: minu a1, a0, a3 +; RV64I-SFB-ZBB-NEXT: .LBB7_2: # %entry +; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: ret +entry: + %res = call i64 @llvm.umin.i64(i64 %a, i64 %y) + %sel = select i1 %x, i64 %res, i64 %b + ret i64 %sel +} From 2374d935f19178c8cbb01ed387ba357470149232 Mon Sep 17 00:00:00 2001 From: Harsh Chandel Date: Tue, 21 Oct 2025 16:32:32 +0530 Subject: [PATCH 3/6] fixup! clang-format Change-Id: I14c0493b53c643c96ee5cb0ce3a8531f9d33e207 --- llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 16 ++++++++++++---- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 16 ++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 567a8da50a1db..96c1890120d8d 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -232,10 +232,18 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, case RISCV::PseudoCCAND: NewOpc = RISCV::AND; break; case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break; case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break; - case RISCV::PseudoCCMAX: NewOpc = RISCV::MAX; break; - case RISCV::PseudoCCMIN: NewOpc = RISCV::MIN; break; - case RISCV::PseudoCCMAXU: NewOpc = RISCV::MAXU; break; - case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break; + case RISCV::PseudoCCMAX: + NewOpc = RISCV::MAX; + break; + case RISCV::PseudoCCMIN: + NewOpc = RISCV::MIN; + break; + case RISCV::PseudoCCMAXU: + NewOpc = RISCV::MAXU; + break; + case RISCV::PseudoCCMINU: + NewOpc = RISCV::MINU; + break; case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break; case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break; case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 435df1e4b91b6..cc860241c4164 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1698,10 +1698,18 @@ unsigned getPredicatedOpcode(unsigned Opcode) { case RISCV::AND: return RISCV::PseudoCCAND; break; case RISCV::OR: return RISCV::PseudoCCOR; break; case RISCV::XOR: return RISCV::PseudoCCXOR; break; - case RISCV::MAX: return RISCV::PseudoCCMAX; break; - case RISCV::MAXU: return RISCV::PseudoCCMAXU; break; - case RISCV::MIN: return RISCV::PseudoCCMIN; break; - case RISCV::MINU: return RISCV::PseudoCCMINU; break; + case RISCV::MAX: + return RISCV::PseudoCCMAX; + break; + case RISCV::MAXU: + return RISCV::PseudoCCMAXU; + break; + case RISCV::MIN: + return RISCV::PseudoCCMIN; + break; + case RISCV::MINU: + return RISCV::PseudoCCMINU; + break; case RISCV::ADDI: return RISCV::PseudoCCADDI; break; case RISCV::SLLI: return RISCV::PseudoCCSLLI; break; From 4acf425292e65389411ca4ec1d45cb28b7edc9a4 Mon Sep 17 00:00:00 2001 From: Harsh Chandel Date: Tue, 28 Oct 2025 12:06:54 +0530 Subject: [PATCH 4/6] fixup! Add new feature and update tests Change-Id: I152ac21ea79271bf18d9f783fadecf3ce05d53da --- .../Target/RISCV/RISCVExpandPseudoInsts.cpp | 18 +- llvm/lib/Target/RISCV/RISCVFeatures.td | 4 + llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 16 +- .../RISCV/short-forward-branch-opt-min-max.ll | 504 +++++++++--------- 4 files changed, 274 insertions(+), 268 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 96c1890120d8d..526675a682d86 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -221,6 +221,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, .addImm(0); } else { unsigned NewOpc; + // clang-format off switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected opcode!"); @@ -232,18 +233,10 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, case RISCV::PseudoCCAND: NewOpc = RISCV::AND; break; case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break; case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break; - case RISCV::PseudoCCMAX: - NewOpc = RISCV::MAX; - break; - case RISCV::PseudoCCMIN: - NewOpc = RISCV::MIN; - break; - case RISCV::PseudoCCMAXU: - NewOpc = RISCV::MAXU; - break; - case RISCV::PseudoCCMINU: - NewOpc = RISCV::MINU; - break; + case RISCV::PseudoCCMAX: NewOpc = RISCV::MAX; break; + case RISCV::PseudoCCMIN: NewOpc = RISCV::MIN; break; + case RISCV::PseudoCCMAXU: NewOpc = RISCV::MAXU; break; + case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break; case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break; case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break; case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break; @@ -266,6 +259,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, case RISCV::PseudoCCNDS_BFOS: NewOpc = RISCV::NDS_BFOS; break; case RISCV::PseudoCCNDS_BFOZ: NewOpc = RISCV::NDS_BFOZ; break; } + // clang-format on if (NewOpc == RISCV::NDS_BFOZ || NewOpc == RISCV::NDS_BFOS) { BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg) diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 9e6b7f0327eb8..753e86cec7477 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1850,6 +1850,10 @@ def TuneShortForwardBranchOpt def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">; def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">; +def TuneShortForwardBranchIMinMax + : SubtargetFeature<"short-forward-branch-i-minmax", "HasShortForwardBranchIMinMax", + "true", "Enable short forward branch optimization for min,max instructions in Zbb", [TuneShortForwardBranchOpt]>; + // Some subtargets require a S2V transfer buffer to move scalars into vectors. // FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure. def TuneNoSinkSplatOperands diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 479b9050bd732..002456b250cc7 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1739,7 +1739,8 @@ unsigned getPredicatedOpcode(unsigned Opcode) { /// return the defining instruction. static MachineInstr *canFoldAsPredicatedOp(Register Reg, const MachineRegisterInfo &MRI, - const TargetInstrInfo *TII) { + const TargetInstrInfo *TII, + bool minmax) { if (!Reg.isVirtual()) return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) @@ -1747,6 +1748,12 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg, MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) return nullptr; + + if (!minmax && + (MI->getOpcode() == RISCV::MAX || MI->getOpcode() == RISCV::MIN || + MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU)) + return nullptr; + // Check if MI can be predicated and folded into the CCMOV. if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END) return nullptr; @@ -1809,11 +1816,12 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI, return nullptr; MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); - MachineInstr *DefMI = - canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this); + MachineInstr *DefMI = canFoldAsPredicatedOp( + MI.getOperand(5).getReg(), MRI, this, STI.hasShortForwardBranchIMinMax()); bool Invert = !DefMI; if (!DefMI) - DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this); + DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, + STI.hasShortForwardBranchIMinMax()); if (!DefMI) return nullptr; diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll index 9fa4e350aced9..0a3f7181693df 100644 --- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll +++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll @@ -1,41 +1,41 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I-NOZBB -; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I-NOZBB -; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-opt | \ +; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I-NO-ZBB-SFB +; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I-NO-ZBB-SFB +; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-i-minmax | \ ; RUN: FileCheck %s --check-prefixes=RV32I-SFB-ZBB -; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-opt | \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-i-minmax | \ ; RUN: FileCheck %s --check-prefixes=RV64I-SFB-ZBB define i32 @select_example_smax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { -; RV32I-NOZBB-LABEL: select_example_smax: -; RV32I-NOZBB: # %bb.0: # %entry -; RV32I-NOZBB-NEXT: bge a3, a0, .LBB0_3 -; RV32I-NOZBB-NEXT: # %bb.1: # %entry -; RV32I-NOZBB-NEXT: beqz a2, .LBB0_4 -; RV32I-NOZBB-NEXT: .LBB0_2: # %entry -; RV32I-NOZBB-NEXT: ret -; RV32I-NOZBB-NEXT: .LBB0_3: # %entry -; RV32I-NOZBB-NEXT: mv a0, a3 -; RV32I-NOZBB-NEXT: bnez a2, .LBB0_2 -; RV32I-NOZBB-NEXT: .LBB0_4: # %entry -; RV32I-NOZBB-NEXT: mv a0, a1 -; RV32I-NOZBB-NEXT: ret +; RV32I-NO-ZBB-SFB-LABEL: select_example_smax: +; RV32I-NO-ZBB-SFB: # %bb.0: # %entry +; RV32I-NO-ZBB-SFB-NEXT: bge a3, a0, .LBB0_3 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beqz a2, .LBB0_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB0_2: # %entry +; RV32I-NO-ZBB-SFB-NEXT: ret +; RV32I-NO-ZBB-SFB-NEXT: .LBB0_3: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV32I-NO-ZBB-SFB-NEXT: bnez a2, .LBB0_2 +; RV32I-NO-ZBB-SFB-NEXT: .LBB0_4: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV32I-NO-ZBB-SFB-NEXT: ret ; -; RV64I-NOZBB-LABEL: select_example_smax: -; RV64I-NOZBB: # %bb.0: # %entry -; RV64I-NOZBB-NEXT: sext.w a0, a0 -; RV64I-NOZBB-NEXT: sext.w a3, a3 -; RV64I-NOZBB-NEXT: bge a3, a0, .LBB0_3 -; RV64I-NOZBB-NEXT: # %bb.1: # %entry -; RV64I-NOZBB-NEXT: beqz a2, .LBB0_4 -; RV64I-NOZBB-NEXT: .LBB0_2: # %entry -; RV64I-NOZBB-NEXT: ret -; RV64I-NOZBB-NEXT: .LBB0_3: # %entry -; RV64I-NOZBB-NEXT: mv a0, a3 -; RV64I-NOZBB-NEXT: bnez a2, .LBB0_2 -; RV64I-NOZBB-NEXT: .LBB0_4: # %entry -; RV64I-NOZBB-NEXT: mv a0, a1 -; RV64I-NOZBB-NEXT: ret +; RV64I-NO-ZBB-SFB-LABEL: select_example_smax: +; RV64I-NO-ZBB-SFB: # %bb.0: # %entry +; RV64I-NO-ZBB-SFB-NEXT: sext.w a0, a0 +; RV64I-NO-ZBB-SFB-NEXT: sext.w a3, a3 +; RV64I-NO-ZBB-SFB-NEXT: bge a3, a0, .LBB0_3 +; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB0_4 +; RV64I-NO-ZBB-SFB-NEXT: .LBB0_2: # %entry +; RV64I-NO-ZBB-SFB-NEXT: ret +; RV64I-NO-ZBB-SFB-NEXT: .LBB0_3: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB0_2 +; RV64I-NO-ZBB-SFB-NEXT: .LBB0_4: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV64I-NO-ZBB-SFB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_smax: ; RV32I-SFB-ZBB: # %bb.0: # %entry @@ -63,35 +63,35 @@ entry: } define i32 @select_example_smin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { -; RV32I-NOZBB-LABEL: select_example_smin: -; RV32I-NOZBB: # %bb.0: # %entry -; RV32I-NOZBB-NEXT: bge a0, a3, .LBB1_3 -; RV32I-NOZBB-NEXT: # %bb.1: # %entry -; RV32I-NOZBB-NEXT: beqz a2, .LBB1_4 -; RV32I-NOZBB-NEXT: .LBB1_2: # %entry -; RV32I-NOZBB-NEXT: ret -; RV32I-NOZBB-NEXT: .LBB1_3: # %entry -; RV32I-NOZBB-NEXT: mv a0, a3 -; RV32I-NOZBB-NEXT: bnez a2, .LBB1_2 -; RV32I-NOZBB-NEXT: .LBB1_4: # %entry -; RV32I-NOZBB-NEXT: mv a0, a1 -; RV32I-NOZBB-NEXT: ret +; RV32I-NO-ZBB-SFB-LABEL: select_example_smin: +; RV32I-NO-ZBB-SFB: # %bb.0: # %entry +; RV32I-NO-ZBB-SFB-NEXT: bge a0, a3, .LBB1_3 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beqz a2, .LBB1_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB1_2: # %entry +; RV32I-NO-ZBB-SFB-NEXT: ret +; RV32I-NO-ZBB-SFB-NEXT: .LBB1_3: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV32I-NO-ZBB-SFB-NEXT: bnez a2, .LBB1_2 +; RV32I-NO-ZBB-SFB-NEXT: .LBB1_4: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV32I-NO-ZBB-SFB-NEXT: ret ; -; RV64I-NOZBB-LABEL: select_example_smin: -; RV64I-NOZBB: # %bb.0: # %entry -; RV64I-NOZBB-NEXT: sext.w a3, a3 -; RV64I-NOZBB-NEXT: sext.w a0, a0 -; RV64I-NOZBB-NEXT: bge a0, a3, .LBB1_3 -; RV64I-NOZBB-NEXT: # %bb.1: # %entry -; RV64I-NOZBB-NEXT: beqz a2, .LBB1_4 -; RV64I-NOZBB-NEXT: .LBB1_2: # %entry -; RV64I-NOZBB-NEXT: ret -; RV64I-NOZBB-NEXT: .LBB1_3: # %entry -; RV64I-NOZBB-NEXT: mv a0, a3 -; RV64I-NOZBB-NEXT: bnez a2, .LBB1_2 -; RV64I-NOZBB-NEXT: .LBB1_4: # %entry -; RV64I-NOZBB-NEXT: mv a0, a1 -; RV64I-NOZBB-NEXT: ret +; RV64I-NO-ZBB-SFB-LABEL: select_example_smin: +; RV64I-NO-ZBB-SFB: # %bb.0: # %entry +; RV64I-NO-ZBB-SFB-NEXT: sext.w a3, a3 +; RV64I-NO-ZBB-SFB-NEXT: sext.w a0, a0 +; RV64I-NO-ZBB-SFB-NEXT: bge a0, a3, .LBB1_3 +; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB1_4 +; RV64I-NO-ZBB-SFB-NEXT: .LBB1_2: # %entry +; RV64I-NO-ZBB-SFB-NEXT: ret +; RV64I-NO-ZBB-SFB-NEXT: .LBB1_3: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB1_2 +; RV64I-NO-ZBB-SFB-NEXT: .LBB1_4: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV64I-NO-ZBB-SFB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_smin: ; RV32I-SFB-ZBB: # %bb.0: # %entry @@ -119,35 +119,35 @@ entry: } define i32 @select_example_umax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { -; RV32I-NOZBB-LABEL: select_example_umax: -; RV32I-NOZBB: # %bb.0: # %entry -; RV32I-NOZBB-NEXT: bgeu a3, a0, .LBB2_3 -; RV32I-NOZBB-NEXT: # %bb.1: # %entry -; RV32I-NOZBB-NEXT: beqz a2, .LBB2_4 -; RV32I-NOZBB-NEXT: .LBB2_2: # %entry -; RV32I-NOZBB-NEXT: ret -; RV32I-NOZBB-NEXT: .LBB2_3: # %entry -; RV32I-NOZBB-NEXT: mv a0, a3 -; RV32I-NOZBB-NEXT: bnez a2, .LBB2_2 -; RV32I-NOZBB-NEXT: .LBB2_4: # %entry -; RV32I-NOZBB-NEXT: mv a0, a1 -; RV32I-NOZBB-NEXT: ret +; RV32I-NO-ZBB-SFB-LABEL: select_example_umax: +; RV32I-NO-ZBB-SFB: # %bb.0: # %entry +; RV32I-NO-ZBB-SFB-NEXT: bgeu a3, a0, .LBB2_3 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beqz a2, .LBB2_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB2_2: # %entry +; RV32I-NO-ZBB-SFB-NEXT: ret +; RV32I-NO-ZBB-SFB-NEXT: .LBB2_3: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV32I-NO-ZBB-SFB-NEXT: bnez a2, .LBB2_2 +; RV32I-NO-ZBB-SFB-NEXT: .LBB2_4: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV32I-NO-ZBB-SFB-NEXT: ret ; -; RV64I-NOZBB-LABEL: select_example_umax: -; RV64I-NOZBB: # %bb.0: # %entry -; RV64I-NOZBB-NEXT: sext.w a0, a0 -; RV64I-NOZBB-NEXT: sext.w a3, a3 -; RV64I-NOZBB-NEXT: bgeu a3, a0, .LBB2_3 -; RV64I-NOZBB-NEXT: # %bb.1: # %entry -; RV64I-NOZBB-NEXT: beqz a2, .LBB2_4 -; RV64I-NOZBB-NEXT: .LBB2_2: # %entry -; RV64I-NOZBB-NEXT: ret -; RV64I-NOZBB-NEXT: .LBB2_3: # %entry -; RV64I-NOZBB-NEXT: mv a0, a3 -; RV64I-NOZBB-NEXT: bnez a2, .LBB2_2 -; RV64I-NOZBB-NEXT: .LBB2_4: # %entry -; RV64I-NOZBB-NEXT: mv a0, a1 -; RV64I-NOZBB-NEXT: ret +; RV64I-NO-ZBB-SFB-LABEL: select_example_umax: +; RV64I-NO-ZBB-SFB: # %bb.0: # %entry +; RV64I-NO-ZBB-SFB-NEXT: sext.w a0, a0 +; RV64I-NO-ZBB-SFB-NEXT: sext.w a3, a3 +; RV64I-NO-ZBB-SFB-NEXT: bgeu a3, a0, .LBB2_3 +; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB2_4 +; RV64I-NO-ZBB-SFB-NEXT: .LBB2_2: # %entry +; RV64I-NO-ZBB-SFB-NEXT: ret +; RV64I-NO-ZBB-SFB-NEXT: .LBB2_3: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB2_2 +; RV64I-NO-ZBB-SFB-NEXT: .LBB2_4: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV64I-NO-ZBB-SFB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_umax: ; RV32I-SFB-ZBB: # %bb.0: # %entry @@ -175,35 +175,35 @@ entry: } define i32 @select_example_umin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { -; RV32I-NOZBB-LABEL: select_example_umin: -; RV32I-NOZBB: # %bb.0: # %entry -; RV32I-NOZBB-NEXT: bgeu a0, a3, .LBB3_3 -; RV32I-NOZBB-NEXT: # %bb.1: # %entry -; RV32I-NOZBB-NEXT: beqz a2, .LBB3_4 -; RV32I-NOZBB-NEXT: .LBB3_2: # %entry -; RV32I-NOZBB-NEXT: ret -; RV32I-NOZBB-NEXT: .LBB3_3: # %entry -; RV32I-NOZBB-NEXT: mv a0, a3 -; RV32I-NOZBB-NEXT: bnez a2, .LBB3_2 -; RV32I-NOZBB-NEXT: .LBB3_4: # %entry -; RV32I-NOZBB-NEXT: mv a0, a1 -; RV32I-NOZBB-NEXT: ret +; RV32I-NO-ZBB-SFB-LABEL: select_example_umin: +; RV32I-NO-ZBB-SFB: # %bb.0: # %entry +; RV32I-NO-ZBB-SFB-NEXT: bgeu a0, a3, .LBB3_3 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beqz a2, .LBB3_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB3_2: # %entry +; RV32I-NO-ZBB-SFB-NEXT: ret +; RV32I-NO-ZBB-SFB-NEXT: .LBB3_3: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV32I-NO-ZBB-SFB-NEXT: bnez a2, .LBB3_2 +; RV32I-NO-ZBB-SFB-NEXT: .LBB3_4: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV32I-NO-ZBB-SFB-NEXT: ret ; -; RV64I-NOZBB-LABEL: select_example_umin: -; RV64I-NOZBB: # %bb.0: # %entry -; RV64I-NOZBB-NEXT: sext.w a3, a3 -; RV64I-NOZBB-NEXT: sext.w a0, a0 -; RV64I-NOZBB-NEXT: bgeu a0, a3, .LBB3_3 -; RV64I-NOZBB-NEXT: # %bb.1: # %entry -; RV64I-NOZBB-NEXT: beqz a2, .LBB3_4 -; RV64I-NOZBB-NEXT: .LBB3_2: # %entry -; RV64I-NOZBB-NEXT: ret -; RV64I-NOZBB-NEXT: .LBB3_3: # %entry -; RV64I-NOZBB-NEXT: mv a0, a3 -; RV64I-NOZBB-NEXT: bnez a2, .LBB3_2 -; RV64I-NOZBB-NEXT: .LBB3_4: # %entry -; RV64I-NOZBB-NEXT: mv a0, a1 -; RV64I-NOZBB-NEXT: ret +; RV64I-NO-ZBB-SFB-LABEL: select_example_umin: +; RV64I-NO-ZBB-SFB: # %bb.0: # %entry +; RV64I-NO-ZBB-SFB-NEXT: sext.w a3, a3 +; RV64I-NO-ZBB-SFB-NEXT: sext.w a0, a0 +; RV64I-NO-ZBB-SFB-NEXT: bgeu a0, a3, .LBB3_3 +; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB3_4 +; RV64I-NO-ZBB-SFB-NEXT: .LBB3_2: # %entry +; RV64I-NO-ZBB-SFB-NEXT: ret +; RV64I-NO-ZBB-SFB-NEXT: .LBB3_3: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB3_2 +; RV64I-NO-ZBB-SFB-NEXT: .LBB3_4: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV64I-NO-ZBB-SFB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_umin: ; RV32I-SFB-ZBB: # %bb.0: # %entry @@ -231,41 +231,41 @@ entry: } define i64 @select_example_smax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { -; RV32I-NOZBB-LABEL: select_example_smax_1: -; RV32I-NOZBB: # %bb.0: # %entry -; RV32I-NOZBB-NEXT: beq a1, a6, .LBB4_2 -; RV32I-NOZBB-NEXT: # %bb.1: # %entry -; RV32I-NOZBB-NEXT: slt a7, a6, a1 -; RV32I-NOZBB-NEXT: beqz a7, .LBB4_3 -; RV32I-NOZBB-NEXT: j .LBB4_4 -; RV32I-NOZBB-NEXT: .LBB4_2: -; RV32I-NOZBB-NEXT: sltu a7, a5, a0 -; RV32I-NOZBB-NEXT: bnez a7, .LBB4_4 -; RV32I-NOZBB-NEXT: .LBB4_3: # %entry -; RV32I-NOZBB-NEXT: mv a1, a6 -; RV32I-NOZBB-NEXT: mv a0, a5 -; RV32I-NOZBB-NEXT: .LBB4_4: # %entry -; RV32I-NOZBB-NEXT: beqz a4, .LBB4_6 -; RV32I-NOZBB-NEXT: # %bb.5: # %entry -; RV32I-NOZBB-NEXT: ret -; RV32I-NOZBB-NEXT: .LBB4_6: # %entry -; RV32I-NOZBB-NEXT: mv a0, a2 -; RV32I-NOZBB-NEXT: mv a1, a3 -; RV32I-NOZBB-NEXT: ret +; RV32I-NO-ZBB-SFB-LABEL: select_example_smax_1: +; RV32I-NO-ZBB-SFB: # %bb.0: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beq a1, a6, .LBB4_2 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV32I-NO-ZBB-SFB-NEXT: slt a7, a6, a1 +; RV32I-NO-ZBB-SFB-NEXT: beqz a7, .LBB4_3 +; RV32I-NO-ZBB-SFB-NEXT: j .LBB4_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB4_2: +; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a5, a0 +; RV32I-NO-ZBB-SFB-NEXT: bnez a7, .LBB4_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB4_3: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a1, a6 +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a5 +; RV32I-NO-ZBB-SFB-NEXT: .LBB4_4: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beqz a4, .LBB4_6 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.5: # %entry +; RV32I-NO-ZBB-SFB-NEXT: ret +; RV32I-NO-ZBB-SFB-NEXT: .LBB4_6: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a2 +; RV32I-NO-ZBB-SFB-NEXT: mv a1, a3 +; RV32I-NO-ZBB-SFB-NEXT: ret ; -; RV64I-NOZBB-LABEL: select_example_smax_1: -; RV64I-NOZBB: # %bb.0: # %entry -; RV64I-NOZBB-NEXT: bge a3, a0, .LBB4_3 -; RV64I-NOZBB-NEXT: # %bb.1: # %entry -; RV64I-NOZBB-NEXT: beqz a2, .LBB4_4 -; RV64I-NOZBB-NEXT: .LBB4_2: # %entry -; RV64I-NOZBB-NEXT: ret -; RV64I-NOZBB-NEXT: .LBB4_3: # %entry -; RV64I-NOZBB-NEXT: mv a0, a3 -; RV64I-NOZBB-NEXT: bnez a2, .LBB4_2 -; RV64I-NOZBB-NEXT: .LBB4_4: # %entry -; RV64I-NOZBB-NEXT: mv a0, a1 -; RV64I-NOZBB-NEXT: ret +; RV64I-NO-ZBB-SFB-LABEL: select_example_smax_1: +; RV64I-NO-ZBB-SFB: # %bb.0: # %entry +; RV64I-NO-ZBB-SFB-NEXT: bge a3, a0, .LBB4_3 +; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB4_4 +; RV64I-NO-ZBB-SFB-NEXT: .LBB4_2: # %entry +; RV64I-NO-ZBB-SFB-NEXT: ret +; RV64I-NO-ZBB-SFB-NEXT: .LBB4_3: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB4_2 +; RV64I-NO-ZBB-SFB-NEXT: .LBB4_4: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV64I-NO-ZBB-SFB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_smax_1: ; RV32I-SFB-ZBB: # %bb.0: # %entry @@ -308,41 +308,41 @@ entry: } define i64 @select_example_smin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { -; RV32I-NOZBB-LABEL: select_example_smin_1: -; RV32I-NOZBB: # %bb.0: # %entry -; RV32I-NOZBB-NEXT: beq a1, a6, .LBB5_2 -; RV32I-NOZBB-NEXT: # %bb.1: # %entry -; RV32I-NOZBB-NEXT: slt a7, a1, a6 -; RV32I-NOZBB-NEXT: beqz a7, .LBB5_3 -; RV32I-NOZBB-NEXT: j .LBB5_4 -; RV32I-NOZBB-NEXT: .LBB5_2: -; RV32I-NOZBB-NEXT: sltu a7, a0, a5 -; RV32I-NOZBB-NEXT: bnez a7, .LBB5_4 -; RV32I-NOZBB-NEXT: .LBB5_3: # %entry -; RV32I-NOZBB-NEXT: mv a1, a6 -; RV32I-NOZBB-NEXT: mv a0, a5 -; RV32I-NOZBB-NEXT: .LBB5_4: # %entry -; RV32I-NOZBB-NEXT: beqz a4, .LBB5_6 -; RV32I-NOZBB-NEXT: # %bb.5: # %entry -; RV32I-NOZBB-NEXT: ret -; RV32I-NOZBB-NEXT: .LBB5_6: # %entry -; RV32I-NOZBB-NEXT: mv a0, a2 -; RV32I-NOZBB-NEXT: mv a1, a3 -; RV32I-NOZBB-NEXT: ret +; RV32I-NO-ZBB-SFB-LABEL: select_example_smin_1: +; RV32I-NO-ZBB-SFB: # %bb.0: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beq a1, a6, .LBB5_2 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV32I-NO-ZBB-SFB-NEXT: slt a7, a1, a6 +; RV32I-NO-ZBB-SFB-NEXT: beqz a7, .LBB5_3 +; RV32I-NO-ZBB-SFB-NEXT: j .LBB5_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB5_2: +; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a0, a5 +; RV32I-NO-ZBB-SFB-NEXT: bnez a7, .LBB5_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB5_3: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a1, a6 +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a5 +; RV32I-NO-ZBB-SFB-NEXT: .LBB5_4: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beqz a4, .LBB5_6 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.5: # %entry +; RV32I-NO-ZBB-SFB-NEXT: ret +; RV32I-NO-ZBB-SFB-NEXT: .LBB5_6: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a2 +; RV32I-NO-ZBB-SFB-NEXT: mv a1, a3 +; RV32I-NO-ZBB-SFB-NEXT: ret ; -; RV64I-NOZBB-LABEL: select_example_smin_1: -; RV64I-NOZBB: # %bb.0: # %entry -; RV64I-NOZBB-NEXT: bge a0, a3, .LBB5_3 -; RV64I-NOZBB-NEXT: # %bb.1: # %entry -; RV64I-NOZBB-NEXT: beqz a2, .LBB5_4 -; RV64I-NOZBB-NEXT: .LBB5_2: # %entry -; RV64I-NOZBB-NEXT: ret -; RV64I-NOZBB-NEXT: .LBB5_3: # %entry -; RV64I-NOZBB-NEXT: mv a0, a3 -; RV64I-NOZBB-NEXT: bnez a2, .LBB5_2 -; RV64I-NOZBB-NEXT: .LBB5_4: # %entry -; RV64I-NOZBB-NEXT: mv a0, a1 -; RV64I-NOZBB-NEXT: ret +; RV64I-NO-ZBB-SFB-LABEL: select_example_smin_1: +; RV64I-NO-ZBB-SFB: # %bb.0: # %entry +; RV64I-NO-ZBB-SFB-NEXT: bge a0, a3, .LBB5_3 +; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB5_4 +; RV64I-NO-ZBB-SFB-NEXT: .LBB5_2: # %entry +; RV64I-NO-ZBB-SFB-NEXT: ret +; RV64I-NO-ZBB-SFB-NEXT: .LBB5_3: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB5_2 +; RV64I-NO-ZBB-SFB-NEXT: .LBB5_4: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV64I-NO-ZBB-SFB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_smin_1: ; RV32I-SFB-ZBB: # %bb.0: # %entry @@ -385,41 +385,41 @@ entry: } define i64 @select_example_umax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { -; RV32I-NOZBB-LABEL: select_example_umax_1: -; RV32I-NOZBB: # %bb.0: # %entry -; RV32I-NOZBB-NEXT: beq a1, a6, .LBB6_2 -; RV32I-NOZBB-NEXT: # %bb.1: # %entry -; RV32I-NOZBB-NEXT: sltu a7, a6, a1 -; RV32I-NOZBB-NEXT: beqz a7, .LBB6_3 -; RV32I-NOZBB-NEXT: j .LBB6_4 -; RV32I-NOZBB-NEXT: .LBB6_2: -; RV32I-NOZBB-NEXT: sltu a7, a5, a0 -; RV32I-NOZBB-NEXT: bnez a7, .LBB6_4 -; RV32I-NOZBB-NEXT: .LBB6_3: # %entry -; RV32I-NOZBB-NEXT: mv a1, a6 -; RV32I-NOZBB-NEXT: mv a0, a5 -; RV32I-NOZBB-NEXT: .LBB6_4: # %entry -; RV32I-NOZBB-NEXT: beqz a4, .LBB6_6 -; RV32I-NOZBB-NEXT: # %bb.5: # %entry -; RV32I-NOZBB-NEXT: ret -; RV32I-NOZBB-NEXT: .LBB6_6: # %entry -; RV32I-NOZBB-NEXT: mv a0, a2 -; RV32I-NOZBB-NEXT: mv a1, a3 -; RV32I-NOZBB-NEXT: ret +; RV32I-NO-ZBB-SFB-LABEL: select_example_umax_1: +; RV32I-NO-ZBB-SFB: # %bb.0: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beq a1, a6, .LBB6_2 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a6, a1 +; RV32I-NO-ZBB-SFB-NEXT: beqz a7, .LBB6_3 +; RV32I-NO-ZBB-SFB-NEXT: j .LBB6_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB6_2: +; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a5, a0 +; RV32I-NO-ZBB-SFB-NEXT: bnez a7, .LBB6_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB6_3: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a1, a6 +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a5 +; RV32I-NO-ZBB-SFB-NEXT: .LBB6_4: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beqz a4, .LBB6_6 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.5: # %entry +; RV32I-NO-ZBB-SFB-NEXT: ret +; RV32I-NO-ZBB-SFB-NEXT: .LBB6_6: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a2 +; RV32I-NO-ZBB-SFB-NEXT: mv a1, a3 +; RV32I-NO-ZBB-SFB-NEXT: ret ; -; RV64I-NOZBB-LABEL: select_example_umax_1: -; RV64I-NOZBB: # %bb.0: # %entry -; RV64I-NOZBB-NEXT: bgeu a3, a0, .LBB6_3 -; RV64I-NOZBB-NEXT: # %bb.1: # %entry -; RV64I-NOZBB-NEXT: beqz a2, .LBB6_4 -; RV64I-NOZBB-NEXT: .LBB6_2: # %entry -; RV64I-NOZBB-NEXT: ret -; RV64I-NOZBB-NEXT: .LBB6_3: # %entry -; RV64I-NOZBB-NEXT: mv a0, a3 -; RV64I-NOZBB-NEXT: bnez a2, .LBB6_2 -; RV64I-NOZBB-NEXT: .LBB6_4: # %entry -; RV64I-NOZBB-NEXT: mv a0, a1 -; RV64I-NOZBB-NEXT: ret +; RV64I-NO-ZBB-SFB-LABEL: select_example_umax_1: +; RV64I-NO-ZBB-SFB: # %bb.0: # %entry +; RV64I-NO-ZBB-SFB-NEXT: bgeu a3, a0, .LBB6_3 +; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB6_4 +; RV64I-NO-ZBB-SFB-NEXT: .LBB6_2: # %entry +; RV64I-NO-ZBB-SFB-NEXT: ret +; RV64I-NO-ZBB-SFB-NEXT: .LBB6_3: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB6_2 +; RV64I-NO-ZBB-SFB-NEXT: .LBB6_4: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV64I-NO-ZBB-SFB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_umax_1: ; RV32I-SFB-ZBB: # %bb.0: # %entry @@ -462,41 +462,41 @@ entry: } define i64 @select_example_umin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { -; RV32I-NOZBB-LABEL: select_example_umin_1: -; RV32I-NOZBB: # %bb.0: # %entry -; RV32I-NOZBB-NEXT: beq a1, a6, .LBB7_2 -; RV32I-NOZBB-NEXT: # %bb.1: # %entry -; RV32I-NOZBB-NEXT: sltu a7, a1, a6 -; RV32I-NOZBB-NEXT: beqz a7, .LBB7_3 -; RV32I-NOZBB-NEXT: j .LBB7_4 -; RV32I-NOZBB-NEXT: .LBB7_2: -; RV32I-NOZBB-NEXT: sltu a7, a0, a5 -; RV32I-NOZBB-NEXT: bnez a7, .LBB7_4 -; RV32I-NOZBB-NEXT: .LBB7_3: # %entry -; RV32I-NOZBB-NEXT: mv a1, a6 -; RV32I-NOZBB-NEXT: mv a0, a5 -; RV32I-NOZBB-NEXT: .LBB7_4: # %entry -; RV32I-NOZBB-NEXT: beqz a4, .LBB7_6 -; RV32I-NOZBB-NEXT: # %bb.5: # %entry -; RV32I-NOZBB-NEXT: ret -; RV32I-NOZBB-NEXT: .LBB7_6: # %entry -; RV32I-NOZBB-NEXT: mv a0, a2 -; RV32I-NOZBB-NEXT: mv a1, a3 -; RV32I-NOZBB-NEXT: ret +; RV32I-NO-ZBB-SFB-LABEL: select_example_umin_1: +; RV32I-NO-ZBB-SFB: # %bb.0: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beq a1, a6, .LBB7_2 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a1, a6 +; RV32I-NO-ZBB-SFB-NEXT: beqz a7, .LBB7_3 +; RV32I-NO-ZBB-SFB-NEXT: j .LBB7_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB7_2: +; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a0, a5 +; RV32I-NO-ZBB-SFB-NEXT: bnez a7, .LBB7_4 +; RV32I-NO-ZBB-SFB-NEXT: .LBB7_3: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a1, a6 +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a5 +; RV32I-NO-ZBB-SFB-NEXT: .LBB7_4: # %entry +; RV32I-NO-ZBB-SFB-NEXT: beqz a4, .LBB7_6 +; RV32I-NO-ZBB-SFB-NEXT: # %bb.5: # %entry +; RV32I-NO-ZBB-SFB-NEXT: ret +; RV32I-NO-ZBB-SFB-NEXT: .LBB7_6: # %entry +; RV32I-NO-ZBB-SFB-NEXT: mv a0, a2 +; RV32I-NO-ZBB-SFB-NEXT: mv a1, a3 +; RV32I-NO-ZBB-SFB-NEXT: ret ; -; RV64I-NOZBB-LABEL: select_example_umin_1: -; RV64I-NOZBB: # %bb.0: # %entry -; RV64I-NOZBB-NEXT: bgeu a0, a3, .LBB7_3 -; RV64I-NOZBB-NEXT: # %bb.1: # %entry -; RV64I-NOZBB-NEXT: beqz a2, .LBB7_4 -; RV64I-NOZBB-NEXT: .LBB7_2: # %entry -; RV64I-NOZBB-NEXT: ret -; RV64I-NOZBB-NEXT: .LBB7_3: # %entry -; RV64I-NOZBB-NEXT: mv a0, a3 -; RV64I-NOZBB-NEXT: bnez a2, .LBB7_2 -; RV64I-NOZBB-NEXT: .LBB7_4: # %entry -; RV64I-NOZBB-NEXT: mv a0, a1 -; RV64I-NOZBB-NEXT: ret +; RV64I-NO-ZBB-SFB-LABEL: select_example_umin_1: +; RV64I-NO-ZBB-SFB: # %bb.0: # %entry +; RV64I-NO-ZBB-SFB-NEXT: bgeu a0, a3, .LBB7_3 +; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry +; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB7_4 +; RV64I-NO-ZBB-SFB-NEXT: .LBB7_2: # %entry +; RV64I-NO-ZBB-SFB-NEXT: ret +; RV64I-NO-ZBB-SFB-NEXT: .LBB7_3: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 +; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB7_2 +; RV64I-NO-ZBB-SFB-NEXT: .LBB7_4: # %entry +; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 +; RV64I-NO-ZBB-SFB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_umin_1: ; RV32I-SFB-ZBB: # %bb.0: # %entry From fa4c9f18ed5ad08fbfb81d52a08aebe39537763d Mon Sep 17 00:00:00 2001 From: Harsh Chandel Date: Tue, 28 Oct 2025 15:06:22 +0530 Subject: [PATCH 5/6] fixup! add feature info Change-Id: I34ff969dc60321d59349679cc51645621dd10412 --- llvm/test/CodeGen/RISCV/features-info.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index 5e5f2b78e8869..f2db872d03eb7 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -135,6 +135,7 @@ ; CHECK-NEXT: shgatpa - 'Shgatpa' (SvNNx4 mode supported for all modes supported by satp, as well as Bare). ; CHECK-NEXT: shifted-zextw-fusion - Enable SLLI+SRLI to be fused when computing (shifted) word zero extension. ; CHECK-NEXT: shlcofideleg - 'Shlcofideleg' (Delegating LCOFI Interrupts to VS-mode). +; CHECK-NEXT: short-forward-branch-i-minmax - Enable short forward branch optimization for min,max instructions in Zbb. ; CHECK-NEXT: short-forward-branch-opt - Enable short forward branch optimization. ; CHECK-NEXT: shtvala - 'Shtvala' (htval provides all needed values). ; CHECK-NEXT: shvsatpa - 'Shvsatpa' (vsatp supports all modes supported by satp). From ce40422c9acef7fa6da246e565f338fb0cdcbd61 Mon Sep 17 00:00:00 2001 From: Harsh Chandel Date: Wed, 29 Oct 2025 11:23:57 +0530 Subject: [PATCH 6/6] fixup! Address comments Change-Id: I3c533c181fe1b811959f90a9dd2d22689d5115db --- llvm/lib/Target/RISCV/RISCVFeatures.td | 3 +- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 11 +- .../RISCV/short-forward-branch-opt-min-max.ll | 756 +++++++++++------- 3 files changed, 467 insertions(+), 303 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 753e86cec7477..330bb89ba3d88 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1852,7 +1852,8 @@ def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()" def TuneShortForwardBranchIMinMax : SubtargetFeature<"short-forward-branch-i-minmax", "HasShortForwardBranchIMinMax", - "true", "Enable short forward branch optimization for min,max instructions in Zbb", [TuneShortForwardBranchOpt]>; + "true", "Enable short forward branch optimization for min,max instructions in Zbb", + [TuneShortForwardBranchOpt]>; // Some subtargets require a S2V transfer buffer to move scalars into vectors. // FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 002456b250cc7..3a7013d9efae6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1740,7 +1740,7 @@ unsigned getPredicatedOpcode(unsigned Opcode) { static MachineInstr *canFoldAsPredicatedOp(Register Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII, - bool minmax) { + const RISCVSubtarget &STI) { if (!Reg.isVirtual()) return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) @@ -1749,7 +1749,7 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg, if (!MI) return nullptr; - if (!minmax && + if (!STI.hasShortForwardBranchIMinMax() && (MI->getOpcode() == RISCV::MAX || MI->getOpcode() == RISCV::MIN || MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU)) return nullptr; @@ -1816,12 +1816,11 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI, return nullptr; MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); - MachineInstr *DefMI = canFoldAsPredicatedOp( - MI.getOperand(5).getReg(), MRI, this, STI.hasShortForwardBranchIMinMax()); + MachineInstr *DefMI = + canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this, STI); bool Invert = !DefMI; if (!DefMI) - DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, - STI.hasShortForwardBranchIMinMax()); + DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, STI); if (!DefMI) return nullptr; diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll index 0a3f7181693df..05e06cea9967a 100644 --- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll +++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll @@ -1,61 +1,75 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I-NO-ZBB-SFB -; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I-NO-ZBB-SFB -; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-i-minmax | \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb | FileCheck %s --check-prefixes=RV32I-ZBB +; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb | FileCheck %s --check-prefixes=RV64I-ZBB +; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-opt | \ ; RUN: FileCheck %s --check-prefixes=RV32I-SFB-ZBB -; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-i-minmax | \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-opt | \ ; RUN: FileCheck %s --check-prefixes=RV64I-SFB-ZBB +; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-i-minmax | \ +; RUN: FileCheck %s --check-prefixes=RV32I-SFBIMinMax-ZBB +; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-i-minmax | \ +; RUN: FileCheck %s --check-prefixes=RV64I-SFBIMinMax-ZBB define i32 @select_example_smax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { -; RV32I-NO-ZBB-SFB-LABEL: select_example_smax: -; RV32I-NO-ZBB-SFB: # %bb.0: # %entry -; RV32I-NO-ZBB-SFB-NEXT: bge a3, a0, .LBB0_3 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beqz a2, .LBB0_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB0_2: # %entry -; RV32I-NO-ZBB-SFB-NEXT: ret -; RV32I-NO-ZBB-SFB-NEXT: .LBB0_3: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV32I-NO-ZBB-SFB-NEXT: bnez a2, .LBB0_2 -; RV32I-NO-ZBB-SFB-NEXT: .LBB0_4: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV32I-NO-ZBB-SFB-NEXT: ret -; -; RV64I-NO-ZBB-SFB-LABEL: select_example_smax: -; RV64I-NO-ZBB-SFB: # %bb.0: # %entry -; RV64I-NO-ZBB-SFB-NEXT: sext.w a0, a0 -; RV64I-NO-ZBB-SFB-NEXT: sext.w a3, a3 -; RV64I-NO-ZBB-SFB-NEXT: bge a3, a0, .LBB0_3 -; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB0_4 -; RV64I-NO-ZBB-SFB-NEXT: .LBB0_2: # %entry -; RV64I-NO-ZBB-SFB-NEXT: ret -; RV64I-NO-ZBB-SFB-NEXT: .LBB0_3: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB0_2 -; RV64I-NO-ZBB-SFB-NEXT: .LBB0_4: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV64I-NO-ZBB-SFB-NEXT: ret +; RV32I-ZBB-LABEL: select_example_smax: +; RV32I-ZBB: # %bb.0: # %entry +; RV32I-ZBB-NEXT: beqz a2, .LBB0_2 +; RV32I-ZBB-NEXT: # %bb.1: +; RV32I-ZBB-NEXT: max a1, a0, a3 +; RV32I-ZBB-NEXT: .LBB0_2: # %entry +; RV32I-ZBB-NEXT: mv a0, a1 +; RV32I-ZBB-NEXT: ret +; +; RV64I-ZBB-LABEL: select_example_smax: +; RV64I-ZBB: # %bb.0: # %entry +; RV64I-ZBB-NEXT: beqz a2, .LBB0_2 +; RV64I-ZBB-NEXT: # %bb.1: +; RV64I-ZBB-NEXT: sext.w a3, a3 +; RV64I-ZBB-NEXT: sext.w a0, a0 +; RV64I-ZBB-NEXT: max a1, a0, a3 +; RV64I-ZBB-NEXT: .LBB0_2: # %entry +; RV64I-ZBB-NEXT: mv a0, a1 +; RV64I-ZBB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_smax: ; RV32I-SFB-ZBB: # %bb.0: # %entry -; RV32I-SFB-ZBB-NEXT: beqz a2, .LBB0_2 +; RV32I-SFB-ZBB-NEXT: max a0, a0, a3 +; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB0_2 ; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV32I-SFB-ZBB-NEXT: max a1, a0, a3 -; RV32I-SFB-ZBB-NEXT: .LBB0_2: # %entry ; RV32I-SFB-ZBB-NEXT: mv a0, a1 +; RV32I-SFB-ZBB-NEXT: .LBB0_2: # %entry ; RV32I-SFB-ZBB-NEXT: ret ; ; RV64I-SFB-ZBB-LABEL: select_example_smax: ; RV64I-SFB-ZBB: # %bb.0: # %entry ; RV64I-SFB-ZBB-NEXT: sext.w a3, a3 ; RV64I-SFB-ZBB-NEXT: sext.w a0, a0 -; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB0_2 +; RV64I-SFB-ZBB-NEXT: max a0, a0, a3 +; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB0_2 ; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV64I-SFB-ZBB-NEXT: max a1, a0, a3 -; RV64I-SFB-ZBB-NEXT: .LBB0_2: # %entry ; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: .LBB0_2: # %entry ; RV64I-SFB-ZBB-NEXT: ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smax: +; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB0_2 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: max a1, a0, a3 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB0_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV32I-SFBIMinMax-ZBB-NEXT: ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smax: +; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0 +; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB0_2 +; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: max a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: .LBB0_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT: ret entry: %res = call i32 @llvm.smax.i32(i32 %a, i32 %y) %sel = select i1 %x, i32 %res, i32 %b @@ -63,55 +77,65 @@ entry: } define i32 @select_example_smin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { -; RV32I-NO-ZBB-SFB-LABEL: select_example_smin: -; RV32I-NO-ZBB-SFB: # %bb.0: # %entry -; RV32I-NO-ZBB-SFB-NEXT: bge a0, a3, .LBB1_3 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beqz a2, .LBB1_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB1_2: # %entry -; RV32I-NO-ZBB-SFB-NEXT: ret -; RV32I-NO-ZBB-SFB-NEXT: .LBB1_3: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV32I-NO-ZBB-SFB-NEXT: bnez a2, .LBB1_2 -; RV32I-NO-ZBB-SFB-NEXT: .LBB1_4: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV32I-NO-ZBB-SFB-NEXT: ret -; -; RV64I-NO-ZBB-SFB-LABEL: select_example_smin: -; RV64I-NO-ZBB-SFB: # %bb.0: # %entry -; RV64I-NO-ZBB-SFB-NEXT: sext.w a3, a3 -; RV64I-NO-ZBB-SFB-NEXT: sext.w a0, a0 -; RV64I-NO-ZBB-SFB-NEXT: bge a0, a3, .LBB1_3 -; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB1_4 -; RV64I-NO-ZBB-SFB-NEXT: .LBB1_2: # %entry -; RV64I-NO-ZBB-SFB-NEXT: ret -; RV64I-NO-ZBB-SFB-NEXT: .LBB1_3: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB1_2 -; RV64I-NO-ZBB-SFB-NEXT: .LBB1_4: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV64I-NO-ZBB-SFB-NEXT: ret +; RV32I-ZBB-LABEL: select_example_smin: +; RV32I-ZBB: # %bb.0: # %entry +; RV32I-ZBB-NEXT: beqz a2, .LBB1_2 +; RV32I-ZBB-NEXT: # %bb.1: +; RV32I-ZBB-NEXT: min a1, a0, a3 +; RV32I-ZBB-NEXT: .LBB1_2: # %entry +; RV32I-ZBB-NEXT: mv a0, a1 +; RV32I-ZBB-NEXT: ret +; +; RV64I-ZBB-LABEL: select_example_smin: +; RV64I-ZBB: # %bb.0: # %entry +; RV64I-ZBB-NEXT: beqz a2, .LBB1_2 +; RV64I-ZBB-NEXT: # %bb.1: +; RV64I-ZBB-NEXT: sext.w a3, a3 +; RV64I-ZBB-NEXT: sext.w a0, a0 +; RV64I-ZBB-NEXT: min a1, a0, a3 +; RV64I-ZBB-NEXT: .LBB1_2: # %entry +; RV64I-ZBB-NEXT: mv a0, a1 +; RV64I-ZBB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_smin: ; RV32I-SFB-ZBB: # %bb.0: # %entry -; RV32I-SFB-ZBB-NEXT: beqz a2, .LBB1_2 +; RV32I-SFB-ZBB-NEXT: min a0, a0, a3 +; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB1_2 ; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV32I-SFB-ZBB-NEXT: min a1, a0, a3 -; RV32I-SFB-ZBB-NEXT: .LBB1_2: # %entry ; RV32I-SFB-ZBB-NEXT: mv a0, a1 +; RV32I-SFB-ZBB-NEXT: .LBB1_2: # %entry ; RV32I-SFB-ZBB-NEXT: ret ; ; RV64I-SFB-ZBB-LABEL: select_example_smin: ; RV64I-SFB-ZBB: # %bb.0: # %entry ; RV64I-SFB-ZBB-NEXT: sext.w a3, a3 ; RV64I-SFB-ZBB-NEXT: sext.w a0, a0 -; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB1_2 +; RV64I-SFB-ZBB-NEXT: min a0, a0, a3 +; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB1_2 ; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV64I-SFB-ZBB-NEXT: min a1, a0, a3 -; RV64I-SFB-ZBB-NEXT: .LBB1_2: # %entry ; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: .LBB1_2: # %entry ; RV64I-SFB-ZBB-NEXT: ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smin: +; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB1_2 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: min a1, a0, a3 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB1_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV32I-SFBIMinMax-ZBB-NEXT: ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smin: +; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0 +; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB1_2 +; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: min a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: .LBB1_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT: ret entry: %res = call i32 @llvm.smin.i32(i32 %a, i32 %y) %sel = select i1 %x, i32 %res, i32 %b @@ -119,55 +143,65 @@ entry: } define i32 @select_example_umax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { -; RV32I-NO-ZBB-SFB-LABEL: select_example_umax: -; RV32I-NO-ZBB-SFB: # %bb.0: # %entry -; RV32I-NO-ZBB-SFB-NEXT: bgeu a3, a0, .LBB2_3 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beqz a2, .LBB2_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB2_2: # %entry -; RV32I-NO-ZBB-SFB-NEXT: ret -; RV32I-NO-ZBB-SFB-NEXT: .LBB2_3: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV32I-NO-ZBB-SFB-NEXT: bnez a2, .LBB2_2 -; RV32I-NO-ZBB-SFB-NEXT: .LBB2_4: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV32I-NO-ZBB-SFB-NEXT: ret -; -; RV64I-NO-ZBB-SFB-LABEL: select_example_umax: -; RV64I-NO-ZBB-SFB: # %bb.0: # %entry -; RV64I-NO-ZBB-SFB-NEXT: sext.w a0, a0 -; RV64I-NO-ZBB-SFB-NEXT: sext.w a3, a3 -; RV64I-NO-ZBB-SFB-NEXT: bgeu a3, a0, .LBB2_3 -; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB2_4 -; RV64I-NO-ZBB-SFB-NEXT: .LBB2_2: # %entry -; RV64I-NO-ZBB-SFB-NEXT: ret -; RV64I-NO-ZBB-SFB-NEXT: .LBB2_3: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB2_2 -; RV64I-NO-ZBB-SFB-NEXT: .LBB2_4: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV64I-NO-ZBB-SFB-NEXT: ret +; RV32I-ZBB-LABEL: select_example_umax: +; RV32I-ZBB: # %bb.0: # %entry +; RV32I-ZBB-NEXT: beqz a2, .LBB2_2 +; RV32I-ZBB-NEXT: # %bb.1: +; RV32I-ZBB-NEXT: maxu a1, a0, a3 +; RV32I-ZBB-NEXT: .LBB2_2: # %entry +; RV32I-ZBB-NEXT: mv a0, a1 +; RV32I-ZBB-NEXT: ret +; +; RV64I-ZBB-LABEL: select_example_umax: +; RV64I-ZBB: # %bb.0: # %entry +; RV64I-ZBB-NEXT: beqz a2, .LBB2_2 +; RV64I-ZBB-NEXT: # %bb.1: +; RV64I-ZBB-NEXT: sext.w a3, a3 +; RV64I-ZBB-NEXT: sext.w a0, a0 +; RV64I-ZBB-NEXT: maxu a1, a0, a3 +; RV64I-ZBB-NEXT: .LBB2_2: # %entry +; RV64I-ZBB-NEXT: mv a0, a1 +; RV64I-ZBB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_umax: ; RV32I-SFB-ZBB: # %bb.0: # %entry -; RV32I-SFB-ZBB-NEXT: beqz a2, .LBB2_2 +; RV32I-SFB-ZBB-NEXT: maxu a0, a0, a3 +; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB2_2 ; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV32I-SFB-ZBB-NEXT: maxu a1, a0, a3 -; RV32I-SFB-ZBB-NEXT: .LBB2_2: # %entry ; RV32I-SFB-ZBB-NEXT: mv a0, a1 +; RV32I-SFB-ZBB-NEXT: .LBB2_2: # %entry ; RV32I-SFB-ZBB-NEXT: ret ; ; RV64I-SFB-ZBB-LABEL: select_example_umax: ; RV64I-SFB-ZBB: # %bb.0: # %entry ; RV64I-SFB-ZBB-NEXT: sext.w a3, a3 ; RV64I-SFB-ZBB-NEXT: sext.w a0, a0 -; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB2_2 +; RV64I-SFB-ZBB-NEXT: maxu a0, a0, a3 +; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB2_2 ; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV64I-SFB-ZBB-NEXT: maxu a1, a0, a3 -; RV64I-SFB-ZBB-NEXT: .LBB2_2: # %entry ; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: .LBB2_2: # %entry ; RV64I-SFB-ZBB-NEXT: ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umax: +; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB2_2 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: maxu a1, a0, a3 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB2_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV32I-SFBIMinMax-ZBB-NEXT: ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umax: +; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0 +; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB2_2 +; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: maxu a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: .LBB2_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT: ret entry: %res = call i32 @llvm.umax.i32(i32 %a, i32 %y) %sel = select i1 %x, i32 %res, i32 %b @@ -175,55 +209,65 @@ entry: } define i32 @select_example_umin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { -; RV32I-NO-ZBB-SFB-LABEL: select_example_umin: -; RV32I-NO-ZBB-SFB: # %bb.0: # %entry -; RV32I-NO-ZBB-SFB-NEXT: bgeu a0, a3, .LBB3_3 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beqz a2, .LBB3_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB3_2: # %entry -; RV32I-NO-ZBB-SFB-NEXT: ret -; RV32I-NO-ZBB-SFB-NEXT: .LBB3_3: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV32I-NO-ZBB-SFB-NEXT: bnez a2, .LBB3_2 -; RV32I-NO-ZBB-SFB-NEXT: .LBB3_4: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV32I-NO-ZBB-SFB-NEXT: ret -; -; RV64I-NO-ZBB-SFB-LABEL: select_example_umin: -; RV64I-NO-ZBB-SFB: # %bb.0: # %entry -; RV64I-NO-ZBB-SFB-NEXT: sext.w a3, a3 -; RV64I-NO-ZBB-SFB-NEXT: sext.w a0, a0 -; RV64I-NO-ZBB-SFB-NEXT: bgeu a0, a3, .LBB3_3 -; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB3_4 -; RV64I-NO-ZBB-SFB-NEXT: .LBB3_2: # %entry -; RV64I-NO-ZBB-SFB-NEXT: ret -; RV64I-NO-ZBB-SFB-NEXT: .LBB3_3: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB3_2 -; RV64I-NO-ZBB-SFB-NEXT: .LBB3_4: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV64I-NO-ZBB-SFB-NEXT: ret +; RV32I-ZBB-LABEL: select_example_umin: +; RV32I-ZBB: # %bb.0: # %entry +; RV32I-ZBB-NEXT: beqz a2, .LBB3_2 +; RV32I-ZBB-NEXT: # %bb.1: +; RV32I-ZBB-NEXT: minu a1, a0, a3 +; RV32I-ZBB-NEXT: .LBB3_2: # %entry +; RV32I-ZBB-NEXT: mv a0, a1 +; RV32I-ZBB-NEXT: ret +; +; RV64I-ZBB-LABEL: select_example_umin: +; RV64I-ZBB: # %bb.0: # %entry +; RV64I-ZBB-NEXT: beqz a2, .LBB3_2 +; RV64I-ZBB-NEXT: # %bb.1: +; RV64I-ZBB-NEXT: sext.w a3, a3 +; RV64I-ZBB-NEXT: sext.w a0, a0 +; RV64I-ZBB-NEXT: minu a1, a0, a3 +; RV64I-ZBB-NEXT: .LBB3_2: # %entry +; RV64I-ZBB-NEXT: mv a0, a1 +; RV64I-ZBB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_umin: ; RV32I-SFB-ZBB: # %bb.0: # %entry -; RV32I-SFB-ZBB-NEXT: beqz a2, .LBB3_2 +; RV32I-SFB-ZBB-NEXT: minu a0, a0, a3 +; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB3_2 ; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV32I-SFB-ZBB-NEXT: minu a1, a0, a3 -; RV32I-SFB-ZBB-NEXT: .LBB3_2: # %entry ; RV32I-SFB-ZBB-NEXT: mv a0, a1 +; RV32I-SFB-ZBB-NEXT: .LBB3_2: # %entry ; RV32I-SFB-ZBB-NEXT: ret ; ; RV64I-SFB-ZBB-LABEL: select_example_umin: ; RV64I-SFB-ZBB: # %bb.0: # %entry ; RV64I-SFB-ZBB-NEXT: sext.w a3, a3 ; RV64I-SFB-ZBB-NEXT: sext.w a0, a0 -; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB3_2 +; RV64I-SFB-ZBB-NEXT: minu a0, a0, a3 +; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB3_2 ; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV64I-SFB-ZBB-NEXT: minu a1, a0, a3 -; RV64I-SFB-ZBB-NEXT: .LBB3_2: # %entry ; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: .LBB3_2: # %entry ; RV64I-SFB-ZBB-NEXT: ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umin: +; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB3_2 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: minu a1, a0, a3 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB3_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV32I-SFBIMinMax-ZBB-NEXT: ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umin: +; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0 +; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB3_2 +; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: minu a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: .LBB3_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT: ret entry: %res = call i32 @llvm.umin.i32(i32 %a, i32 %y) %sel = select i1 %x, i32 %res, i32 %b @@ -231,41 +275,36 @@ entry: } define i64 @select_example_smax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { -; RV32I-NO-ZBB-SFB-LABEL: select_example_smax_1: -; RV32I-NO-ZBB-SFB: # %bb.0: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beq a1, a6, .LBB4_2 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV32I-NO-ZBB-SFB-NEXT: slt a7, a6, a1 -; RV32I-NO-ZBB-SFB-NEXT: beqz a7, .LBB4_3 -; RV32I-NO-ZBB-SFB-NEXT: j .LBB4_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB4_2: -; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a5, a0 -; RV32I-NO-ZBB-SFB-NEXT: bnez a7, .LBB4_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB4_3: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a1, a6 -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a5 -; RV32I-NO-ZBB-SFB-NEXT: .LBB4_4: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beqz a4, .LBB4_6 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.5: # %entry -; RV32I-NO-ZBB-SFB-NEXT: ret -; RV32I-NO-ZBB-SFB-NEXT: .LBB4_6: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a2 -; RV32I-NO-ZBB-SFB-NEXT: mv a1, a3 -; RV32I-NO-ZBB-SFB-NEXT: ret -; -; RV64I-NO-ZBB-SFB-LABEL: select_example_smax_1: -; RV64I-NO-ZBB-SFB: # %bb.0: # %entry -; RV64I-NO-ZBB-SFB-NEXT: bge a3, a0, .LBB4_3 -; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB4_4 -; RV64I-NO-ZBB-SFB-NEXT: .LBB4_2: # %entry -; RV64I-NO-ZBB-SFB-NEXT: ret -; RV64I-NO-ZBB-SFB-NEXT: .LBB4_3: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB4_2 -; RV64I-NO-ZBB-SFB-NEXT: .LBB4_4: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV64I-NO-ZBB-SFB-NEXT: ret +; RV32I-ZBB-LABEL: select_example_smax_1: +; RV32I-ZBB: # %bb.0: # %entry +; RV32I-ZBB-NEXT: beq a1, a6, .LBB4_2 +; RV32I-ZBB-NEXT: # %bb.1: # %entry +; RV32I-ZBB-NEXT: slt a7, a6, a1 +; RV32I-ZBB-NEXT: beqz a7, .LBB4_3 +; RV32I-ZBB-NEXT: j .LBB4_4 +; RV32I-ZBB-NEXT: .LBB4_2: +; RV32I-ZBB-NEXT: sltu a7, a5, a0 +; RV32I-ZBB-NEXT: bnez a7, .LBB4_4 +; RV32I-ZBB-NEXT: .LBB4_3: # %entry +; RV32I-ZBB-NEXT: mv a1, a6 +; RV32I-ZBB-NEXT: mv a0, a5 +; RV32I-ZBB-NEXT: .LBB4_4: # %entry +; RV32I-ZBB-NEXT: beqz a4, .LBB4_6 +; RV32I-ZBB-NEXT: # %bb.5: # %entry +; RV32I-ZBB-NEXT: ret +; RV32I-ZBB-NEXT: .LBB4_6: # %entry +; RV32I-ZBB-NEXT: mv a0, a2 +; RV32I-ZBB-NEXT: mv a1, a3 +; RV32I-ZBB-NEXT: ret +; +; RV64I-ZBB-LABEL: select_example_smax_1: +; RV64I-ZBB: # %bb.0: # %entry +; RV64I-ZBB-NEXT: beqz a2, .LBB4_2 +; RV64I-ZBB-NEXT: # %bb.1: +; RV64I-ZBB-NEXT: max a1, a0, a3 +; RV64I-ZBB-NEXT: .LBB4_2: # %entry +; RV64I-ZBB-NEXT: mv a0, a1 +; RV64I-ZBB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_smax_1: ; RV32I-SFB-ZBB: # %bb.0: # %entry @@ -295,12 +334,47 @@ define i64 @select_example_smax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { ; ; RV64I-SFB-ZBB-LABEL: select_example_smax_1: ; RV64I-SFB-ZBB: # %bb.0: # %entry -; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB4_2 +; RV64I-SFB-ZBB-NEXT: max a0, a0, a3 +; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB4_2 ; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV64I-SFB-ZBB-NEXT: max a1, a0, a3 -; RV64I-SFB-ZBB-NEXT: .LBB4_2: # %entry ; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: .LBB4_2: # %entry ; RV64I-SFB-ZBB-NEXT: ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smax_1: +; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a5, a0 +; RV32I-SFBIMinMax-ZBB-NEXT: slt t0, a6, a1 +; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB4_2 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB4_4 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_4: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB4_6 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_6: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB4_8 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_8: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB4_10 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_10: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smax_1: +; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB4_2 +; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: max a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: .LBB4_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT: ret entry: %res = call i64 @llvm.smax.i64(i64 %a, i64 %y) %sel = select i1 %x, i64 %res, i64 %b @@ -308,41 +382,36 @@ entry: } define i64 @select_example_smin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { -; RV32I-NO-ZBB-SFB-LABEL: select_example_smin_1: -; RV32I-NO-ZBB-SFB: # %bb.0: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beq a1, a6, .LBB5_2 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV32I-NO-ZBB-SFB-NEXT: slt a7, a1, a6 -; RV32I-NO-ZBB-SFB-NEXT: beqz a7, .LBB5_3 -; RV32I-NO-ZBB-SFB-NEXT: j .LBB5_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB5_2: -; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a0, a5 -; RV32I-NO-ZBB-SFB-NEXT: bnez a7, .LBB5_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB5_3: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a1, a6 -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a5 -; RV32I-NO-ZBB-SFB-NEXT: .LBB5_4: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beqz a4, .LBB5_6 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.5: # %entry -; RV32I-NO-ZBB-SFB-NEXT: ret -; RV32I-NO-ZBB-SFB-NEXT: .LBB5_6: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a2 -; RV32I-NO-ZBB-SFB-NEXT: mv a1, a3 -; RV32I-NO-ZBB-SFB-NEXT: ret -; -; RV64I-NO-ZBB-SFB-LABEL: select_example_smin_1: -; RV64I-NO-ZBB-SFB: # %bb.0: # %entry -; RV64I-NO-ZBB-SFB-NEXT: bge a0, a3, .LBB5_3 -; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB5_4 -; RV64I-NO-ZBB-SFB-NEXT: .LBB5_2: # %entry -; RV64I-NO-ZBB-SFB-NEXT: ret -; RV64I-NO-ZBB-SFB-NEXT: .LBB5_3: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB5_2 -; RV64I-NO-ZBB-SFB-NEXT: .LBB5_4: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV64I-NO-ZBB-SFB-NEXT: ret +; RV32I-ZBB-LABEL: select_example_smin_1: +; RV32I-ZBB: # %bb.0: # %entry +; RV32I-ZBB-NEXT: beq a1, a6, .LBB5_2 +; RV32I-ZBB-NEXT: # %bb.1: # %entry +; RV32I-ZBB-NEXT: slt a7, a1, a6 +; RV32I-ZBB-NEXT: beqz a7, .LBB5_3 +; RV32I-ZBB-NEXT: j .LBB5_4 +; RV32I-ZBB-NEXT: .LBB5_2: +; RV32I-ZBB-NEXT: sltu a7, a0, a5 +; RV32I-ZBB-NEXT: bnez a7, .LBB5_4 +; RV32I-ZBB-NEXT: .LBB5_3: # %entry +; RV32I-ZBB-NEXT: mv a1, a6 +; RV32I-ZBB-NEXT: mv a0, a5 +; RV32I-ZBB-NEXT: .LBB5_4: # %entry +; RV32I-ZBB-NEXT: beqz a4, .LBB5_6 +; RV32I-ZBB-NEXT: # %bb.5: # %entry +; RV32I-ZBB-NEXT: ret +; RV32I-ZBB-NEXT: .LBB5_6: # %entry +; RV32I-ZBB-NEXT: mv a0, a2 +; RV32I-ZBB-NEXT: mv a1, a3 +; RV32I-ZBB-NEXT: ret +; +; RV64I-ZBB-LABEL: select_example_smin_1: +; RV64I-ZBB: # %bb.0: # %entry +; RV64I-ZBB-NEXT: beqz a2, .LBB5_2 +; RV64I-ZBB-NEXT: # %bb.1: +; RV64I-ZBB-NEXT: min a1, a0, a3 +; RV64I-ZBB-NEXT: .LBB5_2: # %entry +; RV64I-ZBB-NEXT: mv a0, a1 +; RV64I-ZBB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_smin_1: ; RV32I-SFB-ZBB: # %bb.0: # %entry @@ -372,12 +441,47 @@ define i64 @select_example_smin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { ; ; RV64I-SFB-ZBB-LABEL: select_example_smin_1: ; RV64I-SFB-ZBB: # %bb.0: # %entry -; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB5_2 +; RV64I-SFB-ZBB-NEXT: min a0, a0, a3 +; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB5_2 ; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV64I-SFB-ZBB-NEXT: min a1, a0, a3 -; RV64I-SFB-ZBB-NEXT: .LBB5_2: # %entry ; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: .LBB5_2: # %entry ; RV64I-SFB-ZBB-NEXT: ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smin_1: +; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT: slt t0, a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB5_2 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB5_4 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_4: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB5_6 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_6: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB5_8 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_8: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB5_10 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_10: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smin_1: +; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB5_2 +; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: min a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: .LBB5_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT: ret entry: %res = call i64 @llvm.smin.i64(i64 %a, i64 %y) %sel = select i1 %x, i64 %res, i64 %b @@ -385,41 +489,36 @@ entry: } define i64 @select_example_umax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { -; RV32I-NO-ZBB-SFB-LABEL: select_example_umax_1: -; RV32I-NO-ZBB-SFB: # %bb.0: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beq a1, a6, .LBB6_2 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a6, a1 -; RV32I-NO-ZBB-SFB-NEXT: beqz a7, .LBB6_3 -; RV32I-NO-ZBB-SFB-NEXT: j .LBB6_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB6_2: -; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a5, a0 -; RV32I-NO-ZBB-SFB-NEXT: bnez a7, .LBB6_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB6_3: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a1, a6 -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a5 -; RV32I-NO-ZBB-SFB-NEXT: .LBB6_4: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beqz a4, .LBB6_6 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.5: # %entry -; RV32I-NO-ZBB-SFB-NEXT: ret -; RV32I-NO-ZBB-SFB-NEXT: .LBB6_6: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a2 -; RV32I-NO-ZBB-SFB-NEXT: mv a1, a3 -; RV32I-NO-ZBB-SFB-NEXT: ret -; -; RV64I-NO-ZBB-SFB-LABEL: select_example_umax_1: -; RV64I-NO-ZBB-SFB: # %bb.0: # %entry -; RV64I-NO-ZBB-SFB-NEXT: bgeu a3, a0, .LBB6_3 -; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB6_4 -; RV64I-NO-ZBB-SFB-NEXT: .LBB6_2: # %entry -; RV64I-NO-ZBB-SFB-NEXT: ret -; RV64I-NO-ZBB-SFB-NEXT: .LBB6_3: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB6_2 -; RV64I-NO-ZBB-SFB-NEXT: .LBB6_4: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV64I-NO-ZBB-SFB-NEXT: ret +; RV32I-ZBB-LABEL: select_example_umax_1: +; RV32I-ZBB: # %bb.0: # %entry +; RV32I-ZBB-NEXT: beq a1, a6, .LBB6_2 +; RV32I-ZBB-NEXT: # %bb.1: # %entry +; RV32I-ZBB-NEXT: sltu a7, a6, a1 +; RV32I-ZBB-NEXT: beqz a7, .LBB6_3 +; RV32I-ZBB-NEXT: j .LBB6_4 +; RV32I-ZBB-NEXT: .LBB6_2: +; RV32I-ZBB-NEXT: sltu a7, a5, a0 +; RV32I-ZBB-NEXT: bnez a7, .LBB6_4 +; RV32I-ZBB-NEXT: .LBB6_3: # %entry +; RV32I-ZBB-NEXT: mv a1, a6 +; RV32I-ZBB-NEXT: mv a0, a5 +; RV32I-ZBB-NEXT: .LBB6_4: # %entry +; RV32I-ZBB-NEXT: beqz a4, .LBB6_6 +; RV32I-ZBB-NEXT: # %bb.5: # %entry +; RV32I-ZBB-NEXT: ret +; RV32I-ZBB-NEXT: .LBB6_6: # %entry +; RV32I-ZBB-NEXT: mv a0, a2 +; RV32I-ZBB-NEXT: mv a1, a3 +; RV32I-ZBB-NEXT: ret +; +; RV64I-ZBB-LABEL: select_example_umax_1: +; RV64I-ZBB: # %bb.0: # %entry +; RV64I-ZBB-NEXT: beqz a2, .LBB6_2 +; RV64I-ZBB-NEXT: # %bb.1: +; RV64I-ZBB-NEXT: maxu a1, a0, a3 +; RV64I-ZBB-NEXT: .LBB6_2: # %entry +; RV64I-ZBB-NEXT: mv a0, a1 +; RV64I-ZBB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_umax_1: ; RV32I-SFB-ZBB: # %bb.0: # %entry @@ -449,12 +548,47 @@ define i64 @select_example_umax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { ; ; RV64I-SFB-ZBB-LABEL: select_example_umax_1: ; RV64I-SFB-ZBB: # %bb.0: # %entry -; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB6_2 +; RV64I-SFB-ZBB-NEXT: maxu a0, a0, a3 +; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB6_2 ; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV64I-SFB-ZBB-NEXT: maxu a1, a0, a3 -; RV64I-SFB-ZBB-NEXT: .LBB6_2: # %entry ; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: .LBB6_2: # %entry ; RV64I-SFB-ZBB-NEXT: ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umax_1: +; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a5, a0 +; RV32I-SFBIMinMax-ZBB-NEXT: sltu t0, a6, a1 +; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB6_2 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB6_4 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_4: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB6_6 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_6: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB6_8 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_8: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB6_10 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_10: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umax_1: +; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB6_2 +; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: maxu a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: .LBB6_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT: ret entry: %res = call i64 @llvm.umax.i64(i64 %a, i64 %y) %sel = select i1 %x, i64 %res, i64 %b @@ -462,41 +596,36 @@ entry: } define i64 @select_example_umin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { -; RV32I-NO-ZBB-SFB-LABEL: select_example_umin_1: -; RV32I-NO-ZBB-SFB: # %bb.0: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beq a1, a6, .LBB7_2 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a1, a6 -; RV32I-NO-ZBB-SFB-NEXT: beqz a7, .LBB7_3 -; RV32I-NO-ZBB-SFB-NEXT: j .LBB7_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB7_2: -; RV32I-NO-ZBB-SFB-NEXT: sltu a7, a0, a5 -; RV32I-NO-ZBB-SFB-NEXT: bnez a7, .LBB7_4 -; RV32I-NO-ZBB-SFB-NEXT: .LBB7_3: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a1, a6 -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a5 -; RV32I-NO-ZBB-SFB-NEXT: .LBB7_4: # %entry -; RV32I-NO-ZBB-SFB-NEXT: beqz a4, .LBB7_6 -; RV32I-NO-ZBB-SFB-NEXT: # %bb.5: # %entry -; RV32I-NO-ZBB-SFB-NEXT: ret -; RV32I-NO-ZBB-SFB-NEXT: .LBB7_6: # %entry -; RV32I-NO-ZBB-SFB-NEXT: mv a0, a2 -; RV32I-NO-ZBB-SFB-NEXT: mv a1, a3 -; RV32I-NO-ZBB-SFB-NEXT: ret -; -; RV64I-NO-ZBB-SFB-LABEL: select_example_umin_1: -; RV64I-NO-ZBB-SFB: # %bb.0: # %entry -; RV64I-NO-ZBB-SFB-NEXT: bgeu a0, a3, .LBB7_3 -; RV64I-NO-ZBB-SFB-NEXT: # %bb.1: # %entry -; RV64I-NO-ZBB-SFB-NEXT: beqz a2, .LBB7_4 -; RV64I-NO-ZBB-SFB-NEXT: .LBB7_2: # %entry -; RV64I-NO-ZBB-SFB-NEXT: ret -; RV64I-NO-ZBB-SFB-NEXT: .LBB7_3: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a3 -; RV64I-NO-ZBB-SFB-NEXT: bnez a2, .LBB7_2 -; RV64I-NO-ZBB-SFB-NEXT: .LBB7_4: # %entry -; RV64I-NO-ZBB-SFB-NEXT: mv a0, a1 -; RV64I-NO-ZBB-SFB-NEXT: ret +; RV32I-ZBB-LABEL: select_example_umin_1: +; RV32I-ZBB: # %bb.0: # %entry +; RV32I-ZBB-NEXT: beq a1, a6, .LBB7_2 +; RV32I-ZBB-NEXT: # %bb.1: # %entry +; RV32I-ZBB-NEXT: sltu a7, a1, a6 +; RV32I-ZBB-NEXT: beqz a7, .LBB7_3 +; RV32I-ZBB-NEXT: j .LBB7_4 +; RV32I-ZBB-NEXT: .LBB7_2: +; RV32I-ZBB-NEXT: sltu a7, a0, a5 +; RV32I-ZBB-NEXT: bnez a7, .LBB7_4 +; RV32I-ZBB-NEXT: .LBB7_3: # %entry +; RV32I-ZBB-NEXT: mv a1, a6 +; RV32I-ZBB-NEXT: mv a0, a5 +; RV32I-ZBB-NEXT: .LBB7_4: # %entry +; RV32I-ZBB-NEXT: beqz a4, .LBB7_6 +; RV32I-ZBB-NEXT: # %bb.5: # %entry +; RV32I-ZBB-NEXT: ret +; RV32I-ZBB-NEXT: .LBB7_6: # %entry +; RV32I-ZBB-NEXT: mv a0, a2 +; RV32I-ZBB-NEXT: mv a1, a3 +; RV32I-ZBB-NEXT: ret +; +; RV64I-ZBB-LABEL: select_example_umin_1: +; RV64I-ZBB: # %bb.0: # %entry +; RV64I-ZBB-NEXT: beqz a2, .LBB7_2 +; RV64I-ZBB-NEXT: # %bb.1: +; RV64I-ZBB-NEXT: minu a1, a0, a3 +; RV64I-ZBB-NEXT: .LBB7_2: # %entry +; RV64I-ZBB-NEXT: mv a0, a1 +; RV64I-ZBB-NEXT: ret ; ; RV32I-SFB-ZBB-LABEL: select_example_umin_1: ; RV32I-SFB-ZBB: # %bb.0: # %entry @@ -526,12 +655,47 @@ define i64 @select_example_umin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { ; ; RV64I-SFB-ZBB-LABEL: select_example_umin_1: ; RV64I-SFB-ZBB: # %bb.0: # %entry -; RV64I-SFB-ZBB-NEXT: beqz a2, .LBB7_2 +; RV64I-SFB-ZBB-NEXT: minu a0, a0, a3 +; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB7_2 ; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry -; RV64I-SFB-ZBB-NEXT: minu a1, a0, a3 -; RV64I-SFB-ZBB-NEXT: .LBB7_2: # %entry ; RV64I-SFB-ZBB-NEXT: mv a0, a1 +; RV64I-SFB-ZBB-NEXT: .LBB7_2: # %entry ; RV64I-SFB-ZBB-NEXT: ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umin_1: +; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT: sltu t0, a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB7_2 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB7_4 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_4: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB7_6 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_6: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB7_8 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_8: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB7_10 +; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3 +; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_10: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT: ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umin_1: +; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB7_2 +; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: minu a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT: .LBB7_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT: ret entry: %res = call i64 @llvm.umin.i64(i64 %a, i64 %y) %sel = select i1 %x, i64 %res, i64 %b