From 291c247bc804c69cd39d2409827d8cda5243d2b4 Mon Sep 17 00:00:00 2001 From: Rose Date: Tue, 1 Jul 2025 12:11:39 -0400 Subject: [PATCH 1/3] Pre-commit test (NFC) --- llvm/test/CodeGen/ARM/min-max-combine.ll | 168 +++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/min-max-combine.ll diff --git a/llvm/test/CodeGen/ARM/min-max-combine.ll b/llvm/test/CodeGen/ARM/min-max-combine.ll new file mode 100644 index 0000000000000..a171d06eec32d --- /dev/null +++ b/llvm/test/CodeGen/ARM/min-max-combine.ll @@ -0,0 +1,168 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv7a < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -mtriple=armv6m < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -mtriple=armv7m < %s | FileCheck %s --check-prefix=THUMB2 + +declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone + +define i8 @smaxi8_zero(i8 %a) { +; ARM-LABEL: smaxi8_zero: +; ARM: @ %bb.0: +; ARM-NEXT: sxtb r0, r0 +; ARM-NEXT: bic r0, r0, r0, asr #31 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smaxi8_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: sxtb r0, r0 +; THUMB-NEXT: asrs r1, r0, #31 +; THUMB-NEXT: bics r0, r1 +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smaxi8_zero: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: sxtb r0, r0 +; THUMB2-NEXT: bic.w r0, r0, r0, asr #31 +; THUMB2-NEXT: bx lr + %c = call i8 @llvm.smax.i8(i8 %a, i8 0) + ret i8 %c +} + +declare i16 @llvm.smax.i16(i16 %a, i16 %b) readnone + +define i16 @smaxi16_zero(i16 %a) { +; ARM-LABEL: smaxi16_zero: +; ARM: @ %bb.0: +; ARM-NEXT: sxth r0, r0 +; ARM-NEXT: bic r0, r0, r0, asr #31 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smaxi16_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: sxth r0, r0 +; THUMB-NEXT: asrs r1, r0, #31 +; THUMB-NEXT: bics r0, r1 +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smaxi16_zero: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: sxth r0, r0 +; THUMB2-NEXT: bic.w r0, r0, r0, asr #31 +; THUMB2-NEXT: bx lr + %c = call i16 @llvm.smax.i16(i16 %a, i16 0) + ret i16 
%c +} + +declare i32 @llvm.smax.i32(i32 %a, i32 %b) readnone + +define i32 @smaxi32_zero(i32 %a) { +; ARM-LABEL: smaxi32_zero: +; ARM: @ %bb.0: +; ARM-NEXT: bic r0, r0, r0, asr #31 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smaxi32_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: asrs r1, r0, #31 +; THUMB-NEXT: bics r0, r1 +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smaxi32_zero: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: bic.w r0, r0, r0, asr #31 +; THUMB2-NEXT: bx lr + %c = call i32 @llvm.smax.i32(i32 %a, i32 0) + ret i32 %c +} + +; SMIN + +declare i8 @llvm.smin.i8(i8 %a, i8 %b) readnone + +define i8 @smini8_zero(i8 %a) { +; ARM-LABEL: smini8_zero: +; ARM: @ %bb.0: +; ARM-NEXT: sxtb r0, r0 +; ARM-NEXT: cmp r0, #0 +; ARM-NEXT: movpl r0, #0 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smini8_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: sxtb r0, r0 +; THUMB-NEXT: cmp r0, #0 +; THUMB-NEXT: bmi .LBB3_2 +; THUMB-NEXT: @ %bb.1: +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: .LBB3_2: +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smini8_zero: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: sxtb r0, r0 +; THUMB2-NEXT: cmp r0, #0 +; THUMB2-NEXT: it pl +; THUMB2-NEXT: movpl r0, #0 +; THUMB2-NEXT: bx lr + %c = call i8 @llvm.smin.i8(i8 %a, i8 0) + ret i8 %c +} + +declare i16 @llvm.smin.i16(i16 %a, i16 %b) readnone + +define i16 @smini16_zero(i16 %a) { +; ARM-LABEL: smini16_zero: +; ARM: @ %bb.0: +; ARM-NEXT: sxth r0, r0 +; ARM-NEXT: cmp r0, #0 +; ARM-NEXT: movpl r0, #0 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smini16_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: sxth r0, r0 +; THUMB-NEXT: cmp r0, #0 +; THUMB-NEXT: bmi .LBB4_2 +; THUMB-NEXT: @ %bb.1: +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: .LBB4_2: +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smini16_zero: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: sxth r0, r0 +; THUMB2-NEXT: cmp r0, #0 +; THUMB2-NEXT: it pl +; THUMB2-NEXT: movpl r0, #0 +; THUMB2-NEXT: bx lr + %c = call i16 @llvm.smin.i16(i16 %a, i16 0) + ret i16 %c +} + +declare i32 @llvm.smin.i32(i32 %a, i32 %b) readnone + +define i32 
@smini32_zero(i32 %a) { +; ARM-LABEL: smini32_zero: +; ARM: @ %bb.0: +; ARM-NEXT: cmp r0, #0 +; ARM-NEXT: movpl r0, #0 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smini32_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: cmp r0, #0 +; THUMB-NEXT: bmi .LBB5_2 +; THUMB-NEXT: @ %bb.1: +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: .LBB5_2: +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smini32_zero: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: cmp r0, #0 +; THUMB2-NEXT: it pl +; THUMB2-NEXT: movpl r0, #0 +; THUMB2-NEXT: bx lr + %c = call i32 @llvm.smin.i32(i32 %a, i32 0) + ret i32 %c +} From 99039230f1b75e2d7a4298b050bbe3b3834b257d Mon Sep 17 00:00:00 2001 From: Rose Date: Tue, 1 Jul 2025 12:18:35 -0400 Subject: [PATCH 2/3] [ARM] Copy SMAX(lhs, 0) and SMIN(lhs, 0) patterns from AArch64 to ARM They work on ARM too. --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 18 ++++++++ llvm/test/CodeGen/ARM/min-max-combine.ll | 46 ++++++------------- .../predicated-liveout-unknown-lanes.ll | 3 +- .../Thumb2/LowOverheadLoops/unpredload.ll | 1 - 4 files changed, 33 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 2d73725291d11..51d6bf2381c10 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -5540,6 +5540,24 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(VT.getSizeInBits() - 1, dl, VT)); return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT)); } + + // Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns. + // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1)) + // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1)) + // Both require fewer instructions than compare and conditional select. 
+ if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TrueVal && RHSC && + RHSC->isZero() && CFVal && CFVal->isZero() && + LHS.getValueType() == RHS.getValueType()) { + EVT VT = LHS.getValueType(); + SDValue Shift = + DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(VT.getSizeInBits() - 1, dl, VT)); + + if (CC == ISD::SETGT) + Shift = DAG.getNOT(dl, Shift, VT); + + return DAG.getNode(ISD::AND, dl, VT, LHS, Shift); + } } if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal && diff --git a/llvm/test/CodeGen/ARM/min-max-combine.ll b/llvm/test/CodeGen/ARM/min-max-combine.ll index a171d06eec32d..b9a7690009337 100644 --- a/llvm/test/CodeGen/ARM/min-max-combine.ll +++ b/llvm/test/CodeGen/ARM/min-max-combine.ll @@ -83,26 +83,20 @@ define i8 @smini8_zero(i8 %a) { ; ARM-LABEL: smini8_zero: ; ARM: @ %bb.0: ; ARM-NEXT: sxtb r0, r0 -; ARM-NEXT: cmp r0, #0 -; ARM-NEXT: movpl r0, #0 +; ARM-NEXT: and r0, r0, r0, asr #31 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: smini8_zero: ; THUMB: @ %bb.0: -; THUMB-NEXT: sxtb r0, r0 -; THUMB-NEXT: cmp r0, #0 -; THUMB-NEXT: bmi .LBB3_2 -; THUMB-NEXT: @ %bb.1: -; THUMB-NEXT: movs r0, #0 -; THUMB-NEXT: .LBB3_2: +; THUMB-NEXT: sxtb r1, r0 +; THUMB-NEXT: asrs r0, r1, #31 +; THUMB-NEXT: ands r0, r1 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: smini8_zero: ; THUMB2: @ %bb.0: ; THUMB2-NEXT: sxtb r0, r0 -; THUMB2-NEXT: cmp r0, #0 -; THUMB2-NEXT: it pl -; THUMB2-NEXT: movpl r0, #0 +; THUMB2-NEXT: and.w r0, r0, r0, asr #31 ; THUMB2-NEXT: bx lr %c = call i8 @llvm.smin.i8(i8 %a, i8 0) ret i8 %c @@ -114,26 +108,20 @@ define i16 @smini16_zero(i16 %a) { ; ARM-LABEL: smini16_zero: ; ARM: @ %bb.0: ; ARM-NEXT: sxth r0, r0 -; ARM-NEXT: cmp r0, #0 -; ARM-NEXT: movpl r0, #0 +; ARM-NEXT: and r0, r0, r0, asr #31 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: smini16_zero: ; THUMB: @ %bb.0: -; THUMB-NEXT: sxth r0, r0 -; THUMB-NEXT: cmp r0, #0 -; THUMB-NEXT: bmi .LBB4_2 -; THUMB-NEXT: @ %bb.1: -; THUMB-NEXT: movs r0, #0 -; THUMB-NEXT: .LBB4_2: +; THUMB-NEXT: sxth r1, r0 +; THUMB-NEXT: asrs 
r0, r1, #31 +; THUMB-NEXT: ands r0, r1 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: smini16_zero: ; THUMB2: @ %bb.0: ; THUMB2-NEXT: sxth r0, r0 -; THUMB2-NEXT: cmp r0, #0 -; THUMB2-NEXT: it pl -; THUMB2-NEXT: movpl r0, #0 +; THUMB2-NEXT: and.w r0, r0, r0, asr #31 ; THUMB2-NEXT: bx lr %c = call i16 @llvm.smin.i16(i16 %a, i16 0) ret i16 %c @@ -144,24 +132,18 @@ declare i32 @llvm.smin.i32(i32 %a, i32 %b) readnone define i32 @smini32_zero(i32 %a) { ; ARM-LABEL: smini32_zero: ; ARM: @ %bb.0: -; ARM-NEXT: cmp r0, #0 -; ARM-NEXT: movpl r0, #0 +; ARM-NEXT: and r0, r0, r0, asr #31 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: smini32_zero: ; THUMB: @ %bb.0: -; THUMB-NEXT: cmp r0, #0 -; THUMB-NEXT: bmi .LBB5_2 -; THUMB-NEXT: @ %bb.1: -; THUMB-NEXT: movs r0, #0 -; THUMB-NEXT: .LBB5_2: +; THUMB-NEXT: asrs r1, r0, #31 +; THUMB-NEXT: ands r0, r1 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: smini32_zero: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: cmp r0, #0 -; THUMB2-NEXT: it pl -; THUMB2-NEXT: movpl r0, #0 +; THUMB2-NEXT: and.w r0, r0, r0, asr #31 ; THUMB2-NEXT: bx lr %c = call i32 @llvm.smin.i32(i32 %a, i32 0) ret i32 %c diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll index 9194d7842a6d3..9772c8311bfbc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll @@ -6,8 +6,7 @@ define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(ptr %pSrc, i32 %block ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: subs r2, r1, #4 -; CHECK-NEXT: movw r3, #0 +; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: movt r3, #65408 ; CHECK-NEXT: vdup.32 q0, r3 ; CHECK-NEXT: dlstp.32 lr, r1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll index 6b5b6b2b1b677..573a9420b5278 100644 
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll @@ -5,7 +5,6 @@ define void @arm_cmplx_mag_squared_q15_mve(ptr %pSrc, ptr %pDst, i32 %blockSize) ; CHECK-LABEL: arm_cmplx_mag_squared_q15_mve: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: subs.w r3, r2, #8 ; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 From ecc05e89a3005c0fbc3e3a7cf8f1104429091527 Mon Sep 17 00:00:00 2001 From: AZero13 Date: Thu, 10 Jul 2025 12:39:34 -0400 Subject: [PATCH 3/3] Add thumbv8m line to test --- llvm/test/CodeGen/ARM/min-max-combine.ll | 35 ++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/llvm/test/CodeGen/ARM/min-max-combine.ll b/llvm/test/CodeGen/ARM/min-max-combine.ll index b9a7690009337..8cb0d79f5e339 100644 --- a/llvm/test/CodeGen/ARM/min-max-combine.ll +++ b/llvm/test/CodeGen/ARM/min-max-combine.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=armv7a < %s | FileCheck %s --check-prefix=ARM ; RUN: llc -mtriple=armv6m < %s | FileCheck %s --check-prefix=THUMB ; RUN: llc -mtriple=armv7m < %s | FileCheck %s --check-prefix=THUMB2 +; RUN: llc -mtriple=thumbv8.1m.main < %s | FileCheck %s --check-prefix=THUMBV8 declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone @@ -24,6 +25,12 @@ define i8 @smaxi8_zero(i8 %a) { ; THUMB2-NEXT: sxtb r0, r0 ; THUMB2-NEXT: bic.w r0, r0, r0, asr #31 ; THUMB2-NEXT: bx lr +; +; THUMBV8-LABEL: smaxi8_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: sxtb r0, r0 +; THUMBV8-NEXT: bic.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr %c = call i8 @llvm.smax.i8(i8 %a, i8 0) ret i8 %c } @@ -49,6 +56,12 @@ define i16 @smaxi16_zero(i16 %a) { ; THUMB2-NEXT: sxth r0, r0 ; THUMB2-NEXT: bic.w r0, r0, r0, asr #31 ; THUMB2-NEXT: bx lr +; +; THUMBV8-LABEL: smaxi16_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: sxth r0, r0 +; THUMBV8-NEXT: bic.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr %c = call i16 @llvm.smax.i16(i16 %a, 
i16 0) ret i16 %c } @@ -71,6 +84,11 @@ define i32 @smaxi32_zero(i32 %a) { ; THUMB2: @ %bb.0: ; THUMB2-NEXT: bic.w r0, r0, r0, asr #31 ; THUMB2-NEXT: bx lr +; +; THUMBV8-LABEL: smaxi32_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: bic.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr %c = call i32 @llvm.smax.i32(i32 %a, i32 0) ret i32 %c } @@ -98,6 +116,12 @@ define i8 @smini8_zero(i8 %a) { ; THUMB2-NEXT: sxtb r0, r0 ; THUMB2-NEXT: and.w r0, r0, r0, asr #31 ; THUMB2-NEXT: bx lr +; +; THUMBV8-LABEL: smini8_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: sxtb r0, r0 +; THUMBV8-NEXT: and.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr %c = call i8 @llvm.smin.i8(i8 %a, i8 0) ret i8 %c } @@ -123,6 +147,12 @@ define i16 @smini16_zero(i16 %a) { ; THUMB2-NEXT: sxth r0, r0 ; THUMB2-NEXT: and.w r0, r0, r0, asr #31 ; THUMB2-NEXT: bx lr +; +; THUMBV8-LABEL: smini16_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: sxth r0, r0 +; THUMBV8-NEXT: and.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr %c = call i16 @llvm.smin.i16(i16 %a, i16 0) ret i16 %c } @@ -145,6 +175,11 @@ define i32 @smini32_zero(i32 %a) { ; THUMB2: @ %bb.0: ; THUMB2-NEXT: and.w r0, r0, r0, asr #31 ; THUMB2-NEXT: bx lr +; +; THUMBV8-LABEL: smini32_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: and.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr %c = call i32 @llvm.smin.i32(i32 %a, i32 0) ret i32 %c }