From 8c4d687fdb6b459b3b4405bbf4da47ceb3683410 Mon Sep 17 00:00:00 2001 From: Rose Date: Mon, 23 Jun 2025 17:19:15 -0400 Subject: [PATCH 1/2] [ARM] Override hasAndNotCompare bics is available on ARM. --- llvm/lib/Target/ARM/ARMISelLowering.h | 5 + llvm/test/CodeGen/ARM/fpclamptosat.ll | 357 +++++++----------- llvm/test/CodeGen/ARM/usat-with-shift.ll | 24 +- llvm/test/CodeGen/ARM/usat.ll | 122 ++++-- .../CodeGen/Thumb2/mve-vselect-constants.ll | 8 +- 5 files changed, 235 insertions(+), 281 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 9c330e60a7d54..604910e04d4cc 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -609,6 +609,11 @@ class VectorType; bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; + bool hasAndNotCompare(SDValue V) const override { + // ARM can select bics for an and-not, so accept every scalar integer type. + return V.getValueType().isScalarInteger(); + } + bool isShuffleMaskLegal(ArrayRef M, EVT VT) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 478b98dfac80f..8ab56b228d2a7 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -613,14 +613,21 @@ define i16 @ustest_f64i16(double %x) { ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2iz -; VFP2-NEXT: usat r0, #16, r0 +; VFP2-NEXT: movw r1, #65535 +; VFP2-NEXT: cmp r0, r1 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt r1, r0 +; VFP2-NEXT: bic.w r0, r1, r1, asr #31 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f64i16: ; FULL: @ %bb.0: @ %entry ; FULL-NEXT: vcvt.s32.f64 s0, d0 +; FULL-NEXT: movw r1, #65535 ; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: usat r0, #16, r0 +; FULL-NEXT: cmp r0, r1 +; FULL-NEXT: csel r0, r0, r1, lt +; FULL-NEXT: bic.w r0, r0, r0, asr #31 ; FULL-NEXT: bx lr entry: %conv = fptosi 
double %x to i32 @@ -738,12 +745,26 @@ define i16 @ustest_f32i16(float %x) { ; SOFT-NEXT: .LCPI14_0: ; SOFT-NEXT: .long 65535 @ 0xffff ; -; VFP-LABEL: ustest_f32i16: -; VFP: @ %bb.0: @ %entry -; VFP-NEXT: vcvt.s32.f32 s0, s0 -; VFP-NEXT: vmov r0, s0 -; VFP-NEXT: usat r0, #16, r0 -; VFP-NEXT: bx lr +; VFP2-LABEL: ustest_f32i16: +; VFP2: @ %bb.0: @ %entry +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: movw r1, #65535 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: cmp r0, r1 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt r1, r0 +; VFP2-NEXT: bic.w r0, r1, r1, asr #31 +; VFP2-NEXT: bx lr +; +; FULL-LABEL: ustest_f32i16: +; FULL: @ %bb.0: @ %entry +; FULL-NEXT: vcvt.s32.f32 s0, s0 +; FULL-NEXT: movw r1, #65535 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: cmp r0, r1 +; FULL-NEXT: csel r0, r0, r1, lt +; FULL-NEXT: bic.w r0, r0, r0, asr #31 +; FULL-NEXT: bx lr entry: %conv = fptosi float %x to i32 %0 = icmp slt i32 %conv, 65535 @@ -890,16 +911,23 @@ define i16 @ustest_f16i16(half %x) { ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: movw r1, #65535 ; VFP2-NEXT: vcvt.s32.f32 s0, s0 ; VFP2-NEXT: vmov r0, s0 -; VFP2-NEXT: usat r0, #16, r0 +; VFP2-NEXT: cmp r0, r1 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt r1, r0 +; VFP2-NEXT: bic.w r0, r1, r1, asr #31 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f16i16: ; FULL: @ %bb.0: @ %entry ; FULL-NEXT: vcvt.s32.f16 s0, s0 +; FULL-NEXT: movw r1, #65535 ; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: usat r0, #16, r0 +; FULL-NEXT: cmp r0, r1 +; FULL-NEXT: csel r0, r0, r1, lt +; FULL-NEXT: bic.w r0, r0, r0, asr #31 ; FULL-NEXT: bx lr entry: %conv = fptosi half %x to i32 @@ -1101,83 +1129,48 @@ entry: define i64 @ustest_f64i64(double %x) { ; SOFT-LABEL: ustest_f64i64: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, lr} -; SOFT-NEXT: push {r4, r5, r6, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: subs r6, 
r2, #1 -; SOFT-NEXT: mov r6, r3 -; SOFT-NEXT: sbcs r6, r5 -; SOFT-NEXT: bge .LBB20_9 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: subs r2, r2, #1 +; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: sbcs r2, r4 +; SOFT-NEXT: bge .LBB20_5 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bge .LBB20_10 +; SOFT-NEXT: bge .LBB20_6 ; SOFT-NEXT: .LBB20_2: @ %entry -; SOFT-NEXT: bge .LBB20_11 +; SOFT-NEXT: blt .LBB20_4 ; SOFT-NEXT: .LBB20_3: @ %entry -; SOFT-NEXT: blt .LBB20_5 +; SOFT-NEXT: mov r3, r4 ; SOFT-NEXT: .LBB20_4: @ %entry -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: asrs r2, r3, #31 +; SOFT-NEXT: bics r0, r2 +; SOFT-NEXT: bics r1, r2 +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .LBB20_5: @ %entry -; SOFT-NEXT: rsbs r6, r0, #0 -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: sbcs r6, r1 -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: sbcs r6, r2 -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: sbcs r2, r3 -; SOFT-NEXT: bge .LBB20_12 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB20_13 -; SOFT-NEXT: .LBB20_7: @ %entry -; SOFT-NEXT: beq .LBB20_14 -; SOFT-NEXT: .LBB20_8: @ %entry -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB20_9: @ %entry -; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: blt .LBB20_2 -; SOFT-NEXT: .LBB20_10: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: blt .LBB20_3 -; SOFT-NEXT: .LBB20_11: @ %entry -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: bge .LBB20_4 -; SOFT-NEXT: b .LBB20_5 -; SOFT-NEXT: .LBB20_12: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB20_7 -; SOFT-NEXT: .LBB20_13: @ %entry +; SOFT-NEXT: .LBB20_6: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB20_8 -; SOFT-NEXT: .LBB20_14: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, r5, r6, pc} +; SOFT-NEXT: bge .LBB20_3 +; SOFT-NEXT: b .LBB20_4 ; ; VFP2-LABEL: ustest_f64i64: ; VFP2: @ %bb.0: @ %entry ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: subs.w lr, r2, #1 +; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w 
r12, #0 -; VFP2-NEXT: sbcs lr, r3, #0 -; VFP2-NEXT: itttt ge +; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2-NEXT: itt ge ; VFP2-NEXT: movge r3, r12 -; VFP2-NEXT: movge r2, #1 -; VFP2-NEXT: movge r1, r12 ; VFP2-NEXT: movge r0, r12 -; VFP2-NEXT: rsbs.w lr, r0, #0 -; VFP2-NEXT: sbcs.w lr, r12, r1 -; VFP2-NEXT: sbcs.w r2, r12, r2 -; VFP2-NEXT: sbcs.w r2, r12, r3 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: it ge +; VFP2-NEXT: movge r1, r12 +; VFP2-NEXT: bic.w r0, r0, r3, asr #31 +; VFP2-NEXT: bic.w r1, r1, r3, asr #31 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f64i64: @@ -1185,22 +1178,14 @@ define i64 @ustest_f64i64(double %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixdfti -; FULL-NEXT: subs.w lr, r2, #1 +; FULL-NEXT: subs r2, #1 ; FULL-NEXT: mov.w r12, #0 -; FULL-NEXT: sbcs lr, r3, #0 -; FULL-NEXT: it ge -; FULL-NEXT: movge r2, #1 +; FULL-NEXT: sbcs r2, r3, #0 +; FULL-NEXT: csel r2, r3, r12, lt ; FULL-NEXT: csel r0, r0, r12, lt -; FULL-NEXT: csel lr, r3, r12, lt ; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: rsbs r3, r0, #0 -; FULL-NEXT: sbcs.w r3, r12, r1 -; FULL-NEXT: sbcs.w r2, r12, r2 -; FULL-NEXT: sbcs.w r2, r12, lr -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r2, ne +; FULL-NEXT: bic.w r0, r0, r2, asr #31 +; FULL-NEXT: bic.w r1, r1, r2, asr #31 ; FULL-NEXT: pop {r7, pc} entry: %conv = fptosi double %x to i128 @@ -1400,83 +1385,48 @@ entry: define i64 @ustest_f32i64(float %x) { ; SOFT-LABEL: ustest_f32i64: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, lr} -; SOFT-NEXT: push {r4, r5, r6, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: subs r6, r2, #1 -; SOFT-NEXT: mov r6, r3 -; SOFT-NEXT: sbcs r6, r5 -; SOFT-NEXT: bge .LBB23_9 
+; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: subs r2, r2, #1 +; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: sbcs r2, r4 +; SOFT-NEXT: bge .LBB23_5 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bge .LBB23_10 +; SOFT-NEXT: bge .LBB23_6 ; SOFT-NEXT: .LBB23_2: @ %entry -; SOFT-NEXT: bge .LBB23_11 +; SOFT-NEXT: blt .LBB23_4 ; SOFT-NEXT: .LBB23_3: @ %entry -; SOFT-NEXT: blt .LBB23_5 +; SOFT-NEXT: mov r3, r4 ; SOFT-NEXT: .LBB23_4: @ %entry -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: asrs r2, r3, #31 +; SOFT-NEXT: bics r0, r2 +; SOFT-NEXT: bics r1, r2 +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .LBB23_5: @ %entry -; SOFT-NEXT: rsbs r6, r0, #0 -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: sbcs r6, r1 -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: sbcs r6, r2 -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: sbcs r2, r3 -; SOFT-NEXT: bge .LBB23_12 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB23_13 -; SOFT-NEXT: .LBB23_7: @ %entry -; SOFT-NEXT: beq .LBB23_14 -; SOFT-NEXT: .LBB23_8: @ %entry -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB23_9: @ %entry -; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: blt .LBB23_2 -; SOFT-NEXT: .LBB23_10: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: blt .LBB23_3 -; SOFT-NEXT: .LBB23_11: @ %entry -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: bge .LBB23_4 -; SOFT-NEXT: b .LBB23_5 -; SOFT-NEXT: .LBB23_12: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB23_7 -; SOFT-NEXT: .LBB23_13: @ %entry +; SOFT-NEXT: .LBB23_6: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB23_8 -; SOFT-NEXT: .LBB23_14: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, r5, r6, pc} +; SOFT-NEXT: bge .LBB23_3 +; SOFT-NEXT: b .LBB23_4 ; ; VFP2-LABEL: ustest_f32i64: ; VFP2: @ %bb.0: @ %entry ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs.w lr, r2, #1 +; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs lr, r3, #0 -; VFP2-NEXT: itttt ge +; VFP2-NEXT: sbcs r2, r3, 
#0 +; VFP2-NEXT: itt ge ; VFP2-NEXT: movge r3, r12 -; VFP2-NEXT: movge r2, #1 -; VFP2-NEXT: movge r1, r12 ; VFP2-NEXT: movge r0, r12 -; VFP2-NEXT: rsbs.w lr, r0, #0 -; VFP2-NEXT: sbcs.w lr, r12, r1 -; VFP2-NEXT: sbcs.w r2, r12, r2 -; VFP2-NEXT: sbcs.w r2, r12, r3 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: it ge +; VFP2-NEXT: movge r1, r12 +; VFP2-NEXT: bic.w r0, r0, r3, asr #31 +; VFP2-NEXT: bic.w r1, r1, r3, asr #31 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f32i64: @@ -1484,22 +1434,14 @@ define i64 @ustest_f32i64(float %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixsfti -; FULL-NEXT: subs.w lr, r2, #1 +; FULL-NEXT: subs r2, #1 ; FULL-NEXT: mov.w r12, #0 -; FULL-NEXT: sbcs lr, r3, #0 -; FULL-NEXT: it ge -; FULL-NEXT: movge r2, #1 +; FULL-NEXT: sbcs r2, r3, #0 +; FULL-NEXT: csel r2, r3, r12, lt ; FULL-NEXT: csel r0, r0, r12, lt -; FULL-NEXT: csel lr, r3, r12, lt ; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: rsbs r3, r0, #0 -; FULL-NEXT: sbcs.w r3, r12, r1 -; FULL-NEXT: sbcs.w r2, r12, r2 -; FULL-NEXT: sbcs.w r2, r12, lr -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r2, ne +; FULL-NEXT: bic.w r0, r0, r2, asr #31 +; FULL-NEXT: bic.w r1, r1, r2, asr #31 ; FULL-NEXT: pop {r7, pc} entry: %conv = fptosi float %x to i128 @@ -1713,61 +1655,34 @@ entry: define i64 @ustest_f16i64(half %x) { ; SOFT-LABEL: ustest_f16i64: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, lr} -; SOFT-NEXT: push {r4, r5, r6, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: subs r6, r2, #1 -; SOFT-NEXT: mov r6, r3 -; SOFT-NEXT: sbcs r6, r5 -; SOFT-NEXT: bge .LBB26_9 +; SOFT-NEXT: movs r4, #0 +; 
SOFT-NEXT: subs r2, r2, #1 +; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: sbcs r2, r4 +; SOFT-NEXT: bge .LBB26_5 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bge .LBB26_10 +; SOFT-NEXT: bge .LBB26_6 ; SOFT-NEXT: .LBB26_2: @ %entry -; SOFT-NEXT: bge .LBB26_11 +; SOFT-NEXT: blt .LBB26_4 ; SOFT-NEXT: .LBB26_3: @ %entry -; SOFT-NEXT: blt .LBB26_5 +; SOFT-NEXT: mov r3, r4 ; SOFT-NEXT: .LBB26_4: @ %entry -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: asrs r2, r3, #31 +; SOFT-NEXT: bics r0, r2 +; SOFT-NEXT: bics r1, r2 +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .LBB26_5: @ %entry -; SOFT-NEXT: rsbs r6, r0, #0 -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: sbcs r6, r1 -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: sbcs r6, r2 -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: sbcs r2, r3 -; SOFT-NEXT: bge .LBB26_12 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB26_13 -; SOFT-NEXT: .LBB26_7: @ %entry -; SOFT-NEXT: beq .LBB26_14 -; SOFT-NEXT: .LBB26_8: @ %entry -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB26_9: @ %entry -; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: blt .LBB26_2 -; SOFT-NEXT: .LBB26_10: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: blt .LBB26_3 -; SOFT-NEXT: .LBB26_11: @ %entry -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: bge .LBB26_4 -; SOFT-NEXT: b .LBB26_5 -; SOFT-NEXT: .LBB26_12: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB26_7 -; SOFT-NEXT: .LBB26_13: @ %entry +; SOFT-NEXT: .LBB26_6: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB26_8 -; SOFT-NEXT: .LBB26_14: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, r5, r6, pc} +; SOFT-NEXT: bge .LBB26_3 +; SOFT-NEXT: b .LBB26_4 ; ; VFP2-LABEL: ustest_f16i64: ; VFP2: @ %bb.0: @ %entry @@ -1777,24 +1692,16 @@ define i64 @ustest_f16i64(half %x) { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs.w lr, r2, #1 +; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs lr, r3, #0 -; VFP2-NEXT: itttt 
ge +; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2-NEXT: itt ge ; VFP2-NEXT: movge r3, r12 -; VFP2-NEXT: movge r2, #1 -; VFP2-NEXT: movge r1, r12 ; VFP2-NEXT: movge r0, r12 -; VFP2-NEXT: rsbs.w lr, r0, #0 -; VFP2-NEXT: sbcs.w lr, r12, r1 -; VFP2-NEXT: sbcs.w r2, r12, r2 -; VFP2-NEXT: sbcs.w r2, r12, r3 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: it ge +; VFP2-NEXT: movge r1, r12 +; VFP2-NEXT: bic.w r0, r0, r3, asr #31 +; VFP2-NEXT: bic.w r1, r1, r3, asr #31 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f16i64: @@ -1804,22 +1711,14 @@ define i64 @ustest_f16i64(half %x) { ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti -; FULL-NEXT: subs.w lr, r2, #1 +; FULL-NEXT: subs r2, #1 ; FULL-NEXT: mov.w r12, #0 -; FULL-NEXT: sbcs lr, r3, #0 -; FULL-NEXT: it ge -; FULL-NEXT: movge r2, #1 +; FULL-NEXT: sbcs r2, r3, #0 +; FULL-NEXT: csel r2, r3, r12, lt ; FULL-NEXT: csel r0, r0, r12, lt -; FULL-NEXT: csel lr, r3, r12, lt ; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: rsbs r3, r0, #0 -; FULL-NEXT: sbcs.w r3, r12, r1 -; FULL-NEXT: sbcs.w r2, r12, r2 -; FULL-NEXT: sbcs.w r2, r12, lr -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r2, ne +; FULL-NEXT: bic.w r0, r0, r2, asr #31 +; FULL-NEXT: bic.w r1, r1, r2, asr #31 ; FULL-NEXT: pop {r7, pc} entry: %conv = fptosi half %x to i128 diff --git a/llvm/test/CodeGen/ARM/usat-with-shift.ll b/llvm/test/CodeGen/ARM/usat-with-shift.ll index cc3de9d6d3407..0eca4c4a76c6c 100644 --- a/llvm/test/CodeGen/ARM/usat-with-shift.ll +++ b/llvm/test/CodeGen/ARM/usat-with-shift.ll @@ -3,10 +3,10 @@ ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+dsp %s -o - | FileCheck %s define arm_aapcs_vfpcc i32 @usat_lsl(i32 %num){ -; CHECK-LABEL: usat_lsl -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: usat r0, #7, r0, lsl #2 -; CHECK-NEXT: bx lr +; 
CHECK-LABEL: usat_lsl: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: usat r0, #7, r0, lsl #2 +; CHECK-NEXT: bx lr entry: %shl = shl i32 %num, 2 %0 = tail call i32 @llvm.arm.usat(i32 %shl, i32 7) @@ -14,10 +14,10 @@ entry: } define arm_aapcs_vfpcc i32 @usat_asr(i32 %num){ -; CHECK-LABEL: usat_asr -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: usat r0, #7, r0, asr #2 -; CHECK-NEXT: bx lr +; CHECK-LABEL: usat_asr: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: usat r0, #7, r0, asr #2 +; CHECK-NEXT: bx lr entry: %shr = ashr i32 %num, 2 %0 = tail call i32 @llvm.arm.usat(i32 %shr, i32 7) @@ -25,10 +25,6 @@ entry: } define arm_aapcs_vfpcc i32 @usat_lsl2(i32 %num){ -; CHECK-LABEL: usat_lsl2: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: usat r0, #15, r0, lsl #15 -; CHECK-NEXT: bx lr entry: %shl = shl nsw i32 %num, 15 %0 = icmp sgt i32 %shl, 0 @@ -39,10 +35,6 @@ entry: } define arm_aapcs_vfpcc i32 @usat_asr2(i32 %num){ -; CHECK-LABEL: usat_asr2: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: usat r0, #15, r0, asr #15 -; CHECK-NEXT: bx lr entry: %shr = ashr i32 %num, 15 %0 = icmp sgt i32 %shr, 0 diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll index d01aa1520b326..2e1d0283ebde2 100644 --- a/llvm/test/CodeGen/ARM/usat.ll +++ b/llvm/test/CodeGen/ARM/usat.ll @@ -32,12 +32,23 @@ define i32 @unsigned_sat_base_32bit(i32 %x) #0 { ; ; V6-LABEL: unsigned_sat_base_32bit: ; V6: @ %bb.0: @ %entry -; V6-NEXT: usat r0, #23, r0 +; V6-NEXT: ldr r1, .LCPI0_0 +; V6-NEXT: cmp r0, r1 +; V6-NEXT: movlt r1, r0 +; V6-NEXT: bic r0, r1, r1, asr #31 ; V6-NEXT: bx lr +; V6-NEXT: .p2align 2 +; V6-NEXT: @ %bb.1: +; V6-NEXT: .LCPI0_0: +; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: unsigned_sat_base_32bit: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: usat r0, #23, r0 +; V6T2-NEXT: movw r1, #65535 +; V6T2-NEXT: movt r1, #127 +; V6T2-NEXT: cmp r0, r1 +; V6T2-NEXT: movlt r1, r0 +; V6T2-NEXT: bic r0, r1, r1, asr #31 ; V6T2-NEXT: bx lr entry: %0 = icmp slt i32 %x, 8388607 @@ -57,11 +68,9 @@ define i16 
@unsigned_sat_base_16bit(i16 %x) #0 { ; V4T-NEXT: orr r2, r2, #1792 ; V4T-NEXT: asr r1, r1, #16 ; V4T-NEXT: cmp r1, r2 -; V4T-NEXT: movge r0, r2 -; V4T-NEXT: lsl r1, r0, #16 -; V4T-NEXT: asr r1, r1, #16 -; V4T-NEXT: cmp r1, #0 -; V4T-NEXT: movle r0, #0 +; V4T-NEXT: movlt r2, r0 +; V4T-NEXT: lsl r0, r2, #16 +; V4T-NEXT: bic r0, r2, r0, asr #31 ; V4T-NEXT: bx lr ; ; V6-LABEL: unsigned_sat_base_16bit: @@ -70,10 +79,9 @@ define i16 @unsigned_sat_base_16bit(i16 %x) #0 { ; V6-NEXT: sxth r1, r0 ; V6-NEXT: orr r2, r2, #1792 ; V6-NEXT: cmp r1, r2 -; V6-NEXT: movge r0, r2 -; V6-NEXT: sxth r1, r0 -; V6-NEXT: cmp r1, #0 -; V6-NEXT: movle r0, #0 +; V6-NEXT: movlt r2, r0 +; V6-NEXT: sxth r0, r2 +; V6-NEXT: bic r0, r2, r0, asr #15 ; V6-NEXT: bx lr ; ; V6T2-LABEL: unsigned_sat_base_16bit: @@ -81,10 +89,9 @@ define i16 @unsigned_sat_base_16bit(i16 %x) #0 { ; V6T2-NEXT: sxth r1, r0 ; V6T2-NEXT: movw r2, #2047 ; V6T2-NEXT: cmp r1, r2 -; V6T2-NEXT: movge r0, r2 -; V6T2-NEXT: sxth r1, r0 -; V6T2-NEXT: cmp r1, #0 -; V6T2-NEXT: movle r0, #0 +; V6T2-NEXT: movlt r2, r0 +; V6T2-NEXT: sxth r0, r2 +; V6T2-NEXT: bic r0, r2, r0, asr #15 ; V6T2-NEXT: bx lr entry: %0 = icmp slt i16 %x, 2047 @@ -104,9 +111,7 @@ define i8 @unsigned_sat_base_8bit(i8 %x) #0 { ; V4T-NEXT: cmp r1, #31 ; V4T-NEXT: movge r0, #31 ; V4T-NEXT: lsl r1, r0, #24 -; V4T-NEXT: asr r1, r1, #24 -; V4T-NEXT: cmp r1, #0 -; V4T-NEXT: movle r0, #0 +; V4T-NEXT: bic r0, r0, r1, asr #31 ; V4T-NEXT: bx lr ; ; V6-LABEL: unsigned_sat_base_8bit: @@ -115,8 +120,7 @@ define i8 @unsigned_sat_base_8bit(i8 %x) #0 { ; V6-NEXT: cmp r1, #31 ; V6-NEXT: movge r0, #31 ; V6-NEXT: sxtb r1, r0 -; V6-NEXT: cmp r1, #0 -; V6-NEXT: movle r0, #0 +; V6-NEXT: bic r0, r0, r1, asr #7 ; V6-NEXT: bx lr ; ; V6T2-LABEL: unsigned_sat_base_8bit: @@ -125,8 +129,7 @@ define i8 @unsigned_sat_base_8bit(i8 %x) #0 { ; V6T2-NEXT: cmp r1, #31 ; V6T2-NEXT: movge r0, #31 ; V6T2-NEXT: sxtb r1, r0 -; V6T2-NEXT: cmp r1, #0 -; V6T2-NEXT: movle r0, #0 +; V6T2-NEXT: bic r0, r0, r1, asr 
#7 ; V6T2-NEXT: bx lr entry: %0 = icmp slt i8 %x, 31 @@ -157,12 +160,23 @@ define i32 @unsigned_sat_lower_upper_1(i32 %x) #0 { ; ; V6-LABEL: unsigned_sat_lower_upper_1: ; V6: @ %bb.0: @ %entry -; V6-NEXT: usat r0, #23, r0 +; V6-NEXT: ldr r1, .LCPI3_0 +; V6-NEXT: cmp r0, r1 +; V6-NEXT: movlt r1, r0 +; V6-NEXT: bic r0, r1, r1, asr #31 ; V6-NEXT: bx lr +; V6-NEXT: .p2align 2 +; V6-NEXT: @ %bb.1: +; V6-NEXT: .LCPI3_0: +; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: unsigned_sat_lower_upper_1: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: usat r0, #23, r0 +; V6T2-NEXT: movw r1, #65535 +; V6T2-NEXT: movt r1, #127 +; V6T2-NEXT: cmp r0, r1 +; V6T2-NEXT: movlt r1, r0 +; V6T2-NEXT: bic r0, r1, r1, asr #31 ; V6T2-NEXT: bx lr entry: %cmpUp = icmp slt i32 %x, 8388607 @@ -188,12 +202,23 @@ define i32 @unsigned_sat_lower_upper_2(i32 %x) #0 { ; ; V6-LABEL: unsigned_sat_lower_upper_2: ; V6: @ %bb.0: @ %entry -; V6-NEXT: usat r0, #23, r0 +; V6-NEXT: ldr r1, .LCPI4_0 +; V6-NEXT: cmp r0, r1 +; V6-NEXT: movlt r1, r0 +; V6-NEXT: bic r0, r1, r1, asr #31 ; V6-NEXT: bx lr +; V6-NEXT: .p2align 2 +; V6-NEXT: @ %bb.1: +; V6-NEXT: .LCPI4_0: +; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: unsigned_sat_lower_upper_2: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: usat r0, #23, r0 +; V6T2-NEXT: movw r1, #65535 +; V6T2-NEXT: movt r1, #127 +; V6T2-NEXT: cmp r0, r1 +; V6T2-NEXT: movlt r1, r0 +; V6T2-NEXT: bic r0, r1, r1, asr #31 ; V6T2-NEXT: bx lr entry: %0 = icmp slt i32 %x, 8388607 @@ -219,12 +244,23 @@ define i32 @unsigned_sat_upper_lower_1(i32 %x) #0 { ; ; V6-LABEL: unsigned_sat_upper_lower_1: ; V6: @ %bb.0: @ %entry -; V6-NEXT: usat r0, #23, r0 +; V6-NEXT: bic r1, r0, r0, asr #31 +; V6-NEXT: ldr r0, .LCPI5_0 +; V6-NEXT: cmp r1, r0 +; V6-NEXT: movlt r0, r1 ; V6-NEXT: bx lr +; V6-NEXT: .p2align 2 +; V6-NEXT: @ %bb.1: +; V6-NEXT: .LCPI5_0: +; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: unsigned_sat_upper_lower_1: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: usat r0, #23, r0 +; V6T2-NEXT: bic r1, r0, 
r0, asr #31 +; V6T2-NEXT: movw r0, #65535 +; V6T2-NEXT: movt r0, #127 +; V6T2-NEXT: cmp r1, r0 +; V6T2-NEXT: movlt r0, r1 ; V6T2-NEXT: bx lr entry: %0 = icmp sgt i32 %x, 0 @@ -250,12 +286,23 @@ define i32 @unsigned_sat_upper_lower_2(i32 %x) #0 { ; ; V6-LABEL: unsigned_sat_upper_lower_2: ; V6: @ %bb.0: @ %entry -; V6-NEXT: usat r0, #23, r0 +; V6-NEXT: bic r1, r0, r0, asr #31 +; V6-NEXT: ldr r0, .LCPI6_0 +; V6-NEXT: cmp r1, r0 +; V6-NEXT: movlt r0, r1 ; V6-NEXT: bx lr +; V6-NEXT: .p2align 2 +; V6-NEXT: @ %bb.1: +; V6-NEXT: .LCPI6_0: +; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: unsigned_sat_upper_lower_2: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: usat r0, #23, r0 +; V6T2-NEXT: bic r1, r0, r0, asr #31 +; V6T2-NEXT: movw r0, #65535 +; V6T2-NEXT: movt r0, #127 +; V6T2-NEXT: cmp r1, r0 +; V6T2-NEXT: movlt r0, r1 ; V6T2-NEXT: bx lr entry: %0 = icmp sgt i32 %x, 0 @@ -281,12 +328,23 @@ define i32 @unsigned_sat_upper_lower_3(i32 %x) #0 { ; ; V6-LABEL: unsigned_sat_upper_lower_3: ; V6: @ %bb.0: @ %entry -; V6-NEXT: usat r0, #23, r0 +; V6-NEXT: bic r1, r0, r0, asr #31 +; V6-NEXT: ldr r0, .LCPI7_0 +; V6-NEXT: cmp r1, r0 +; V6-NEXT: movlt r0, r1 ; V6-NEXT: bx lr +; V6-NEXT: .p2align 2 +; V6-NEXT: @ %bb.1: +; V6-NEXT: .LCPI7_0: +; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: unsigned_sat_upper_lower_3: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: usat r0, #23, r0 +; V6T2-NEXT: bic r1, r0, r0, asr #31 +; V6T2-NEXT: movw r0, #65535 +; V6T2-NEXT: movt r0, #127 +; V6T2-NEXT: cmp r1, r0 +; V6T2-NEXT: movlt r0, r1 ; V6T2-NEXT: bx lr entry: %cmpLow = icmp sgt i32 %x, 0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll b/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll index 726237eb27f2d..024de2b36667b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll @@ -282,12 +282,12 @@ define arm_aapcs_vfpcc <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64> ; CHECK-NEXT: vmov r1, s1 ; CHECK-NEXT: movs r0, 
#0 ; CHECK-NEXT: vmov.i32 q2, #0x0 -; CHECK-NEXT: cmp.w r1, #-1 -; CHECK-NEXT: csetm r1, gt +; CHECK-NEXT: mvns r1, r1 +; CHECK-NEXT: asrs r1, r1, #31 ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: vmov r1, s3 -; CHECK-NEXT: cmp.w r1, #-1 -; CHECK-NEXT: csetm r1, gt +; CHECK-NEXT: mvns r1, r1 +; CHECK-NEXT: asrs r1, r1, #31 ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q1, q2 From 04c9237afc65ac2f732e84cc48957dd3b286bc80 Mon Sep 17 00:00:00 2001 From: AZero13 Date: Sat, 28 Jun 2025 11:04:33 -0400 Subject: [PATCH 2/2] Fix the tests --- llvm/test/CodeGen/ARM/usat-with-shift.ll | 40 ++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/ARM/usat-with-shift.ll b/llvm/test/CodeGen/ARM/usat-with-shift.ll index 0eca4c4a76c6c..b9c083e498c0c 100644 --- a/llvm/test/CodeGen/ARM/usat-with-shift.ll +++ b/llvm/test/CodeGen/ARM/usat-with-shift.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+dsp %s -o - | FileCheck %s +; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,ARMV6 +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+dsp %s -o - | FileCheck %s --check-prefixes=CHECK,THUMB define arm_aapcs_vfpcc i32 @usat_lsl(i32 %num){ ; CHECK-LABEL: usat_lsl: @@ -25,6 +25,24 @@ entry: } define arm_aapcs_vfpcc i32 @usat_lsl2(i32 %num){ +; ARMV6-LABEL: usat_lsl2: +; ARMV6: @ %bb.0: @ %entry +; ARMV6-NEXT: lsl r0, r0, #15 +; ARMV6-NEXT: bic r1, r0, r0, asr #31 +; ARMV6-NEXT: mov r0, #255 +; ARMV6-NEXT: orr r0, r0, #32512 +; ARMV6-NEXT: cmp r1, r0 +; ARMV6-NEXT: movlt r0, r1 +; ARMV6-NEXT: bx lr +; +; THUMB-LABEL: usat_lsl2: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: lsls r0, r0, #15 +; THUMB-NEXT: movw r1, #32767 +; THUMB-NEXT: bic.w r0, r0, r0, asr #31 +; THUMB-NEXT: cmp r0, r1 +; THUMB-NEXT: csel r0, r0, r1, lt 
+; THUMB-NEXT: bx lr entry: %shl = shl nsw i32 %num, 15 %0 = icmp sgt i32 %shl, 0 @@ -35,6 +53,24 @@ entry: } define arm_aapcs_vfpcc i32 @usat_asr2(i32 %num){ +; ARMV6-LABEL: usat_asr2: +; ARMV6: @ %bb.0: @ %entry +; ARMV6-NEXT: asr r1, r0, #15 +; ARMV6-NEXT: bic r1, r1, r0, asr #31 +; ARMV6-NEXT: mov r0, #255 +; ARMV6-NEXT: orr r0, r0, #32512 +; ARMV6-NEXT: cmp r1, r0 +; ARMV6-NEXT: movlt r0, r1 +; ARMV6-NEXT: bx lr +; +; THUMB-LABEL: usat_asr2: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: asrs r1, r0, #15 +; THUMB-NEXT: bic.w r0, r1, r0, asr #31 +; THUMB-NEXT: movw r1, #32767 +; THUMB-NEXT: cmp r0, r1 +; THUMB-NEXT: csel r0, r0, r1, lt +; THUMB-NEXT: bx lr entry: %shr = ashr i32 %num, 15 %0 = icmp sgt i32 %shr, 0