
Conversation

AZero13 (Contributor) commented Aug 10, 2025

No description provided.

AZero13 marked this pull request as ready for review August 10, 2025 22:41
llvmbot (Member) commented Aug 10, 2025

@llvm/pr-subscribers-backend-arm

Author: AZero13 (AZero13)

Changes

Patch is 332.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152945.diff

14 Files Affected:

  • (modified) llvm/lib/Target/ARM/ARMInstrThumb2.td (+9)
  • (modified) llvm/test/CodeGen/ARM/fpclamptosat.ll (+66-66)
  • (modified) llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll (+306-165)
  • (modified) llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll (+244-137)
  • (modified) llvm/test/CodeGen/Thumb/scmp.ll (+4-8)
  • (modified) llvm/test/CodeGen/Thumb/ucmp.ll (+4-8)
  • (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll (+16-16)
  • (modified) llvm/test/CodeGen/Thumb2/mve-blockplacement.ll (+12-12)
  • (modified) llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll (+85-81)
  • (modified) llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll (+1504-1455)
  • (modified) llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll (+1158-1254)
  • (modified) llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll (+1-1)
  • (modified) llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll (+2-4)
  • (modified) llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll (+6-10)
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 8f56fb0938dd0..cd26e19378170 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -5765,6 +5765,15 @@ let Predicates = [HasV8_1MMainline] in {
   def : T2Pat<(ARMcmov (i32 0), (i32 -1), imm:$cc, CPSR),
               (t2CSINV ZR, ZR, (inv_cond_XFORM imm:$cc))>;
 
+  def : T2Pat<(ARMcmov GPRwithZR:$tval, (i32 1), imm:$cc, CPSR),
+              (t2CSINC GPRwithZR:$tval, ZR, imm:$cc)>;
+  def : T2Pat<(ARMcmov (i32 1), GPRwithZR:$fval, imm:$cc, CPSR),
+              (t2CSINC GPRwithZR:$fval, ZR, (inv_cond_XFORM imm:$cc))>;
+  def : T2Pat<(ARMcmov GPRwithZR:$tval, (i32 -1), imm:$cc, CPSR),
+              (t2CSINV GPRwithZR:$tval, ZR, imm:$cc)>;
+  def : T2Pat<(ARMcmov (i32 -1), GPRwithZR:$fval, imm:$cc, CPSR),
+              (t2CSINV GPRwithZR:$fval, ZR, (inv_cond_XFORM imm:$cc))>;
+
   multiclass ModifiedV8_1CSEL<Instruction Insn, dag modvalue> {
     def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, imm:$cc, CPSR),
                 (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc)>;
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 8ab56b228d2a7..87a1221088e09 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -1039,8 +1039,8 @@ define i64 @stest_f64i64(double %x) {
 ;
 ; FULL-LABEL: stest_f64i64:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r4, r5, r7, lr}
-; FULL-NEXT:    push {r4, r5, r7, lr}
+; FULL-NEXT:    .save {r4, lr}
+; FULL-NEXT:    push {r4, lr}
 ; FULL-NEXT:    bl __fixdfti
 ; FULL-NEXT:    subs.w lr, r0, #-1
 ; FULL-NEXT:    mvn r12, #-2147483648
@@ -1049,20 +1049,20 @@ define i64 @stest_f64i64(double %x) {
 ; FULL-NEXT:    sbcs lr, r3, #0
 ; FULL-NEXT:    cset lr, lt
 ; FULL-NEXT:    cmp.w lr, #0
-; FULL-NEXT:    csel r5, r3, lr, ne
-; FULL-NEXT:    mov.w r3, #-1
-; FULL-NEXT:    csel r0, r0, r3, ne
+; FULL-NEXT:    csinv r0, r0, zr, eq
 ; FULL-NEXT:    csel r1, r1, r12, ne
+; FULL-NEXT:    csel r3, r3, lr, ne
 ; FULL-NEXT:    csel r2, r2, lr, ne
 ; FULL-NEXT:    rsbs r4, r0, #0
-; FULL-NEXT:    mov.w r12, #-2147483648
-; FULL-NEXT:    sbcs.w r4, r12, r1
-; FULL-NEXT:    sbcs.w r2, r3, r2
-; FULL-NEXT:    sbcs.w r2, r3, r5
+; FULL-NEXT:    mov.w lr, #-2147483648
+; FULL-NEXT:    sbcs.w r4, lr, r1
+; FULL-NEXT:    mov.w r12, #-1
+; FULL-NEXT:    sbcs.w r2, r12, r2
+; FULL-NEXT:    sbcs.w r2, r12, r3
 ; FULL-NEXT:    it ge
 ; FULL-NEXT:    movge r0, #0
-; FULL-NEXT:    csel r1, r1, r12, lt
-; FULL-NEXT:    pop {r4, r5, r7, pc}
+; FULL-NEXT:    csel r1, r1, lr, lt
+; FULL-NEXT:    pop {r4, pc}
 entry:
   %conv = fptosi double %x to i128
   %0 = icmp slt i128 %conv, 9223372036854775807
@@ -1295,8 +1295,8 @@ define i64 @stest_f32i64(float %x) {
 ;
 ; FULL-LABEL: stest_f32i64:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r4, r5, r7, lr}
-; FULL-NEXT:    push {r4, r5, r7, lr}
+; FULL-NEXT:    .save {r4, lr}
+; FULL-NEXT:    push {r4, lr}
 ; FULL-NEXT:    bl __fixsfti
 ; FULL-NEXT:    subs.w lr, r0, #-1
 ; FULL-NEXT:    mvn r12, #-2147483648
@@ -1305,20 +1305,20 @@ define i64 @stest_f32i64(float %x) {
 ; FULL-NEXT:    sbcs lr, r3, #0
 ; FULL-NEXT:    cset lr, lt
 ; FULL-NEXT:    cmp.w lr, #0
-; FULL-NEXT:    csel r5, r3, lr, ne
-; FULL-NEXT:    mov.w r3, #-1
-; FULL-NEXT:    csel r0, r0, r3, ne
+; FULL-NEXT:    csinv r0, r0, zr, eq
 ; FULL-NEXT:    csel r1, r1, r12, ne
+; FULL-NEXT:    csel r3, r3, lr, ne
 ; FULL-NEXT:    csel r2, r2, lr, ne
 ; FULL-NEXT:    rsbs r4, r0, #0
-; FULL-NEXT:    mov.w r12, #-2147483648
-; FULL-NEXT:    sbcs.w r4, r12, r1
-; FULL-NEXT:    sbcs.w r2, r3, r2
-; FULL-NEXT:    sbcs.w r2, r3, r5
+; FULL-NEXT:    mov.w lr, #-2147483648
+; FULL-NEXT:    sbcs.w r4, lr, r1
+; FULL-NEXT:    mov.w r12, #-1
+; FULL-NEXT:    sbcs.w r2, r12, r2
+; FULL-NEXT:    sbcs.w r2, r12, r3
 ; FULL-NEXT:    it ge
 ; FULL-NEXT:    movge r0, #0
-; FULL-NEXT:    csel r1, r1, r12, lt
-; FULL-NEXT:    pop {r4, r5, r7, pc}
+; FULL-NEXT:    csel r1, r1, lr, lt
+; FULL-NEXT:    pop {r4, pc}
 entry:
   %conv = fptosi float %x to i128
   %0 = icmp slt i128 %conv, 9223372036854775807
@@ -1556,8 +1556,8 @@ define i64 @stest_f16i64(half %x) {
 ;
 ; FULL-LABEL: stest_f16i64:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r4, r5, r7, lr}
-; FULL-NEXT:    push {r4, r5, r7, lr}
+; FULL-NEXT:    .save {r4, lr}
+; FULL-NEXT:    push {r4, lr}
 ; FULL-NEXT:    vmov.f16 r0, s0
 ; FULL-NEXT:    vmov s0, r0
 ; FULL-NEXT:    bl __fixhfti
@@ -1568,20 +1568,20 @@ define i64 @stest_f16i64(half %x) {
 ; FULL-NEXT:    sbcs lr, r3, #0
 ; FULL-NEXT:    cset lr, lt
 ; FULL-NEXT:    cmp.w lr, #0
-; FULL-NEXT:    csel r5, r3, lr, ne
-; FULL-NEXT:    mov.w r3, #-1
-; FULL-NEXT:    csel r0, r0, r3, ne
+; FULL-NEXT:    csinv r0, r0, zr, eq
 ; FULL-NEXT:    csel r1, r1, r12, ne
+; FULL-NEXT:    csel r3, r3, lr, ne
 ; FULL-NEXT:    csel r2, r2, lr, ne
 ; FULL-NEXT:    rsbs r4, r0, #0
-; FULL-NEXT:    mov.w r12, #-2147483648
-; FULL-NEXT:    sbcs.w r4, r12, r1
-; FULL-NEXT:    sbcs.w r2, r3, r2
-; FULL-NEXT:    sbcs.w r2, r3, r5
+; FULL-NEXT:    mov.w lr, #-2147483648
+; FULL-NEXT:    sbcs.w r4, lr, r1
+; FULL-NEXT:    mov.w r12, #-1
+; FULL-NEXT:    sbcs.w r2, r12, r2
+; FULL-NEXT:    sbcs.w r2, r12, r3
 ; FULL-NEXT:    it ge
 ; FULL-NEXT:    movge r0, #0
-; FULL-NEXT:    csel r1, r1, r12, lt
-; FULL-NEXT:    pop {r4, r5, r7, pc}
+; FULL-NEXT:    csel r1, r1, lr, lt
+; FULL-NEXT:    pop {r4, pc}
 entry:
   %conv = fptosi half %x to i128
   %0 = icmp slt i128 %conv, 9223372036854775807
@@ -2708,8 +2708,8 @@ define i64 @stest_f64i64_mm(double %x) {
 ;
 ; FULL-LABEL: stest_f64i64_mm:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r4, r5, r7, lr}
-; FULL-NEXT:    push {r4, r5, r7, lr}
+; FULL-NEXT:    .save {r4, lr}
+; FULL-NEXT:    push {r4, lr}
 ; FULL-NEXT:    bl __fixdfti
 ; FULL-NEXT:    subs.w lr, r0, #-1
 ; FULL-NEXT:    mvn r12, #-2147483648
@@ -2718,21 +2718,21 @@ define i64 @stest_f64i64_mm(double %x) {
 ; FULL-NEXT:    sbcs lr, r3, #0
 ; FULL-NEXT:    cset lr, lt
 ; FULL-NEXT:    cmp.w lr, #0
-; FULL-NEXT:    csel r5, r3, lr, ne
-; FULL-NEXT:    mov.w r3, #-1
-; FULL-NEXT:    csel r0, r0, r3, ne
+; FULL-NEXT:    csinv r0, r0, zr, eq
 ; FULL-NEXT:    csel r1, r1, r12, ne
+; FULL-NEXT:    csel r3, r3, lr, ne
 ; FULL-NEXT:    csel r2, r2, lr, ne
 ; FULL-NEXT:    rsbs r4, r0, #0
-; FULL-NEXT:    mov.w r12, #-2147483648
-; FULL-NEXT:    sbcs.w r4, r12, r1
-; FULL-NEXT:    sbcs.w r2, r3, r2
-; FULL-NEXT:    sbcs.w r2, r3, r5
+; FULL-NEXT:    mov.w lr, #-2147483648
+; FULL-NEXT:    sbcs.w r4, lr, r1
+; FULL-NEXT:    mov.w r12, #-1
+; FULL-NEXT:    sbcs.w r2, r12, r2
+; FULL-NEXT:    sbcs.w r2, r12, r3
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
 ; FULL-NEXT:    csel r0, r0, r2, ne
-; FULL-NEXT:    csel r1, r1, r12, ne
-; FULL-NEXT:    pop {r4, r5, r7, pc}
+; FULL-NEXT:    csel r1, r1, lr, ne
+; FULL-NEXT:    pop {r4, pc}
 entry:
   %conv = fptosi double %x to i128
   %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -3021,8 +3021,8 @@ define i64 @stest_f32i64_mm(float %x) {
 ;
 ; FULL-LABEL: stest_f32i64_mm:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r4, r5, r7, lr}
-; FULL-NEXT:    push {r4, r5, r7, lr}
+; FULL-NEXT:    .save {r4, lr}
+; FULL-NEXT:    push {r4, lr}
 ; FULL-NEXT:    bl __fixsfti
 ; FULL-NEXT:    subs.w lr, r0, #-1
 ; FULL-NEXT:    mvn r12, #-2147483648
@@ -3031,21 +3031,21 @@ define i64 @stest_f32i64_mm(float %x) {
 ; FULL-NEXT:    sbcs lr, r3, #0
 ; FULL-NEXT:    cset lr, lt
 ; FULL-NEXT:    cmp.w lr, #0
-; FULL-NEXT:    csel r5, r3, lr, ne
-; FULL-NEXT:    mov.w r3, #-1
-; FULL-NEXT:    csel r0, r0, r3, ne
+; FULL-NEXT:    csinv r0, r0, zr, eq
 ; FULL-NEXT:    csel r1, r1, r12, ne
+; FULL-NEXT:    csel r3, r3, lr, ne
 ; FULL-NEXT:    csel r2, r2, lr, ne
 ; FULL-NEXT:    rsbs r4, r0, #0
-; FULL-NEXT:    mov.w r12, #-2147483648
-; FULL-NEXT:    sbcs.w r4, r12, r1
-; FULL-NEXT:    sbcs.w r2, r3, r2
-; FULL-NEXT:    sbcs.w r2, r3, r5
+; FULL-NEXT:    mov.w lr, #-2147483648
+; FULL-NEXT:    sbcs.w r4, lr, r1
+; FULL-NEXT:    mov.w r12, #-1
+; FULL-NEXT:    sbcs.w r2, r12, r2
+; FULL-NEXT:    sbcs.w r2, r12, r3
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
 ; FULL-NEXT:    csel r0, r0, r2, ne
-; FULL-NEXT:    csel r1, r1, r12, ne
-; FULL-NEXT:    pop {r4, r5, r7, pc}
+; FULL-NEXT:    csel r1, r1, lr, ne
+; FULL-NEXT:    pop {r4, pc}
 entry:
   %conv = fptosi float %x to i128
   %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -3339,8 +3339,8 @@ define i64 @stest_f16i64_mm(half %x) {
 ;
 ; FULL-LABEL: stest_f16i64_mm:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r4, r5, r7, lr}
-; FULL-NEXT:    push {r4, r5, r7, lr}
+; FULL-NEXT:    .save {r4, lr}
+; FULL-NEXT:    push {r4, lr}
 ; FULL-NEXT:    vmov.f16 r0, s0
 ; FULL-NEXT:    vmov s0, r0
 ; FULL-NEXT:    bl __fixhfti
@@ -3351,21 +3351,21 @@ define i64 @stest_f16i64_mm(half %x) {
 ; FULL-NEXT:    sbcs lr, r3, #0
 ; FULL-NEXT:    cset lr, lt
 ; FULL-NEXT:    cmp.w lr, #0
-; FULL-NEXT:    csel r5, r3, lr, ne
-; FULL-NEXT:    mov.w r3, #-1
-; FULL-NEXT:    csel r0, r0, r3, ne
+; FULL-NEXT:    csinv r0, r0, zr, eq
 ; FULL-NEXT:    csel r1, r1, r12, ne
+; FULL-NEXT:    csel r3, r3, lr, ne
 ; FULL-NEXT:    csel r2, r2, lr, ne
 ; FULL-NEXT:    rsbs r4, r0, #0
-; FULL-NEXT:    mov.w r12, #-2147483648
-; FULL-NEXT:    sbcs.w r4, r12, r1
-; FULL-NEXT:    sbcs.w r2, r3, r2
-; FULL-NEXT:    sbcs.w r2, r3, r5
+; FULL-NEXT:    mov.w lr, #-2147483648
+; FULL-NEXT:    sbcs.w r4, lr, r1
+; FULL-NEXT:    mov.w r12, #-1
+; FULL-NEXT:    sbcs.w r2, r12, r2
+; FULL-NEXT:    sbcs.w r2, r12, r3
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
 ; FULL-NEXT:    csel r0, r0, r2, ne
-; FULL-NEXT:    csel r1, r1, r12, ne
-; FULL-NEXT:    pop {r4, r5, r7, pc}
+; FULL-NEXT:    csel r1, r1, lr, ne
+; FULL-NEXT:    pop {r4, pc}
 entry:
   %conv = fptosi half %x to i128
   %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
index 5179f97624489..a42a2a8083f6f 100644
--- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
@@ -633,40 +633,74 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
 ; SOFT-NEXT:  .LCPI6_2:
 ; SOFT-NEXT:    .long 131071 @ 0x1ffff
 ;
-; VFP-LABEL: test_signed_i50_f32:
-; VFP:       @ %bb.0:
-; VFP-NEXT:    .save {r4, lr}
-; VFP-NEXT:    push {r4, lr}
-; VFP-NEXT:    mov r4, r0
-; VFP-NEXT:    bl __aeabi_f2lz
-; VFP-NEXT:    vldr s0, .LCPI6_0
-; VFP-NEXT:    vmov s2, r4
-; VFP-NEXT:    vldr s4, .LCPI6_1
-; VFP-NEXT:    vcmp.f32 s2, s0
-; VFP-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP-NEXT:    ittt lt
-; VFP-NEXT:    movlt r1, #0
-; VFP-NEXT:    movtlt r1, #65534
-; VFP-NEXT:    movlt r0, #0
-; VFP-NEXT:    vcmp.f32 s2, s4
-; VFP-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP-NEXT:    it gt
-; VFP-NEXT:    movgt.w r0, #-1
-; VFP-NEXT:    vcmp.f32 s2, s2
-; VFP-NEXT:    itt gt
-; VFP-NEXT:    movwgt r1, #65535
-; VFP-NEXT:    movtgt r1, #1
-; VFP-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP-NEXT:    itt vs
-; VFP-NEXT:    movvs r0, #0
-; VFP-NEXT:    movvs r1, #0
-; VFP-NEXT:    pop {r4, pc}
-; VFP-NEXT:    .p2align 2
-; VFP-NEXT:  @ %bb.1:
-; VFP-NEXT:  .LCPI6_0:
-; VFP-NEXT:    .long 0xd8000000 @ float -5.62949953E+14
-; VFP-NEXT:  .LCPI6_1:
-; VFP-NEXT:    .long 0x57ffffff @ float 5.6294992E+14
+; VFP2-LABEL: test_signed_i50_f32:
+; VFP2:       @ %bb.0:
+; VFP2-NEXT:    .save {r4, lr}
+; VFP2-NEXT:    push {r4, lr}
+; VFP2-NEXT:    mov r4, r0
+; VFP2-NEXT:    bl __aeabi_f2lz
+; VFP2-NEXT:    vldr s0, .LCPI6_0
+; VFP2-NEXT:    vmov s2, r4
+; VFP2-NEXT:    vldr s4, .LCPI6_1
+; VFP2-NEXT:    vcmp.f32 s2, s0
+; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
+; VFP2-NEXT:    ittt lt
+; VFP2-NEXT:    movlt r1, #0
+; VFP2-NEXT:    movtlt r1, #65534
+; VFP2-NEXT:    movlt r0, #0
+; VFP2-NEXT:    vcmp.f32 s2, s4
+; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
+; VFP2-NEXT:    it gt
+; VFP2-NEXT:    movgt.w r0, #-1
+; VFP2-NEXT:    vcmp.f32 s2, s2
+; VFP2-NEXT:    itt gt
+; VFP2-NEXT:    movwgt r1, #65535
+; VFP2-NEXT:    movtgt r1, #1
+; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
+; VFP2-NEXT:    itt vs
+; VFP2-NEXT:    movvs r0, #0
+; VFP2-NEXT:    movvs r1, #0
+; VFP2-NEXT:    pop {r4, pc}
+; VFP2-NEXT:    .p2align 2
+; VFP2-NEXT:  @ %bb.1:
+; VFP2-NEXT:  .LCPI6_0:
+; VFP2-NEXT:    .long 0xd8000000 @ float -5.62949953E+14
+; VFP2-NEXT:  .LCPI6_1:
+; VFP2-NEXT:    .long 0x57ffffff @ float 5.6294992E+14
+;
+; FP16-LABEL: test_signed_i50_f32:
+; FP16:       @ %bb.0:
+; FP16-NEXT:    .save {r4, lr}
+; FP16-NEXT:    push {r4, lr}
+; FP16-NEXT:    mov r4, r0
+; FP16-NEXT:    bl __aeabi_f2lz
+; FP16-NEXT:    vldr s0, .LCPI6_0
+; FP16-NEXT:    vmov s2, r4
+; FP16-NEXT:    vldr s4, .LCPI6_1
+; FP16-NEXT:    vcmp.f32 s2, s0
+; FP16-NEXT:    vmrs APSR_nzcv, fpscr
+; FP16-NEXT:    vcmp.f32 s2, s4
+; FP16-NEXT:    ittt lt
+; FP16-NEXT:    movlt r1, #0
+; FP16-NEXT:    movtlt r1, #65534
+; FP16-NEXT:    movlt r0, #0
+; FP16-NEXT:    vmrs APSR_nzcv, fpscr
+; FP16-NEXT:    vcmp.f32 s2, s2
+; FP16-NEXT:    itt gt
+; FP16-NEXT:    movwgt r1, #65535
+; FP16-NEXT:    movtgt r1, #1
+; FP16-NEXT:    csinv r0, r0, zr, gt
+; FP16-NEXT:    vmrs APSR_nzcv, fpscr
+; FP16-NEXT:    itt vs
+; FP16-NEXT:    movvs r0, #0
+; FP16-NEXT:    movvs r1, #0
+; FP16-NEXT:    pop {r4, pc}
+; FP16-NEXT:    .p2align 2
+; FP16-NEXT:  @ %bb.1:
+; FP16-NEXT:  .LCPI6_0:
+; FP16-NEXT:    .long 0xd8000000 @ float -5.62949953E+14
+; FP16-NEXT:  .LCPI6_1:
+; FP16-NEXT:    .long 0x57ffffff @ float 5.6294992E+14
     %x = call i50 @llvm.fptosi.sat.i50.f32(float %f)
     ret i50 %x
 }
@@ -735,37 +769,69 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
 ; SOFT-NEXT:  .LCPI7_1:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
-; VFP-LABEL: test_signed_i64_f32:
-; VFP:       @ %bb.0:
-; VFP-NEXT:    .save {r4, lr}
-; VFP-NEXT:    push {r4, lr}
-; VFP-NEXT:    mov r4, r0
-; VFP-NEXT:    bl __aeabi_f2lz
-; VFP-NEXT:    vldr s0, .LCPI7_0
-; VFP-NEXT:    vmov s2, r4
-; VFP-NEXT:    vldr s4, .LCPI7_1
-; VFP-NEXT:    vcmp.f32 s2, s0
-; VFP-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP-NEXT:    itt lt
-; VFP-NEXT:    movlt r0, #0
-; VFP-NEXT:    movlt.w r1, #-2147483648
-; VFP-NEXT:    vcmp.f32 s2, s4
-; VFP-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP-NEXT:    itt gt
-; VFP-NEXT:    mvngt r1, #-2147483648
-; VFP-NEXT:    movgt.w r0, #-1
-; VFP-NEXT:    vcmp.f32 s2, s2
-; VFP-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP-NEXT:    itt vs
-; VFP-NEXT:    movvs r0, #0
-; VFP-NEXT:    movvs r1, #0
-; VFP-NEXT:    pop {r4, pc}
-; VFP-NEXT:    .p2align 2
-; VFP-NEXT:  @ %bb.1:
-; VFP-NEXT:  .LCPI7_0:
-; VFP-NEXT:    .long 0xdf000000 @ float -9.22337203E+18
-; VFP-NEXT:  .LCPI7_1:
-; VFP-NEXT:    .long 0x5effffff @ float 9.22337149E+18
+; VFP2-LABEL: test_signed_i64_f32:
+; VFP2:       @ %bb.0:
+; VFP2-NEXT:    .save {r4, lr}
+; VFP2-NEXT:    push {r4, lr}
+; VFP2-NEXT:    mov r4, r0
+; VFP2-NEXT:    bl __aeabi_f2lz
+; VFP2-NEXT:    vldr s0, .LCPI7_0
+; VFP2-NEXT:    vmov s2, r4
+; VFP2-NEXT:    vldr s4, .LCPI7_1
+; VFP2-NEXT:    vcmp.f32 s2, s0
+; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
+; VFP2-NEXT:    itt lt
+; VFP2-NEXT:    movlt r0, #0
+; VFP2-NEXT:    movlt.w r1, #-2147483648
+; VFP2-NEXT:    vcmp.f32 s2, s4
+; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
+; VFP2-NEXT:    itt gt
+; VFP2-NEXT:    mvngt r1, #-2147483648
+; VFP2-NEXT:    movgt.w r0, #-1
+; VFP2-NEXT:    vcmp.f32 s2, s2
+; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
+; VFP2-NEXT:    itt vs
+; VFP2-NEXT:    movvs r0, #0
+; VFP2-NEXT:    movvs r1, #0
+; VFP2-NEXT:    pop {r4, pc}
+; VFP2-NEXT:    .p2align 2
+; VFP2-NEXT:  @ %bb.1:
+; VFP2-NEXT:  .LCPI7_0:
+; VFP2-NEXT:    .long 0xdf000000 @ float -9.22337203E+18
+; VFP2-NEXT:  .LCPI7_1:
+; VFP2-NEXT:    .long 0x5effffff @ float 9.22337149E+18
+;
+; FP16-LABEL: test_signed_i64_f32:
+; FP16:       @ %bb.0:
+; FP16-NEXT:    .save {r4, lr}
+; FP16-NEXT:    push {r4, lr}
+; FP16-NEXT:    mov r4, r0
+; FP16-NEXT:    bl __aeabi_f2lz
+; FP16-NEXT:    vldr s0, .LCPI7_0
+; FP16-NEXT:    vmov s2, r4
+; FP16-NEXT:    vldr s4, .LCPI7_1
+; FP16-NEXT:    vcmp.f32 s2, s0
+; FP16-NEXT:    vmrs APSR_nzcv, fpscr
+; FP16-NEXT:    vcmp.f32 s2, s4
+; FP16-NEXT:    itt lt
+; FP16-NEXT:    movlt r0, #0
+; FP16-NEXT:    movlt.w r1, #-2147483648
+; FP16-NEXT:    vmrs APSR_nzcv, fpscr
+; FP16-NEXT:    it gt
+; FP16-NEXT:    mvngt r1, #-2147483648
+; FP16-NEXT:    vcmp.f32 s2, s2
+; FP16-NEXT:    csinv r0, r0, zr, gt
+; FP16-NEXT:    vmrs APSR_nzcv, fpscr
+; FP16-NEXT:    itt vs
+; FP16-NEXT:    movvs r0, #0
+; FP16-NEXT:    movvs r1, #0
+; FP16-NEXT:    pop {r4, pc}
+; FP16-NEXT:    .p2align 2
+; FP16-NEXT:  @ %bb.1:
+; FP16-NEXT:  .LCPI7_0:
+; FP16-NEXT:    .long 0xdf000000 @ float -9.22337203E+18
+; FP16-NEXT:  .LCPI7_1:
+; FP16-NEXT:    .long 0x5effffff @ float 9.22337149E+18
     %x = call i64 @llvm.fptosi.sat.i64.f32(float %f)
     ret i64 %x
 }
@@ -880,43 +946,81 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
 ; SOFT-NEXT:  .LCPI8_0:
 ; SOFT-NEXT:    .long 1895825407 @ 0x70ffffff
 ;
-; VFP-LABEL: test_signed_i100_f32:
-; VFP:       @ %bb.0:
-; VFP-NEXT:    .save {r4, lr}
-; VFP-NEXT:    push {r4, lr}
-; VFP-NEXT:    mov r4, r0
-; VFP-NEXT:    bl __fixsfti
-; VFP-NEXT:    vldr s0, .LCPI8_0
-; VFP-NEXT:    vmov s2, r4
-; VFP-NEXT:    vldr s4, .LCPI8_1
-; VFP-NEXT:    vcmp.f32 s2, s0
-; VFP-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP-NEXT:    itttt lt
-; VFP-NEXT:    movlt r0, #0
-; VFP-NEXT:    movlt r1, #0
-; VFP-NEXT:    movlt r2, #0
-; VFP-NEXT:    mvnlt r3, #7
-; VFP-NEXT:    vcmp.f32 s2, s4
-; VFP-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP-NEXT:    itttt gt
-; VFP-NEXT:    movgt r3, #7
-; VFP-NEXT:    movgt.w r2, #-1
-; VFP-NEXT:    movgt.w r1, #-1
-; VFP-NEXT:    movgt.w r0, #-1
-; VFP-NEXT:    vcmp.f32 s2, s2
-; VFP-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP-NEXT:    itttt vs
-; VFP-NEXT:    movvs r0, #0
-; VFP-NEXT:    movvs r1, #0
-; VFP-NEXT:    movvs r2, #0
-; VFP-NEXT:    movvs r3, #0
-; VFP-NEXT:    pop {r4, pc}
-; VFP-NEXT:    .p2align 2
-; VFP-NEXT:  @ %bb.1:
-; VFP-NEXT:  .LCPI8_0:
-; VFP-NEXT:    .long 0xf1000000 @ float -6.338253E+29
-; VFP-NEXT:  .LCPI8_1:
-; VFP-NEXT:    .long 0x70ffffff @ float 6.33825262E+29
+; VFP2-LABEL: test_signed_i100_f32:
+; VFP2:       @ %bb.0:
+; VFP2-NEXT:    .save {r4, lr}
+; VFP2-NEXT:    push {r4, lr}
+; VFP2-NEXT:    mov r4, r0
+; VFP2-NEXT:    bl __fixsfti
+; VFP2-NEXT:    vldr s0, .LCPI8_0
+; VFP2-NEXT:    vmov s2, r4
+; VFP2-NEXT:    vldr s4, .LCPI8_1
+; VFP2-NEXT:    vcmp.f32 s2, s0
+; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
+; VFP2-NEXT:    itttt lt
+; VFP2-NEXT:    movlt r0, #0
+; VFP2-NEXT:    movlt r1, #0
+; VFP2-NEXT:    movlt r2, #0
+; VFP2-NEXT:    mvnlt r3, #7
+; VFP2-NEXT:    vcmp.f32 s2, s4
+; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
+; VFP2-NEXT:    itttt gt
+; VFP2-NEXT:    movgt r3, #7
+; VFP2-NEXT:    movgt.w r2, #-1
+; VFP2-NEXT:    movgt.w r1, #-1
+; VFP2-NEXT:    movgt.w r0, #-1
+; VFP2-NEXT:    vcmp.f32 s2, s2
+; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
+; VFP2-NEXT:    itttt vs
+; VFP2-NEXT:    movvs r0, #0
+; VFP2-NEXT:    movvs r1, #0
+; VFP2-NEXT:    movvs r2, #0
+; VFP2-NEXT:    movvs r3, #0
+; VFP2-NEXT:    pop {r4, pc}
+; VFP2-NEXT:    .p2align 2
+; VFP2-NEXT:  @ %bb.1:
+; VFP2-NEXT:  .LCPI8_0:
+; VFP2-NEXT:    .long 0xf1000000 @ float -6.338253E+29
+; VFP2-NEXT:  .LCPI8_1:
+; VFP2-NEXT:    .long 0x70ffffff @ float 6.33825262E+29
+;
+; FP16-LABEL: test_signed_i100_f32:
+; FP16:       @ %bb.0:
+; FP16-NEXT:    .save {r4, lr}
+; FP16-NEXT:    push {r4, lr}
+; FP16-NEXT:    mov r4, r0
+; FP16-NEXT:    bl __fixsfti
+; FP16-NEXT:    vldr s0, .LCPI8_0
+; FP16-NEXT:    vmov s2, r4
+; FP16-NEXT:    vldr s4, .LCPI8_1
+; FP16-NEXT:    vcmp.f32 s2, s0
+; FP16-NEXT:    vmrs APSR_nzcv, fpscr
+; FP16-NEXT:    vcmp.f32 s2, s4
+; FP16-NEXT:    itttt lt
+; FP16-NEXT:    movlt r0, #0
+; FP16-NEXT:    movlt r1, #0
+; FP16-NEXT:    movlt r2, #0
+; FP16-NEXT:    mvnlt r3, #7
+; FP16-NEXT:    vmrs APSR_nzcv, fpscr
+; FP16-NEXT:    it gt
+; FP16-NEXT:    movgt r3, #7
+; FP16-NEXT:    vcmp.f32 s2, s2
+; FP16-NEXT:    csinv r2, r2, zr, gt
+; FP16-NEXT:    csinv r1, r1, zr, gt
+; FP16-NEXT:    csinv r0, r0, zr, gt
+; FP16-NEXT:    vmrs APSR_nzcv, fpscr
+; FP16-NEXT:    itttt vs
+; FP16-NEXT:    movvs r0, #0
+; FP...
[truncated]
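
For context, the Armv8.1-M conditional-select instructions targeted by the new patterns have these semantics (per the architecture reference; with ZR as the second source, CSINC and CSINV materialize the constants 1 and -1 without needing an IT block):

  // t2CSEL  Rd, Rn, Rm, cond : Rd = cond ? Rn : Rm
  // t2CSINC Rd, Rn, Rm, cond : Rd = cond ? Rn : Rm + 1
  // t2CSINV Rd, Rn, Rm, cond : Rd = cond ? Rn : ~Rm
  // With Rm = ZR: CSINC gives cond ? Rn : 1 and CSINV gives cond ? Rn : -1,
  // replacing an IT plus conditional mov with a single instruction.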

nikic removed their request for review August 11, 2025 07:36
RKSimon (Collaborator) commented Aug 12, 2025

Isn't this just "Port AArch64's CSel handling patterns to Thumb2" and not ARM in general?

RKSimon requested a review from davemgreen August 12, 2025 15:51
AZero13 changed the title from "[ARM] Port AArch64's CSel handling patterns to ARM" to "[ARM] Port AArch64's CSel handling patterns to Thumb2" Aug 12, 2025
AZero13 (Contributor, Author) commented Aug 12, 2025

> Isn't this just "Port AArch64's CSel handling patterns to Thumb2" and not ARM in general?

Yeah

davemgreen (Collaborator) commented Aug 13, 2025

ARMcmov works the other way around; this is inverting the condition for the wrong pattern. Make sure you test the patches thoroughly, as surprising things can go wrong.

(Otherwise this sounds like a useful addition, provided the longer encoding of a csel doesn't outweigh the benefit of usually not requiring an IT block.)
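
A minimal sketch of what the corrected patterns might look like, assuming the (false value, true value, cc, CPSR) operand order for ARMcmov that the existing 0/-1 pattern in the hunk already implies; this illustrates the point above and is not the committed fix:

  // Sketch: ARMcmov ($fval, $tval, cc) yields $tval when cc holds, while
  // CSINC/CSINV produce ZR + 1 / ~ZR only on their false leg. So the forms
  // with the constant as the true value need the inverted condition, and
  // the forms with the constant as the false value keep the original one.
  def : T2Pat<(ARMcmov GPRwithZR:$fval, (i32 1), imm:$cc, CPSR),
              (t2CSINC GPRwithZR:$fval, ZR, (inv_cond_XFORM imm:$cc))>;
  def : T2Pat<(ARMcmov (i32 1), GPRwithZR:$tval, imm:$cc, CPSR),
              (t2CSINC GPRwithZR:$tval, ZR, imm:$cc)>;
  def : T2Pat<(ARMcmov GPRwithZR:$fval, (i32 -1), imm:$cc, CPSR),
              (t2CSINV GPRwithZR:$fval, ZR, (inv_cond_XFORM imm:$cc))>;
  def : T2Pat<(ARMcmov (i32 -1), GPRwithZR:$tval, imm:$cc, CPSR),
              (t2CSINV GPRwithZR:$tval, ZR, imm:$cc)>;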
