diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 8f56fb0938dd0..cd26e19378170 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -5765,6 +5765,20 @@ let Predicates = [HasV8_1MMainline] in {
   def : T2Pat<(ARMcmov (i32 0), (i32 -1), imm:$cc, CPSR),
               (t2CSINV ZR, ZR, (inv_cond_XFORM imm:$cc))>;
 
+  // Select between a register and the constant 1 or -1. CSINC/CSINV return
+  // $Rn when the condition holds and ZR+1 / ~ZR otherwise, while ARMcmov
+  // yields its second operand when $cc holds. The register therefore needs
+  // the inverted condition when it is the first ARMcmov operand, and the
+  // plain condition when it is the second (cf. ModifiedV8_1CSEL below).
+  def : T2Pat<(ARMcmov GPRwithZR:$tval, (i32 1), imm:$cc, CPSR),
+              (t2CSINC GPRwithZR:$tval, ZR, (inv_cond_XFORM imm:$cc))>;
+  def : T2Pat<(ARMcmov (i32 1), GPRwithZR:$tval, imm:$cc, CPSR),
+              (t2CSINC GPRwithZR:$tval, ZR, imm:$cc)>;
+  def : T2Pat<(ARMcmov GPRwithZR:$tval, (i32 -1), imm:$cc, CPSR),
+              (t2CSINV GPRwithZR:$tval, ZR, (inv_cond_XFORM imm:$cc))>;
+  def : T2Pat<(ARMcmov (i32 -1), GPRwithZR:$tval, imm:$cc, CPSR),
+              (t2CSINV GPRwithZR:$tval, ZR, imm:$cc)>;
+
   multiclass ModifiedV8_1CSEL<Instruction Insn, dag modvalue> {
     def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, imm:$cc, CPSR),
                 (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc)>;
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 8ab56b228d2a7..87a1221088e09 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -1039,8 +1039,8 @@ define i64 @stest_f64i64(double %x) {
 ;
 ; FULL-LABEL: stest_f64i64:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r4, r5, r7, lr}
-; FULL-NEXT:    push {r4, r5, r7, lr}
+; FULL-NEXT:    .save {r4, lr}
+; FULL-NEXT:    push {r4, lr}
 ; FULL-NEXT:    bl __fixdfti
 ; FULL-NEXT:    subs.w lr, r0, #-1
 ; FULL-NEXT:    mvn r12, #-2147483648
@@ -1049,20 +1049,20 @@ define i64 @stest_f64i64(double %x) {
 ; FULL-NEXT:    sbcs lr, r3, #0
 ; FULL-NEXT:    cset lr, lt
 ; FULL-NEXT:    cmp.w lr, #0
-; FULL-NEXT:    csel r5, r3, lr, ne
-; FULL-NEXT:    mov.w r3, #-1
-; FULL-NEXT:    csel r0, r0, r3, ne
+; FULL-NEXT:    csinv r0, r0, zr, ne
 ; FULL-NEXT:    csel r1, r1, r12, ne
+; FULL-NEXT:    csel r3, r3, lr, ne
 ; FULL-NEXT:    csel r2, r2, lr, ne
 ; FULL-NEXT:    rsbs r4, r0, #0
-; FULL-NEXT:    mov.w r12, 
#-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: mov.w lr, #-2147483648 +; FULL-NEXT: sbcs.w r4, lr, r1 +; FULL-NEXT: mov.w r12, #-1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; FULL-NEXT: sbcs.w r2, r12, r3 ; FULL-NEXT: it ge ; FULL-NEXT: movge r0, #0 -; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL-NEXT: csel r1, r1, lr, lt +; FULL-NEXT: pop {r4, pc} entry: %conv = fptosi double %x to i128 %0 = icmp slt i128 %conv, 9223372036854775807 @@ -1295,8 +1295,8 @@ define i64 @stest_f32i64(float %x) { ; ; FULL-LABEL: stest_f32i64: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} +; FULL-NEXT: .save {r4, lr} +; FULL-NEXT: push {r4, lr} ; FULL-NEXT: bl __fixsfti ; FULL-NEXT: subs.w lr, r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 @@ -1305,20 +1305,20 @@ define i64 @stest_f32i64(float %x) { ; FULL-NEXT: sbcs lr, r3, #0 ; FULL-NEXT: cset lr, lt ; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne +; FULL-NEXT: csinv r0, r0, zr, eq ; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: csel r3, r3, lr, ne ; FULL-NEXT: csel r2, r2, lr, ne ; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: mov.w lr, #-2147483648 +; FULL-NEXT: sbcs.w r4, lr, r1 +; FULL-NEXT: mov.w r12, #-1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; FULL-NEXT: sbcs.w r2, r12, r3 ; FULL-NEXT: it ge ; FULL-NEXT: movge r0, #0 -; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL-NEXT: csel r1, r1, lr, lt +; FULL-NEXT: pop {r4, pc} entry: %conv = fptosi float %x to i128 %0 = icmp slt i128 %conv, 9223372036854775807 @@ -1556,8 +1556,8 @@ define i64 @stest_f16i64(half %x) { ; ; FULL-LABEL: stest_f16i64: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; 
FULL-NEXT: push {r4, r5, r7, lr} +; FULL-NEXT: .save {r4, lr} +; FULL-NEXT: push {r4, lr} ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti @@ -1568,20 +1568,20 @@ define i64 @stest_f16i64(half %x) { ; FULL-NEXT: sbcs lr, r3, #0 ; FULL-NEXT: cset lr, lt ; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne +; FULL-NEXT: csinv r0, r0, zr, eq ; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: csel r3, r3, lr, ne ; FULL-NEXT: csel r2, r2, lr, ne ; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: mov.w lr, #-2147483648 +; FULL-NEXT: sbcs.w r4, lr, r1 +; FULL-NEXT: mov.w r12, #-1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; FULL-NEXT: sbcs.w r2, r12, r3 ; FULL-NEXT: it ge ; FULL-NEXT: movge r0, #0 -; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL-NEXT: csel r1, r1, lr, lt +; FULL-NEXT: pop {r4, pc} entry: %conv = fptosi half %x to i128 %0 = icmp slt i128 %conv, 9223372036854775807 @@ -2708,8 +2708,8 @@ define i64 @stest_f64i64_mm(double %x) { ; ; FULL-LABEL: stest_f64i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} +; FULL-NEXT: .save {r4, lr} +; FULL-NEXT: push {r4, lr} ; FULL-NEXT: bl __fixdfti ; FULL-NEXT: subs.w lr, r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 @@ -2718,21 +2718,21 @@ define i64 @stest_f64i64_mm(double %x) { ; FULL-NEXT: sbcs lr, r3, #0 ; FULL-NEXT: cset lr, lt ; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne +; FULL-NEXT: csinv r0, r0, zr, eq ; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: csel r3, r3, lr, ne ; FULL-NEXT: csel r2, r2, lr, ne ; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: 
sbcs.w r2, r3, r5 +; FULL-NEXT: mov.w lr, #-2147483648 +; FULL-NEXT: sbcs.w r4, lr, r1 +; FULL-NEXT: mov.w r12, #-1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; FULL-NEXT: sbcs.w r2, r12, r3 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL-NEXT: csel r1, r1, lr, ne +; FULL-NEXT: pop {r4, pc} entry: %conv = fptosi double %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3021,8 +3021,8 @@ define i64 @stest_f32i64_mm(float %x) { ; ; FULL-LABEL: stest_f32i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} +; FULL-NEXT: .save {r4, lr} +; FULL-NEXT: push {r4, lr} ; FULL-NEXT: bl __fixsfti ; FULL-NEXT: subs.w lr, r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 @@ -3031,21 +3031,21 @@ define i64 @stest_f32i64_mm(float %x) { ; FULL-NEXT: sbcs lr, r3, #0 ; FULL-NEXT: cset lr, lt ; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne +; FULL-NEXT: csinv r0, r0, zr, eq ; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: csel r3, r3, lr, ne ; FULL-NEXT: csel r2, r2, lr, ne ; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: mov.w lr, #-2147483648 +; FULL-NEXT: sbcs.w r4, lr, r1 +; FULL-NEXT: mov.w r12, #-1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; FULL-NEXT: sbcs.w r2, r12, r3 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL-NEXT: csel r1, r1, lr, ne +; FULL-NEXT: pop {r4, pc} entry: %conv = fptosi float %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3339,8 +3339,8 @@ define i64 @stest_f16i64_mm(half %x) { ; ; FULL-LABEL: stest_f16i64_mm: 
; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} +; FULL-NEXT: .save {r4, lr} +; FULL-NEXT: push {r4, lr} ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti @@ -3351,21 +3351,21 @@ define i64 @stest_f16i64_mm(half %x) { ; FULL-NEXT: sbcs lr, r3, #0 ; FULL-NEXT: cset lr, lt ; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne +; FULL-NEXT: csinv r0, r0, zr, eq ; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: csel r3, r3, lr, ne ; FULL-NEXT: csel r2, r2, lr, ne ; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: mov.w lr, #-2147483648 +; FULL-NEXT: sbcs.w r4, lr, r1 +; FULL-NEXT: mov.w r12, #-1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; FULL-NEXT: sbcs.w r2, r12, r3 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL-NEXT: csel r1, r1, lr, ne +; FULL-NEXT: pop {r4, pc} entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll index 5179f97624489..a42a2a8083f6f 100644 --- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll @@ -633,40 +633,74 @@ define i50 @test_signed_i50_f32(float %f) nounwind { ; SOFT-NEXT: .LCPI6_2: ; SOFT-NEXT: .long 131071 @ 0x1ffff ; -; VFP-LABEL: test_signed_i50_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r4, lr} -; VFP-NEXT: push {r4, lr} -; VFP-NEXT: mov r4, r0 -; VFP-NEXT: bl __aeabi_f2lz -; VFP-NEXT: vldr s0, .LCPI6_0 -; VFP-NEXT: vmov s2, r4 -; VFP-NEXT: vldr s4, .LCPI6_1 -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: ittt lt -; VFP-NEXT: 
movlt r1, #0 -; VFP-NEXT: movtlt r1, #65534 -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it gt -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: itt gt -; VFP-NEXT: movwgt r1, #65535 -; VFP-NEXT: movtgt r1, #1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itt vs -; VFP-NEXT: movvs r0, #0 -; VFP-NEXT: movvs r1, #0 -; VFP-NEXT: pop {r4, pc} -; VFP-NEXT: .p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI6_0: -; VFP-NEXT: .long 0xd8000000 @ float -5.62949953E+14 -; VFP-NEXT: .LCPI6_1: -; VFP-NEXT: .long 0x57ffffff @ float 5.6294992E+14 +; VFP2-LABEL: test_signed_i50_f32: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r4, lr} +; VFP2-NEXT: push {r4, lr} +; VFP2-NEXT: mov r4, r0 +; VFP2-NEXT: bl __aeabi_f2lz +; VFP2-NEXT: vldr s0, .LCPI6_0 +; VFP2-NEXT: vmov s2, r4 +; VFP2-NEXT: vldr s4, .LCPI6_1 +; VFP2-NEXT: vcmp.f32 s2, s0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: ittt lt +; VFP2-NEXT: movlt r1, #0 +; VFP2-NEXT: movtlt r1, #65534 +; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vcmp.f32 s2, s4 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: it gt +; VFP2-NEXT: movgt.w r0, #-1 +; VFP2-NEXT: vcmp.f32 s2, s2 +; VFP2-NEXT: itt gt +; VFP2-NEXT: movwgt r1, #65535 +; VFP2-NEXT: movtgt r1, #1 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itt vs +; VFP2-NEXT: movvs r0, #0 +; VFP2-NEXT: movvs r1, #0 +; VFP2-NEXT: pop {r4, pc} +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI6_0: +; VFP2-NEXT: .long 0xd8000000 @ float -5.62949953E+14 +; VFP2-NEXT: .LCPI6_1: +; VFP2-NEXT: .long 0x57ffffff @ float 5.6294992E+14 +; +; FP16-LABEL: test_signed_i50_f32: +; FP16: @ %bb.0: +; FP16-NEXT: .save {r4, lr} +; FP16-NEXT: push {r4, lr} +; FP16-NEXT: mov r4, r0 +; FP16-NEXT: bl __aeabi_f2lz +; FP16-NEXT: vldr s0, .LCPI6_0 +; FP16-NEXT: vmov s2, r4 +; FP16-NEXT: vldr s4, .LCPI6_1 +; FP16-NEXT: vcmp.f32 s2, s0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: vcmp.f32 s2, s4 +; FP16-NEXT: ittt lt +; 
FP16-NEXT: movlt r1, #0 +; FP16-NEXT: movtlt r1, #65534 +; FP16-NEXT: movlt r0, #0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: vcmp.f32 s2, s2 +; FP16-NEXT: itt gt +; FP16-NEXT: movwgt r1, #65535 +; FP16-NEXT: movtgt r1, #1 +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: itt vs +; FP16-NEXT: movvs r0, #0 +; FP16-NEXT: movvs r1, #0 +; FP16-NEXT: pop {r4, pc} +; FP16-NEXT: .p2align 2 +; FP16-NEXT: @ %bb.1: +; FP16-NEXT: .LCPI6_0: +; FP16-NEXT: .long 0xd8000000 @ float -5.62949953E+14 +; FP16-NEXT: .LCPI6_1: +; FP16-NEXT: .long 0x57ffffff @ float 5.6294992E+14 %x = call i50 @llvm.fptosi.sat.i50.f32(float %f) ret i50 %x } @@ -735,37 +769,69 @@ define i64 @test_signed_i64_f32(float %f) nounwind { ; SOFT-NEXT: .LCPI7_1: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; -; VFP-LABEL: test_signed_i64_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r4, lr} -; VFP-NEXT: push {r4, lr} -; VFP-NEXT: mov r4, r0 -; VFP-NEXT: bl __aeabi_f2lz -; VFP-NEXT: vldr s0, .LCPI7_0 -; VFP-NEXT: vmov s2, r4 -; VFP-NEXT: vldr s4, .LCPI7_1 -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itt lt -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: movlt.w r1, #-2147483648 -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itt gt -; VFP-NEXT: mvngt r1, #-2147483648 -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itt vs -; VFP-NEXT: movvs r0, #0 -; VFP-NEXT: movvs r1, #0 -; VFP-NEXT: pop {r4, pc} -; VFP-NEXT: .p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI7_0: -; VFP-NEXT: .long 0xdf000000 @ float -9.22337203E+18 -; VFP-NEXT: .LCPI7_1: -; VFP-NEXT: .long 0x5effffff @ float 9.22337149E+18 +; VFP2-LABEL: test_signed_i64_f32: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r4, lr} +; VFP2-NEXT: push {r4, lr} +; VFP2-NEXT: mov r4, r0 +; VFP2-NEXT: bl __aeabi_f2lz +; VFP2-NEXT: vldr s0, .LCPI7_0 +; VFP2-NEXT: vmov s2, r4 +; VFP2-NEXT: vldr s4, .LCPI7_1 +; VFP2-NEXT: vcmp.f32 s2, 
s0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itt lt +; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: movlt.w r1, #-2147483648 +; VFP2-NEXT: vcmp.f32 s2, s4 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itt gt +; VFP2-NEXT: mvngt r1, #-2147483648 +; VFP2-NEXT: movgt.w r0, #-1 +; VFP2-NEXT: vcmp.f32 s2, s2 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itt vs +; VFP2-NEXT: movvs r0, #0 +; VFP2-NEXT: movvs r1, #0 +; VFP2-NEXT: pop {r4, pc} +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI7_0: +; VFP2-NEXT: .long 0xdf000000 @ float -9.22337203E+18 +; VFP2-NEXT: .LCPI7_1: +; VFP2-NEXT: .long 0x5effffff @ float 9.22337149E+18 +; +; FP16-LABEL: test_signed_i64_f32: +; FP16: @ %bb.0: +; FP16-NEXT: .save {r4, lr} +; FP16-NEXT: push {r4, lr} +; FP16-NEXT: mov r4, r0 +; FP16-NEXT: bl __aeabi_f2lz +; FP16-NEXT: vldr s0, .LCPI7_0 +; FP16-NEXT: vmov s2, r4 +; FP16-NEXT: vldr s4, .LCPI7_1 +; FP16-NEXT: vcmp.f32 s2, s0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: vcmp.f32 s2, s4 +; FP16-NEXT: itt lt +; FP16-NEXT: movlt r0, #0 +; FP16-NEXT: movlt.w r1, #-2147483648 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: it gt +; FP16-NEXT: mvngt r1, #-2147483648 +; FP16-NEXT: vcmp.f32 s2, s2 +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: itt vs +; FP16-NEXT: movvs r0, #0 +; FP16-NEXT: movvs r1, #0 +; FP16-NEXT: pop {r4, pc} +; FP16-NEXT: .p2align 2 +; FP16-NEXT: @ %bb.1: +; FP16-NEXT: .LCPI7_0: +; FP16-NEXT: .long 0xdf000000 @ float -9.22337203E+18 +; FP16-NEXT: .LCPI7_1: +; FP16-NEXT: .long 0x5effffff @ float 9.22337149E+18 %x = call i64 @llvm.fptosi.sat.i64.f32(float %f) ret i64 %x } @@ -880,43 +946,81 @@ define i100 @test_signed_i100_f32(float %f) nounwind { ; SOFT-NEXT: .LCPI8_0: ; SOFT-NEXT: .long 1895825407 @ 0x70ffffff ; -; VFP-LABEL: test_signed_i100_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r4, lr} -; VFP-NEXT: push {r4, lr} -; VFP-NEXT: mov r4, r0 -; VFP-NEXT: bl __fixsfti -; VFP-NEXT: vldr s0, .LCPI8_0 -; 
VFP-NEXT: vmov s2, r4 -; VFP-NEXT: vldr s4, .LCPI8_1 -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt lt -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: mvnlt r3, #7 -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt gt -; VFP-NEXT: movgt r3, #7 -; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt vs -; VFP-NEXT: movvs r0, #0 -; VFP-NEXT: movvs r1, #0 -; VFP-NEXT: movvs r2, #0 -; VFP-NEXT: movvs r3, #0 -; VFP-NEXT: pop {r4, pc} -; VFP-NEXT: .p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI8_0: -; VFP-NEXT: .long 0xf1000000 @ float -6.338253E+29 -; VFP-NEXT: .LCPI8_1: -; VFP-NEXT: .long 0x70ffffff @ float 6.33825262E+29 +; VFP2-LABEL: test_signed_i100_f32: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r4, lr} +; VFP2-NEXT: push {r4, lr} +; VFP2-NEXT: mov r4, r0 +; VFP2-NEXT: bl __fixsfti +; VFP2-NEXT: vldr s0, .LCPI8_0 +; VFP2-NEXT: vmov s2, r4 +; VFP2-NEXT: vldr s4, .LCPI8_1 +; VFP2-NEXT: vcmp.f32 s2, s0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itttt lt +; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: movlt r1, #0 +; VFP2-NEXT: movlt r2, #0 +; VFP2-NEXT: mvnlt r3, #7 +; VFP2-NEXT: vcmp.f32 s2, s4 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itttt gt +; VFP2-NEXT: movgt r3, #7 +; VFP2-NEXT: movgt.w r2, #-1 +; VFP2-NEXT: movgt.w r1, #-1 +; VFP2-NEXT: movgt.w r0, #-1 +; VFP2-NEXT: vcmp.f32 s2, s2 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itttt vs +; VFP2-NEXT: movvs r0, #0 +; VFP2-NEXT: movvs r1, #0 +; VFP2-NEXT: movvs r2, #0 +; VFP2-NEXT: movvs r3, #0 +; VFP2-NEXT: pop {r4, pc} +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI8_0: +; VFP2-NEXT: .long 0xf1000000 @ float -6.338253E+29 +; VFP2-NEXT: .LCPI8_1: +; VFP2-NEXT: .long 0x70ffffff @ float 6.33825262E+29 +; +; FP16-LABEL: test_signed_i100_f32: +; FP16: @ %bb.0: +; 
FP16-NEXT: .save {r4, lr} +; FP16-NEXT: push {r4, lr} +; FP16-NEXT: mov r4, r0 +; FP16-NEXT: bl __fixsfti +; FP16-NEXT: vldr s0, .LCPI8_0 +; FP16-NEXT: vmov s2, r4 +; FP16-NEXT: vldr s4, .LCPI8_1 +; FP16-NEXT: vcmp.f32 s2, s0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: vcmp.f32 s2, s4 +; FP16-NEXT: itttt lt +; FP16-NEXT: movlt r0, #0 +; FP16-NEXT: movlt r1, #0 +; FP16-NEXT: movlt r2, #0 +; FP16-NEXT: mvnlt r3, #7 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: it gt +; FP16-NEXT: movgt r3, #7 +; FP16-NEXT: vcmp.f32 s2, s2 +; FP16-NEXT: csinv r2, r2, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: itttt vs +; FP16-NEXT: movvs r0, #0 +; FP16-NEXT: movvs r1, #0 +; FP16-NEXT: movvs r2, #0 +; FP16-NEXT: movvs r3, #0 +; FP16-NEXT: pop {r4, pc} +; FP16-NEXT: .p2align 2 +; FP16-NEXT: @ %bb.1: +; FP16-NEXT: .LCPI8_0: +; FP16-NEXT: .long 0xf1000000 @ float -6.338253E+29 +; FP16-NEXT: .LCPI8_1: +; FP16-NEXT: .long 0x70ffffff @ float 6.33825262E+29 %x = call i100 @llvm.fptosi.sat.i100.f32(float %f) ret i100 %x } @@ -1033,43 +1137,81 @@ define i128 @test_signed_i128_f32(float %f) nounwind { ; SOFT-NEXT: .LCPI9_1: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; -; VFP-LABEL: test_signed_i128_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r4, lr} -; VFP-NEXT: push {r4, lr} -; VFP-NEXT: mov r4, r0 -; VFP-NEXT: bl __fixsfti -; VFP-NEXT: vldr s0, .LCPI9_0 -; VFP-NEXT: vmov s2, r4 -; VFP-NEXT: vldr s4, .LCPI9_1 -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt lt -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: movlt.w r3, #-2147483648 -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt gt -; VFP-NEXT: mvngt r3, #-2147483648 -; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt vs -; 
VFP-NEXT: movvs r0, #0 -; VFP-NEXT: movvs r1, #0 -; VFP-NEXT: movvs r2, #0 -; VFP-NEXT: movvs r3, #0 -; VFP-NEXT: pop {r4, pc} -; VFP-NEXT: .p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI9_0: -; VFP-NEXT: .long 0xff000000 @ float -1.70141183E+38 -; VFP-NEXT: .LCPI9_1: -; VFP-NEXT: .long 0x7effffff @ float 1.70141173E+38 +; VFP2-LABEL: test_signed_i128_f32: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r4, lr} +; VFP2-NEXT: push {r4, lr} +; VFP2-NEXT: mov r4, r0 +; VFP2-NEXT: bl __fixsfti +; VFP2-NEXT: vldr s0, .LCPI9_0 +; VFP2-NEXT: vmov s2, r4 +; VFP2-NEXT: vldr s4, .LCPI9_1 +; VFP2-NEXT: vcmp.f32 s2, s0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itttt lt +; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: movlt r1, #0 +; VFP2-NEXT: movlt r2, #0 +; VFP2-NEXT: movlt.w r3, #-2147483648 +; VFP2-NEXT: vcmp.f32 s2, s4 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itttt gt +; VFP2-NEXT: mvngt r3, #-2147483648 +; VFP2-NEXT: movgt.w r2, #-1 +; VFP2-NEXT: movgt.w r1, #-1 +; VFP2-NEXT: movgt.w r0, #-1 +; VFP2-NEXT: vcmp.f32 s2, s2 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itttt vs +; VFP2-NEXT: movvs r0, #0 +; VFP2-NEXT: movvs r1, #0 +; VFP2-NEXT: movvs r2, #0 +; VFP2-NEXT: movvs r3, #0 +; VFP2-NEXT: pop {r4, pc} +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI9_0: +; VFP2-NEXT: .long 0xff000000 @ float -1.70141183E+38 +; VFP2-NEXT: .LCPI9_1: +; VFP2-NEXT: .long 0x7effffff @ float 1.70141173E+38 +; +; FP16-LABEL: test_signed_i128_f32: +; FP16: @ %bb.0: +; FP16-NEXT: .save {r4, lr} +; FP16-NEXT: push {r4, lr} +; FP16-NEXT: mov r4, r0 +; FP16-NEXT: bl __fixsfti +; FP16-NEXT: vldr s0, .LCPI9_0 +; FP16-NEXT: vmov s2, r4 +; FP16-NEXT: vldr s4, .LCPI9_1 +; FP16-NEXT: vcmp.f32 s2, s0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: vcmp.f32 s2, s4 +; FP16-NEXT: itttt lt +; FP16-NEXT: movlt r0, #0 +; FP16-NEXT: movlt r1, #0 +; FP16-NEXT: movlt r2, #0 +; FP16-NEXT: movlt.w r3, #-2147483648 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: it gt +; FP16-NEXT: 
mvngt r3, #-2147483648 +; FP16-NEXT: vcmp.f32 s2, s2 +; FP16-NEXT: csinv r2, r2, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: itttt vs +; FP16-NEXT: movvs r0, #0 +; FP16-NEXT: movvs r1, #0 +; FP16-NEXT: movvs r2, #0 +; FP16-NEXT: movvs r3, #0 +; FP16-NEXT: pop {r4, pc} +; FP16-NEXT: .p2align 2 +; FP16-NEXT: @ %bb.1: +; FP16-NEXT: .LCPI9_0: +; FP16-NEXT: .long 0xff000000 @ float -1.70141183E+38 +; FP16-NEXT: .LCPI9_1: +; FP16-NEXT: .long 0x7effffff @ float 1.70141173E+38 %x = call i128 @llvm.fptosi.sat.i128.f32(float %f) ret i128 %x } @@ -1993,10 +2135,10 @@ define i64 @test_signed_i64_f64(double %f) nounwind { ; FP16-NEXT: movlt.w r1, #-2147483648 ; FP16-NEXT: vcmp.f64 d1, d2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itt gt +; FP16-NEXT: it gt ; FP16-NEXT: mvngt r1, #-2147483648 -; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f64 d1, d1 +; FP16-NEXT: csinv r0, r0, zr, gt ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: itt vs ; FP16-NEXT: movvs r0, #0 @@ -2193,12 +2335,12 @@ define i100 @test_signed_i100_f64(double %f) nounwind { ; FP16-NEXT: mvnlt r3, #7 ; FP16-NEXT: vcmp.f64 d1, d2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt +; FP16-NEXT: it gt ; FP16-NEXT: movgt r3, #7 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f64 d1, d1 +; FP16-NEXT: csinv r2, r2, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r0, r0, zr, gt ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: itttt vs ; FP16-NEXT: movvs r0, #0 @@ -2398,12 +2540,12 @@ define i128 @test_signed_i128_f64(double %f) nounwind { ; FP16-NEXT: movlt.w r3, #-2147483648 ; FP16-NEXT: vcmp.f64 d1, d2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt +; FP16-NEXT: it gt ; FP16-NEXT: mvngt r3, #-2147483648 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f64 d1, d1 +; FP16-NEXT: csinv 
r2, r2, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r0, r0, zr, gt ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: itttt vs ; FP16-NEXT: movvs r0, #0 @@ -3147,18 +3289,17 @@ define i50 @test_signed_i50_f16(half %f) nounwind { ; FP16-NEXT: vldr s2, .LCPI26_1 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: ittt lt ; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movtlt r1, #65534 ; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f32 s16, s16 ; FP16-NEXT: itt gt ; FP16-NEXT: movwgt r1, #65535 ; FP16-NEXT: movtgt r1, #1 +; FP16-NEXT: csinv r0, r0, zr, gt ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: itt vs ; FP16-NEXT: movvs r0, #0 @@ -3288,15 +3429,15 @@ define i64 @test_signed_i64_f16(half %f) nounwind { ; FP16-NEXT: vldr s2, .LCPI27_1 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: itt lt ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: movlt.w r1, #-2147483648 -; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itt gt +; FP16-NEXT: it gt ; FP16-NEXT: mvngt r1, #-2147483648 -; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f32 s16, s16 +; FP16-NEXT: csinv r0, r0, zr, gt ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: itt vs ; FP16-NEXT: movvs r0, #0 @@ -3478,19 +3619,19 @@ define i100 @test_signed_i100_f16(half %f) nounwind { ; FP16-NEXT: vldr s2, .LCPI28_1 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: itttt lt ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movlt r2, #0 ; FP16-NEXT: mvnlt r3, #7 -; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt +; FP16-NEXT: it gt ; FP16-NEXT: movgt r3, #7 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: 
vcmp.f32 s16, s16 +; FP16-NEXT: csinv r2, r2, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r0, r0, zr, gt ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: itttt vs ; FP16-NEXT: movvs r0, #0 @@ -3676,19 +3817,19 @@ define i128 @test_signed_i128_f16(half %f) nounwind { ; FP16-NEXT: vldr s2, .LCPI29_1 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: itttt lt ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movlt r2, #0 ; FP16-NEXT: movlt.w r3, #-2147483648 -; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt +; FP16-NEXT: it gt ; FP16-NEXT: mvngt r3, #-2147483648 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f32 s16, s16 +; FP16-NEXT: csinv r2, r2, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r0, r0, zr, gt ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: itttt vs ; FP16-NEXT: movvs r0, #0 diff --git a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll index 4cc5f943dadff..3062ea1433a9f 100644 --- a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll @@ -471,32 +471,59 @@ define i50 @test_signed_i50_f32(float %f) nounwind { ; SOFT-NEXT: .LCPI6_1: ; SOFT-NEXT: .long 262143 @ 0x3ffff ; -; VFP-LABEL: test_signed_i50_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r7, lr} -; VFP-NEXT: push {r7, lr} -; VFP-NEXT: .vsave {d8} -; VFP-NEXT: vpush {d8} -; VFP-NEXT: vmov s16, r0 -; VFP-NEXT: bl __aeabi_f2ulz -; VFP-NEXT: vldr s0, .LCPI6_0 -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itt lt -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: ittt gt -; VFP-NEXT: movwgt r1, #65535 -; VFP-NEXT: movtgt r1, #3 -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vpop {d8} -; VFP-NEXT: pop {r7, pc} -; VFP-NEXT: 
.p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI6_0: -; VFP-NEXT: .long 0x587fffff @ float 1.12589984E+15 +; VFP2-LABEL: test_signed_i50_f32: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} +; VFP2-NEXT: .vsave {d8} +; VFP2-NEXT: vpush {d8} +; VFP2-NEXT: vmov s16, r0 +; VFP2-NEXT: bl __aeabi_f2ulz +; VFP2-NEXT: vldr s0, .LCPI6_0 +; VFP2-NEXT: vcmp.f32 s16, #0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itt lt +; VFP2-NEXT: movlt r1, #0 +; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vcmp.f32 s16, s0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: ittt gt +; VFP2-NEXT: movwgt r1, #65535 +; VFP2-NEXT: movtgt r1, #3 +; VFP2-NEXT: movgt.w r0, #-1 +; VFP2-NEXT: vpop {d8} +; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI6_0: +; VFP2-NEXT: .long 0x587fffff @ float 1.12589984E+15 +; +; FP16-LABEL: test_signed_i50_f32: +; FP16: @ %bb.0: +; FP16-NEXT: .save {r7, lr} +; FP16-NEXT: push {r7, lr} +; FP16-NEXT: .vsave {d8} +; FP16-NEXT: vpush {d8} +; FP16-NEXT: vmov s16, r0 +; FP16-NEXT: bl __aeabi_f2ulz +; FP16-NEXT: vldr s0, .LCPI6_0 +; FP16-NEXT: vcmp.f32 s16, #0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: itt lt +; FP16-NEXT: movlt r1, #0 +; FP16-NEXT: movlt r0, #0 +; FP16-NEXT: vcmp.f32 s16, s0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: itt gt +; FP16-NEXT: movwgt r1, #65535 +; FP16-NEXT: movtgt r1, #3 +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: vpop {d8} +; FP16-NEXT: pop {r7, pc} +; FP16-NEXT: .p2align 2 +; FP16-NEXT: @ %bb.1: +; FP16-NEXT: .LCPI6_0: +; FP16-NEXT: .long 0x587fffff @ float 1.12589984E+15 %x = call i50 @llvm.fptoui.sat.i50.f32(float %f) ret i50 %x } @@ -550,31 +577,56 @@ define i64 @test_signed_i64_f32(float %f) nounwind { ; SOFT-NEXT: .LCPI7_0: ; SOFT-NEXT: .long 1602224127 @ 0x5f7fffff ; -; VFP-LABEL: test_signed_i64_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r7, lr} -; VFP-NEXT: push {r7, lr} -; VFP-NEXT: .vsave {d8} -; VFP-NEXT: vpush {d8} -; VFP-NEXT: vmov s16, 
r0 -; VFP-NEXT: bl __aeabi_f2ulz -; VFP-NEXT: vldr s0, .LCPI7_0 -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itt lt -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itt gt -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: vpop {d8} -; VFP-NEXT: pop {r7, pc} -; VFP-NEXT: .p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI7_0: -; VFP-NEXT: .long 0x5f7fffff @ float 1.8446743E+19 +; VFP2-LABEL: test_signed_i64_f32: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} +; VFP2-NEXT: .vsave {d8} +; VFP2-NEXT: vpush {d8} +; VFP2-NEXT: vmov s16, r0 +; VFP2-NEXT: bl __aeabi_f2ulz +; VFP2-NEXT: vldr s0, .LCPI7_0 +; VFP2-NEXT: vcmp.f32 s16, #0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itt lt +; VFP2-NEXT: movlt r1, #0 +; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vcmp.f32 s16, s0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itt gt +; VFP2-NEXT: movgt.w r0, #-1 +; VFP2-NEXT: movgt.w r1, #-1 +; VFP2-NEXT: vpop {d8} +; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI7_0: +; VFP2-NEXT: .long 0x5f7fffff @ float 1.8446743E+19 +; +; FP16-LABEL: test_signed_i64_f32: +; FP16: @ %bb.0: +; FP16-NEXT: .save {r7, lr} +; FP16-NEXT: push {r7, lr} +; FP16-NEXT: .vsave {d8} +; FP16-NEXT: vpush {d8} +; FP16-NEXT: vmov s16, r0 +; FP16-NEXT: bl __aeabi_f2ulz +; FP16-NEXT: vldr s0, .LCPI7_0 +; FP16-NEXT: vcmp.f32 s16, #0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: itt lt +; FP16-NEXT: movlt r1, #0 +; FP16-NEXT: movlt r0, #0 +; FP16-NEXT: vcmp.f32 s16, s0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: vpop {d8} +; FP16-NEXT: pop {r7, pc} +; FP16-NEXT: .p2align 2 +; FP16-NEXT: @ %bb.1: +; FP16-NEXT: .LCPI7_0: +; FP16-NEXT: .long 0x5f7fffff @ float 1.8446743E+19 %x = call i64 @llvm.fptoui.sat.i64.f32(float %f) ret i64 
%x } @@ -654,35 +706,65 @@ define i100 @test_signed_i100_f32(float %f) nounwind { ; SOFT-NEXT: .LCPI8_0: ; SOFT-NEXT: .long 1904214015 @ 0x717fffff ; -; VFP-LABEL: test_signed_i100_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r7, lr} -; VFP-NEXT: push {r7, lr} -; VFP-NEXT: .vsave {d8} -; VFP-NEXT: vpush {d8} -; VFP-NEXT: vmov s16, r0 -; VFP-NEXT: bl __fixunssfti -; VFP-NEXT: vldr s0, .LCPI8_0 -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt lt -; VFP-NEXT: movlt r3, #0 -; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt gt -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: movgt r3, #15 -; VFP-NEXT: vpop {d8} -; VFP-NEXT: pop {r7, pc} -; VFP-NEXT: .p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI8_0: -; VFP-NEXT: .long 0x717fffff @ float 1.26765052E+30 +; VFP2-LABEL: test_signed_i100_f32: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} +; VFP2-NEXT: .vsave {d8} +; VFP2-NEXT: vpush {d8} +; VFP2-NEXT: vmov s16, r0 +; VFP2-NEXT: bl __fixunssfti +; VFP2-NEXT: vldr s0, .LCPI8_0 +; VFP2-NEXT: vcmp.f32 s16, #0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itttt lt +; VFP2-NEXT: movlt r3, #0 +; VFP2-NEXT: movlt r2, #0 +; VFP2-NEXT: movlt r1, #0 +; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vcmp.f32 s16, s0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itttt gt +; VFP2-NEXT: movgt.w r0, #-1 +; VFP2-NEXT: movgt.w r1, #-1 +; VFP2-NEXT: movgt.w r2, #-1 +; VFP2-NEXT: movgt r3, #15 +; VFP2-NEXT: vpop {d8} +; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI8_0: +; VFP2-NEXT: .long 0x717fffff @ float 1.26765052E+30 +; +; FP16-LABEL: test_signed_i100_f32: +; FP16: @ %bb.0: +; FP16-NEXT: .save {r7, lr} +; FP16-NEXT: push {r7, lr} +; FP16-NEXT: .vsave {d8} +; FP16-NEXT: vpush {d8} +; FP16-NEXT: vmov s16, r0 +; FP16-NEXT: 
bl __fixunssfti +; FP16-NEXT: vldr s0, .LCPI8_0 +; FP16-NEXT: vcmp.f32 s16, #0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: itttt lt +; FP16-NEXT: movlt r2, #0 +; FP16-NEXT: movlt r1, #0 +; FP16-NEXT: movlt r0, #0 +; FP16-NEXT: movlt r3, #0 +; FP16-NEXT: vcmp.f32 s16, s0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: it gt +; FP16-NEXT: movgt r3, #15 +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r2, r2, zr, gt +; FP16-NEXT: vpop {d8} +; FP16-NEXT: pop {r7, pc} +; FP16-NEXT: .p2align 2 +; FP16-NEXT: @ %bb.1: +; FP16-NEXT: .LCPI8_0: +; FP16-NEXT: .long 0x717fffff @ float 1.26765052E+30 %x = call i100 @llvm.fptoui.sat.i100.f32(float %f) ret i100 %x } @@ -763,35 +845,64 @@ define i128 @test_signed_i128_f32(float %f) nounwind { ; SOFT-NEXT: .LCPI9_0: ; SOFT-NEXT: .long 2139095039 @ 0x7f7fffff ; -; VFP-LABEL: test_signed_i128_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r7, lr} -; VFP-NEXT: push {r7, lr} -; VFP-NEXT: .vsave {d8} -; VFP-NEXT: vpush {d8} -; VFP-NEXT: vmov s16, r0 -; VFP-NEXT: bl __fixunssfti -; VFP-NEXT: vldr s0, .LCPI9_0 -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt lt -; VFP-NEXT: movlt r3, #0 -; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt gt -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: movgt.w r3, #-1 -; VFP-NEXT: vpop {d8} -; VFP-NEXT: pop {r7, pc} -; VFP-NEXT: .p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI9_0: -; VFP-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 +; VFP2-LABEL: test_signed_i128_f32: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} +; VFP2-NEXT: .vsave {d8} +; VFP2-NEXT: vpush {d8} +; VFP2-NEXT: vmov s16, r0 +; VFP2-NEXT: bl __fixunssfti +; VFP2-NEXT: vldr s0, .LCPI9_0 +; VFP2-NEXT: vcmp.f32 s16, #0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itttt 
lt +; VFP2-NEXT: movlt r3, #0 +; VFP2-NEXT: movlt r2, #0 +; VFP2-NEXT: movlt r1, #0 +; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vcmp.f32 s16, s0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: itttt gt +; VFP2-NEXT: movgt.w r0, #-1 +; VFP2-NEXT: movgt.w r1, #-1 +; VFP2-NEXT: movgt.w r2, #-1 +; VFP2-NEXT: movgt.w r3, #-1 +; VFP2-NEXT: vpop {d8} +; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI9_0: +; VFP2-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 +; +; FP16-LABEL: test_signed_i128_f32: +; FP16: @ %bb.0: +; FP16-NEXT: .save {r7, lr} +; FP16-NEXT: push {r7, lr} +; FP16-NEXT: .vsave {d8} +; FP16-NEXT: vpush {d8} +; FP16-NEXT: vmov s16, r0 +; FP16-NEXT: bl __fixunssfti +; FP16-NEXT: vldr s0, .LCPI9_0 +; FP16-NEXT: vcmp.f32 s16, #0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: itttt lt +; FP16-NEXT: movlt r3, #0 +; FP16-NEXT: movlt r2, #0 +; FP16-NEXT: movlt r1, #0 +; FP16-NEXT: movlt r0, #0 +; FP16-NEXT: vcmp.f32 s16, s0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r2, r2, zr, gt +; FP16-NEXT: csinv r3, r3, zr, gt +; FP16-NEXT: vpop {d8} +; FP16-NEXT: pop {r7, pc} +; FP16-NEXT: .p2align 2 +; FP16-NEXT: @ %bb.1: +; FP16-NEXT: .LCPI9_0: +; FP16-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %x = call i128 @llvm.fptoui.sat.i128.f32(float %f) ret i128 %x } @@ -1508,9 +1619,8 @@ define i64 @test_signed_i64_f64(double %f) nounwind { ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f64 d8, d0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itt gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: movgt.w r1, #-1 +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} ; FP16-NEXT: .p2align 3 @@ -1645,17 +1755,17 @@ define i100 @test_signed_i100_f64(double %f) nounwind { ; FP16-NEXT: vldr d0, .LCPI18_0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: itttt lt -; FP16-NEXT: movlt r3, #0 ; FP16-NEXT: 
movlt r2, #0 ; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movlt r0, #0 +; FP16-NEXT: movlt r3, #0 ; FP16-NEXT: vcmp.f64 d8, d0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r2, #-1 +; FP16-NEXT: it gt ; FP16-NEXT: movgt r3, #15 +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r2, r2, zr, gt ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} ; FP16-NEXT: .p2align 3 @@ -1797,11 +1907,10 @@ define i128 @test_signed_i128_f64(double %f) nounwind { ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f64 d8, d0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r3, #-1 +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r2, r2, zr, gt +; FP16-NEXT: csinv r3, r3, zr, gt ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} ; FP16-NEXT: .p2align 3 @@ -2369,10 +2478,10 @@ define i50 @test_signed_i50_f16(half %f) nounwind { ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: ittt gt +; FP16-NEXT: itt gt ; FP16-NEXT: movwgt r1, #65535 ; FP16-NEXT: movtgt r1, #3 -; FP16-NEXT: movgt.w r0, #-1 +; FP16-NEXT: csinv r0, r0, zr, gt ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} ; FP16-NEXT: .p2align 2 @@ -2477,9 +2586,8 @@ define i64 @test_signed_i64_f16(half %f) nounwind { ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itt gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: movgt.w r1, #-1 +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} ; FP16-NEXT: .p2align 2 @@ -2610,17 +2718,17 @@ define i100 @test_signed_i100_f16(half %f) nounwind { ; FP16-NEXT: vcmp.f32 s16, #0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: itttt lt -; FP16-NEXT: movlt 
r3, #0 ; FP16-NEXT: movlt r2, #0 ; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movlt r0, #0 +; FP16-NEXT: movlt r3, #0 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r2, #-1 +; FP16-NEXT: it gt ; FP16-NEXT: movgt r3, #15 +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r2, r2, zr, gt ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} ; FP16-NEXT: .p2align 2 @@ -2758,11 +2866,10 @@ define i128 @test_signed_i128_f16(half %f) nounwind { ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r3, #-1 +; FP16-NEXT: csinv r0, r0, zr, gt +; FP16-NEXT: csinv r1, r1, zr, gt +; FP16-NEXT: csinv r2, r2, zr, gt +; FP16-NEXT: csinv r3, r3, zr, gt ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} ; FP16-NEXT: .p2align 2 diff --git a/llvm/test/CodeGen/Thumb/scmp.ll b/llvm/test/CodeGen/Thumb/scmp.ll index c0024492b3a6d..297a9987e7576 100644 --- a/llvm/test/CodeGen/Thumb/scmp.ll +++ b/llvm/test/CodeGen/Thumb/scmp.ll @@ -37,8 +37,7 @@ define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind { ; V81M: @ %bb.0: ; V81M-NEXT: cmp r0, r1 ; V81M-NEXT: cset r0, gt -; V81M-NEXT: it lt -; V81M-NEXT: movlt.w r0, #-1 +; V81M-NEXT: csinv r0, r0, zr, lt ; V81M-NEXT: bx lr %1 = call i8 @llvm.scmp(i8 %x, i8 %y) ret i8 %1 @@ -78,8 +77,7 @@ define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind { ; V81M: @ %bb.0: ; V81M-NEXT: cmp r0, r1 ; V81M-NEXT: cset r0, gt -; V81M-NEXT: it lt -; V81M-NEXT: movlt.w r0, #-1 +; V81M-NEXT: csinv r0, r0, zr, lt ; V81M-NEXT: bx lr %1 = call i8 @llvm.scmp(i16 %x, i16 %y) ret i8 %1 @@ -119,8 +117,7 @@ define i8 @scmp_8_32(i32 %x, i32 %y) nounwind { ; V81M: @ %bb.0: ; V81M-NEXT: cmp r0, r1 ; V81M-NEXT: cset r0, gt -; V81M-NEXT: it lt -; 
V81M-NEXT: movlt.w r0, #-1 +; V81M-NEXT: csinv r0, r0, zr, lt ; V81M-NEXT: bx lr %1 = call i8 @llvm.scmp(i32 %x, i32 %y) ret i8 %1 @@ -303,8 +300,7 @@ define i32 @scmp_32_32(i32 %x, i32 %y) nounwind { ; V81M: @ %bb.0: ; V81M-NEXT: cmp r0, r1 ; V81M-NEXT: cset r0, gt -; V81M-NEXT: it lt -; V81M-NEXT: movlt.w r0, #-1 +; V81M-NEXT: csinv r0, r0, zr, lt ; V81M-NEXT: bx lr %1 = call i32 @llvm.scmp(i32 %x, i32 %y) ret i32 %1 diff --git a/llvm/test/CodeGen/Thumb/ucmp.ll b/llvm/test/CodeGen/Thumb/ucmp.ll index 5d0f57e2a9d72..d34a2127d5141 100644 --- a/llvm/test/CodeGen/Thumb/ucmp.ll +++ b/llvm/test/CodeGen/Thumb/ucmp.ll @@ -26,8 +26,7 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind { ; V81M: @ %bb.0: ; V81M-NEXT: cmp r0, r1 ; V81M-NEXT: cset r0, hi -; V81M-NEXT: it lo -; V81M-NEXT: movlo.w r0, #-1 +; V81M-NEXT: csinv r0, r0, zr, lo ; V81M-NEXT: bx lr %1 = call i8 @llvm.ucmp(i8 %x, i8 %y) ret i8 %1 @@ -56,8 +55,7 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind { ; V81M: @ %bb.0: ; V81M-NEXT: cmp r0, r1 ; V81M-NEXT: cset r0, hi -; V81M-NEXT: it lo -; V81M-NEXT: movlo.w r0, #-1 +; V81M-NEXT: csinv r0, r0, zr, lo ; V81M-NEXT: bx lr %1 = call i8 @llvm.ucmp(i16 %x, i16 %y) ret i8 %1 @@ -86,8 +84,7 @@ define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind { ; V81M: @ %bb.0: ; V81M-NEXT: cmp r0, r1 ; V81M-NEXT: cset r0, hi -; V81M-NEXT: it lo -; V81M-NEXT: movlo.w r0, #-1 +; V81M-NEXT: csinv r0, r0, zr, lo ; V81M-NEXT: bx lr %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) ret i8 %1 @@ -259,8 +256,7 @@ define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind { ; V81M: @ %bb.0: ; V81M-NEXT: cmp r0, r1 ; V81M-NEXT: cset r0, hi -; V81M-NEXT: it lo -; V81M-NEXT: movlo.w r0, #-1 +; V81M-NEXT: csinv r0, r0, zr, lo ; V81M-NEXT: bx lr %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) ret i32 %1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll index 6f986ce28381b..df51c81345057 100644 --- 
a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -630,7 +630,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr define i32 @wrongop(ptr nocapture readonly %pd) { ; CHECK-LABEL: wrongop: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: movw r12, #47184 ; CHECK-NEXT: movw r3, #23593 @@ -639,34 +639,34 @@ define i32 @wrongop(ptr nocapture readonly %pd) { ; CHECK-NEXT: movt r3, #49807 ; CHECK-NEXT: mla r3, lr, r3, r12 ; CHECK-NEXT: movw r1, #55051 -; CHECK-NEXT: movw r4, #23593 ; CHECK-NEXT: movt r1, #163 ; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: movt r4, #655 ; CHECK-NEXT: ror.w r12, r3, #4 ; CHECK-NEXT: cmp r12, r1 +; CHECK-NEXT: ror.w r12, r3, #2 +; CHECK-NEXT: movw r3, #23593 ; CHECK-NEXT: cset r1, lo -; CHECK-NEXT: ror.w r3, r3, #2 -; CHECK-NEXT: mov.w r12, #1 -; CHECK-NEXT: cmp r3, r4 -; CHECK-NEXT: csel r3, r1, r12, lo -; CHECK-NEXT: lsls.w r4, lr, #30 -; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: movt r3, #655 +; CHECK-NEXT: cmp r12, r3 +; CHECK-NEXT: csinc r12, r1, zr, hs +; CHECK-NEXT: lsls.w r3, lr, #30 +; CHECK-NEXT: csel r1, r1, r12, ne ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB8_1: @ %vector.ph -; CHECK-NEXT: movw r3, :lower16:days -; CHECK-NEXT: movs r4, #52 -; CHECK-NEXT: movt r3, :upper16:days -; CHECK-NEXT: smlabb r1, r1, r4, r3 +; CHECK-NEXT: movw r12, :lower16:days +; CHECK-NEXT: movs r3, #52 +; CHECK-NEXT: movt r12, :upper16:days +; CHECK-NEXT: smlabb r1, r1, r3, r12 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q0, r3 +; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: vmov.32 q0[0], r0 ; CHECK-NEXT: adds r0, r2, #3 ; CHECK-NEXT: bic r0, r0, #3 ; CHECK-NEXT: subs r0, #4 -; CHECK-NEXT: add.w r0, r12, r0, lsr #2 +; CHECK-NEXT: add.w r0, r3, r0, lsr #2 ; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: .LBB8_2: @ %vector.body ; 
CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -680,7 +680,7 @@ define i32 @wrongop(ptr nocapture readonly %pd) { ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop {r7, pc} entry: %day1 = getelementptr inbounds %struct.date, ptr %pd, i32 0, i32 0 %0 = load i32, ptr %day1, align 4 diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll index d076cb00ad7e0..3ec639e459401 100644 --- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll +++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll @@ -362,31 +362,31 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: blt.w .LBB1_28 ; CHECK-NEXT: @ %bb.1: @ %for.cond2.preheader.lr.ph -; CHECK-NEXT: movs r0, #1 ; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: csel r7, r2, r0, lt ; CHECK-NEXT: mov r12, r1 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: csinc r7, r2, zr, ge +; CHECK-NEXT: movw r1, #43691 +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: cmp r7, #3 ; CHECK-NEXT: it ls -; CHECK-NEXT: movls r1, #3 +; CHECK-NEXT: movls r0, #3 +; CHECK-NEXT: movt r1, #43690 +; CHECK-NEXT: subs r0, r0, r7 ; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: subs r1, r1, r7 -; CHECK-NEXT: movw r2, #43691 -; CHECK-NEXT: adds r1, #2 -; CHECK-NEXT: movt r2, #43690 +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: movs r2, #4 ; CHECK-NEXT: ldr r6, [sp, #128] ; CHECK-NEXT: movw r8, :lower16:c -; CHECK-NEXT: umull r1, r2, r1, r2 +; CHECK-NEXT: umull r0, r1, r0, r1 ; CHECK-NEXT: movt r8, :upper16:c -; CHECK-NEXT: movs r1, #4 +; CHECK-NEXT: movs r0, #1 ; CHECK-NEXT: @ implicit-def: $r10 ; CHECK-NEXT: @ implicit-def: $r5 ; CHECK-NEXT: @ implicit-def: $r11 ; CHECK-NEXT: mov.w r9, #12 ; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: add.w r1, r1, r2, lsr #1 -; CHECK-NEXT: add.w r0, r0, r2, lsr #1 +; CHECK-NEXT: add.w r0, r0, r1, lsr #1 +; CHECK-NEXT: add.w r1, r2, r1, lsr #1 ; CHECK-NEXT: bic 
r3, r1, #3 ; CHECK-NEXT: adr r1, .LCPI1_0 ; CHECK-NEXT: vldrw.u32 q0, [r1] diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll index 742f2a75a1aa8..e8895bedc78a2 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -684,17 +684,17 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: sbcs r5, r2, #0 ; CHECK-NEXT: mov.w r7, #-2147483648 ; CHECK-NEXT: sbcs r5, r3, #0 +; CHECK-NEXT: mov.w r6, #-1 ; CHECK-NEXT: cset r5, lt ; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: csinv r0, r0, zr, eq ; CHECK-NEXT: csel r3, r3, r5, ne ; CHECK-NEXT: csel r2, r2, r5, ne -; CHECK-NEXT: mov.w r5, #-1 ; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r7, r1 -; CHECK-NEXT: sbcs.w r2, r5, r2 -; CHECK-NEXT: sbcs.w r2, r5, r3 +; CHECK-NEXT: rsbs r5, r0, #0 +; CHECK-NEXT: sbcs.w r5, r7, r1 +; CHECK-NEXT: sbcs.w r2, r6, r2 +; CHECK-NEXT: sbcs.w r2, r6, r3 ; CHECK-NEXT: csel r8, r1, r7, lt ; CHECK-NEXT: cset r1, lt ; CHECK-NEXT: cmp r1, #0 @@ -702,20 +702,20 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs.w r6, r0, #-1 -; CHECK-NEXT: sbcs.w r6, r1, r4 -; CHECK-NEXT: sbcs r6, r2, #0 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: csel r3, r3, r6, ne -; CHECK-NEXT: csel r2, r2, r6, ne +; CHECK-NEXT: subs.w r5, r0, #-1 +; CHECK-NEXT: sbcs.w r5, r1, r4 +; CHECK-NEXT: sbcs r5, r2, #0 +; CHECK-NEXT: sbcs r5, r3, #0 +; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: csinv r0, r0, zr, eq +; CHECK-NEXT: csel r3, r3, r5, ne +; CHECK-NEXT: csel r2, r2, r5, ne ; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r7, r1 -; 
CHECK-NEXT: sbcs.w r2, r5, r2 -; CHECK-NEXT: sbcs.w r2, r5, r3 +; CHECK-NEXT: rsbs r5, r0, #0 +; CHECK-NEXT: sbcs.w r5, r7, r1 +; CHECK-NEXT: sbcs.w r2, r6, r2 +; CHECK-NEXT: sbcs.w r2, r6, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: csel r1, r1, r7, lt ; CHECK-NEXT: cmp r2, #0 @@ -776,8 +776,10 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 @@ -786,13 +788,12 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: vmov r12, lr, d8 ; CHECK-NEXT: subs r4, r2, #1 ; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov.w r8, #1 ; CHECK-NEXT: cset r4, lt ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: csel r0, r0, r4, ne ; CHECK-NEXT: csel r3, r3, r4, ne ; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: csel r2, r2, r8, ne +; CHECK-NEXT: csinc r2, r2, zr, eq ; CHECK-NEXT: rsbs r5, r0, #0 ; CHECK-NEXT: mov.w r4, #0 ; CHECK-NEXT: sbcs.w r5, r4, r1 @@ -800,31 +801,32 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: sbcs.w r2, r4, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r5, r1, r2, ne -; CHECK-NEXT: csel r7, r0, r2, ne +; CHECK-NEXT: csel r7, r1, r2, ne +; CHECK-NEXT: csel r6, r0, r2, ne ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r6, r2, #1 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: csel r3, r3, r6, ne -; CHECK-NEXT: csel r1, r1, r6, ne -; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r4, r1 +; CHECK-NEXT: subs r5, r2, 
#1 +; CHECK-NEXT: sbcs r5, r3, #0 +; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: csel r0, r0, r5, ne +; CHECK-NEXT: csel r3, r3, r5, ne +; CHECK-NEXT: csel r1, r1, r5, ne +; CHECK-NEXT: csinc r2, r2, zr, eq +; CHECK-NEXT: rsbs r5, r0, #0 +; CHECK-NEXT: sbcs.w r5, r4, r1 ; CHECK-NEXT: sbcs.w r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r4, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -845,21 +847,21 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: subs.w r7, r0, #-1 ; CHECK-NEXT: mvn r5, #-2147483648 ; CHECK-NEXT: sbcs.w r7, r1, r5 -; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: mov.w r6, #-2147483648 ; CHECK-NEXT: sbcs r7, r2, #0 ; CHECK-NEXT: sbcs r7, r3, #0 ; CHECK-NEXT: cset r7, lt ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: csinv r0, r0, zr, eq ; CHECK-NEXT: csel r3, r3, r7, ne ; CHECK-NEXT: csel r2, r2, r7, ne ; CHECK-NEXT: csel r1, r1, r5, ne ; CHECK-NEXT: rsbs r4, r0, #0 -; CHECK-NEXT: mov.w r7, #-2147483648 -; CHECK-NEXT: sbcs.w r4, r7, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 -; CHECK-NEXT: csel r8, r1, r7, lt +; CHECK-NEXT: mov.w r7, #-1 +; CHECK-NEXT: sbcs.w r4, r6, r1 +; CHECK-NEXT: sbcs.w r2, r7, r2 +; CHECK-NEXT: sbcs.w r2, r7, r3 +; CHECK-NEXT: csel r8, r1, r6, lt ; CHECK-NEXT: cset r1, lt ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: csel r10, r0, r1, ne @@ -871,16 +873,16 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: sbcs r4, r3, #0 ; CHECK-NEXT: cset r4, lt ; 
CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: csinv r0, r0, zr, eq ; CHECK-NEXT: csel r3, r3, r4, ne ; CHECK-NEXT: csel r2, r2, r4, ne ; CHECK-NEXT: csel r1, r1, r5, ne ; CHECK-NEXT: rsbs r5, r0, #0 -; CHECK-NEXT: sbcs.w r5, r7, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: sbcs.w r5, r6, r1 +; CHECK-NEXT: sbcs.w r2, r7, r2 +; CHECK-NEXT: sbcs.w r2, r7, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: csel r1, r1, r7, lt +; CHECK-NEXT: csel r1, r1, r6, lt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne ; CHECK-NEXT: vmov q0[2], q0[0], r10, r0 @@ -931,49 +933,51 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-LABEL: ustest_f32i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: vmov r5, r0, d0 +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vmov r6, r0, d0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs r4, r2, #1 -; CHECK-NEXT: mov.w r8, #1 ; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: cset r4, lt ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: csel r0, r0, r4, ne ; CHECK-NEXT: csel r3, r3, r4, ne ; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: rsbs r4, r0, #0 -; CHECK-NEXT: sbcs.w r4, r6, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: csinc r2, r2, zr, eq +; CHECK-NEXT: rsbs r5, r0, #0 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs.w r5, r4, r1 +; CHECK-NEXT: sbcs.w r2, r4, r2 +; CHECK-NEXT: sbcs.w r2, r4, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r7, r0, r2, ne -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: csel r4, r1, r2, ne +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: csel r5, r1, r2, ne ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r5, r2, #1 -; CHECK-NEXT: sbcs 
r5, r3, #0 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: rsbs r5, r0, #0 -; CHECK-NEXT: sbcs.w r5, r6, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: subs r6, r2, #1 +; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: cset r6, lt +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: csel r3, r3, r6, ne +; CHECK-NEXT: csel r1, r1, r6, ne +; CHECK-NEXT: csinc r2, r2, zr, eq +; CHECK-NEXT: rsbs r6, r0, #0 +; CHECK-NEXT: sbcs.w r6, r4, r1 +; CHECK-NEXT: sbcs.w r2, r4, r2 +; CHECK-NEXT: sbcs.w r2, r4, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne ; CHECK-NEXT: csel r1, r1, r2, ne ; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1741,7 +1745,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: sbcs r5, r3, #0 ; CHECK-NEXT: cset r5, lt ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: csinv r0, r0, zr, eq ; CHECK-NEXT: csel r3, r3, r5, ne ; CHECK-NEXT: csel r2, r2, r5, ne ; CHECK-NEXT: csel r1, r1, r4, ne @@ -1763,7 +1767,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: sbcs r7, r3, #0 ; CHECK-NEXT: cset r7, lt ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: csinv r0, r0, zr, eq ; CHECK-NEXT: csel r3, r3, r7, ne ; CHECK-NEXT: csel r2, r2, r7, ne ; CHECK-NEXT: csel r1, r1, r4, ne @@ -1888,12 +1892,12 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: sbcs r7, r3, #0 ; CHECK-NEXT: cset r7, lt ; 
CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: csinv r0, r0, zr, eq ; CHECK-NEXT: csel r3, r3, r7, ne ; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: mov.w r7, #-1 ; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: csel r0, r0, r7, ne ; CHECK-NEXT: rsbs r4, r0, #0 +; CHECK-NEXT: mov.w r7, #-1 ; CHECK-NEXT: sbcs.w r4, r6, r1 ; CHECK-NEXT: sbcs.w r2, r7, r2 ; CHECK-NEXT: sbcs.w r2, r7, r3 @@ -1909,7 +1913,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: sbcs r4, r3, #0 ; CHECK-NEXT: cset r4, lt ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r7, ne +; CHECK-NEXT: csinv r0, r0, zr, eq ; CHECK-NEXT: csel r3, r3, r4, ne ; CHECK-NEXT: csel r2, r2, r4, ne ; CHECK-NEXT: csel r1, r1, r5, ne diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll index 77548b49d77f2..602d07dfdc366 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll @@ -35,33 +35,32 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f32_v2i32(<2 x float> %f) { ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vldr s18, .LCPI1_0 -; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vldr s20, .LCPI1_1 ; CHECK-NEXT: vcmp.f32 s17, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r4, #-1 -; CHECK-NEXT: movlt.w r5, #-2147483648 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt.w r4, #-2147483648 ; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: csinv r5, r1, zr, lt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt -; CHECK-NEXT: mvngt r5, #-2147483648 -; CHECK-NEXT: movgt r4, #0 +; CHECK-NEXT: mvngt r4, #-2147483648 +; CHECK-NEXT: movgt r5, #0 ; CHECK-NEXT: vcmp.f32 s17, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r4, #0 ; CHECK-NEXT: movvs r5, #0 +; 
CHECK-NEXT: movvs r4, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s16, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt +; CHECK-NEXT: it lt ; CHECK-NEXT: movlt.w r0, #-2147483648 -; CHECK-NEXT: movlt.w r1, #-1 ; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: csinv r1, r1, zr, lt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt r1, #0 @@ -69,10 +68,10 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f32_v2i32(<2 x float> %f) { ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .p2align 2 @@ -339,82 +338,80 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI9_0 -; CHECK-NEXT: vmov r9, r8, d9 -; CHECK-NEXT: vmov r7, r3, d0 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: vmov r8, r7, d9 +; CHECK-NEXT: vmov r11, r5, d0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vldr d0, .LCPI9_1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: mov r4, r0 +; 
CHECK-NEXT: csinv r9, r1, zr, eq ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: vmov r11, r10, d8 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill -; CHECK-NEXT: itt eq -; CHECK-NEXT: moveq.w r4, #-1 -; CHECK-NEXT: moveq.w r5, #-2147483648 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r6, r10, d8 +; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq.w r4, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: itt ne -; CHECK-NEXT: mvnne r5, #-2147483648 -; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: mvnne r4, #-2147483648 +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: itt eq +; CHECK-NEXT: it eq ; CHECK-NEXT: moveq.w r7, #-2147483648 -; CHECK-NEXT: moveq.w r6, #-1 -; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: csinv r5, r1, zr, eq +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 +; 
CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movne r5, #0 ; CHECK-NEXT: mvnne r7, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r5, #0 ; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r7, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r5, r9 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -1212,34 +1209,33 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) { ; CHECK-NEXT: vcvtt.f32.f16 s18, s16 ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vldr s20, .LCPI15_0 ; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vldr s20, .LCPI15_0 -; CHECK-NEXT: vldr s22, .LCPI15_1 -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: vldr s22, .LCPI15_1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r4, #-1 -; CHECK-NEXT: movlt.w r5, #-2147483648 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt.w r4, #-2147483648 ; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: csinv r5, r1, zr, lt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt -; CHECK-NEXT: mvngt r5, #-2147483648 -; CHECK-NEXT: movgt r4, #0 +; CHECK-NEXT: mvngt r4, #-2147483648 +; CHECK-NEXT: movgt r5, #0 ; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r4, #0 ; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: movvs r4, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt +; CHECK-NEXT: it lt 
; CHECK-NEXT: movlt.w r0, #-2147483648 -; CHECK-NEXT: movlt.w r1, #-1 ; CHECK-NEXT: vcmp.f32 s16, s22 +; CHECK-NEXT: csinv r1, r1, zr, lt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt r1, #0 @@ -1247,10 +1243,10 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) { ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .p2align 2 @@ -1748,125 +1744,122 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f32_v4i32_duplicate(<4 x float> define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i50: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vldr s22, .LCPI28_0 -; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vldr s20, .LCPI28_1 +; CHECK-NEXT: vmov r1, s18 ; CHECK-NEXT: vcmp.f32 s17, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movwlt r11, #0 -; CHECK-NEXT: movtlt r11, #65534 +; CHECK-NEXT: movwlt r9, #0 +; CHECK-NEXT: movtlt r9, #65534 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: movlt r0, #0 ; 
CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r9, #-1 ; CHECK-NEXT: vcmp.f32 s17, s17 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r11, #65535 -; CHECK-NEXT: movtgt r11, #1 +; CHECK-NEXT: movwgt r9, #65535 +; CHECK-NEXT: movtgt r9, #1 +; CHECK-NEXT: csinv r8, r0, zr, gt +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs.w r8, #0 ; CHECK-NEXT: movvs.w r9, #0 -; CHECK-NEXT: movvs.w r11, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movtlt r5, #65534 ; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movtlt r7, #65534 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vmov r1, s19 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #1 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: movwgt r7, #65535 +; CHECK-NEXT: movtgt r7, #1 ; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: csinv r10, r0, zr, gt +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movtlt r7, #65534 ; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movtlt r5, #65534 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; 
CHECK-NEXT: vmov r1, s16 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r7, #65535 -; CHECK-NEXT: movtgt r7, #1 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #1 ; CHECK-NEXT: vcmp.f32 s19, s19 +; CHECK-NEXT: csinv r6, r0, zr, gt +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: bfc r5, #18, #14 +; CHECK-NEXT: bfc r7, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: ittt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bfc r9, #18, #14 +; CHECK-NEXT: lsrl r2, r7, #28 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r8] -; CHECK-NEXT: lsr.w r0, r7, #10 -; CHECK-NEXT: bfc r7, #18, #14 -; CHECK-NEXT: bfc r11, #18, #14 -; CHECK-NEXT: lsll r4, r7, #22 -; CHECK-NEXT: orr.w r3, r5, r7 -; CHECK-NEXT: str.w r3, [r8, #20] -; CHECK-NEXT: orr.w r2, r2, r4 -; CHECK-NEXT: str.w r2, [r8, #16] -; CHECK-NEXT: strb.w r0, [r8, #24] -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: lsrl r0, r11, #14 -; CHECK-NEXT: orr.w r2, r11, r6, lsl #4 -; CHECK-NEXT: strd r0, r2, [r8, #8] +; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: lsr.w r0, r5, #10 +; CHECK-NEXT: bfc r5, #18, #14 +; CHECK-NEXT: lsll r6, r5, #22 +; CHECK-NEXT: orr.w r3, r7, r5 +; CHECK-NEXT: str r3, [r4, #20] +; CHECK-NEXT: 
orr.w r2, r2, r6 +; CHECK-NEXT: str r2, [r4, #16] +; CHECK-NEXT: strb r0, [r4, #24] +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: lsrl r0, r9, #14 +; CHECK-NEXT: orr.w r2, r9, r10, lsl #4 +; CHECK-NEXT: strd r0, r2, [r4, #8] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: orr.w r0, r1, r9, lsl #18 -; CHECK-NEXT: str.w r0, [r8, #4] +; CHECK-NEXT: orr.w r0, r1, r8, lsl #18 +; CHECK-NEXT: str r0, [r4, #4] ; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI28_0: @@ -1880,98 +1873,93 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) { define arm_aapcs_vfpcc <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vldr s22, .LCPI29_0 -; CHECK-NEXT: mov r10, r1 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vldr s20, .LCPI29_1 -; CHECK-NEXT: vmov r9, s17 -; CHECK-NEXT: vcmp.f32 s19, s22 +; CHECK-NEXT: vmov r1, s19 +; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r10, #-2147483648 -; CHECK-NEXT: movlt.w r11, #0 +; CHECK-NEXT: movlt.w r4, #-2147483648 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r4, #-2147483648 +; 
CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: vmov r7, s17 +; CHECK-NEXT: vmov r8, s18 +; CHECK-NEXT: csinv r9, r0, zr, gt +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s19, s22 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r5, #-2147483648 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: mvngt r10, #-2147483648 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r5, #-2147483648 ; CHECK-NEXT: vcmp.f32 s19, s19 +; CHECK-NEXT: csinv r10, r0, zr, gt +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r8, s16 ; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: movvs.w r11, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vcmp.f32 s17, s22 +; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt.w r6, #-2147483648 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: movlt.w r7, #-2147483648 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: itt gt -; CHECK-NEXT: mvngt r6, #-2147483648 -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r7, #-2147483648 +; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: csinv r6, r0, zr, gt +; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vmrs 
APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r4, #-2147483648 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: mvngt r4, #-2147483648 -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: vmov q1[2], q1[0], r7, r11 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: vmov q0[2], q0[0], r9, r6 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt.w r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmov q1[3], q1[1], r6, r10 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt +; CHECK-NEXT: it gt ; CHECK-NEXT: mvngt r1, #-2147483648 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: vmov q0[3], q0[1], r4, r7 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: vmov q1[2], q1[0], r0, r10 +; CHECK-NEXT: vmov q1[3], q1[1], r1, r5 ; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI29_0: @@ -1997,25 +1985,27 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vmov r6, s17 ; CHECK-NEXT: vldr s22, .LCPI30_0 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vldr s20, 
.LCPI30_1 -; CHECK-NEXT: vmov r7, s19 +; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: vmov r7, s19 +; CHECK-NEXT: csinv r3, r6, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 @@ -2024,83 +2014,77 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: str.w r1, [r4, #29] ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r4, #25] -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: str.w r3, [r4, #25] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: mov r6, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: mvnlt r6, #7 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt.w r10, #-1 -; 
CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r6, #7 ; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r10, r2, zr, gt +; CHECK-NEXT: csinv r11, r1, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-NEXT: ittt vs ; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs.w r11, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: mov r8, r2 -; CHECK-NEXT: mov r9, r3 +; CHECK-NEXT: mov r7, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r9, #7 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: mvnlt r7, #7 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r9, #7 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r7, #7 ; CHECK-NEXT: vcmp.f32 s19, s19 +; CHECK-NEXT: csinv r5, r0, zr, gt +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: csinv r9, r1, zr, gt +; CHECK-NEXT: csinv r8, r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs.w r9, #0 -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: movvs.w r11, #0 ; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 ; 
CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 @@ -2111,31 +2095,31 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: lsrl r0, r11, #28 -; CHECK-NEXT: and r1, r9, #15 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: and r1, r7, #15 +; CHECK-NEXT: lsrl r0, r9, #28 ; CHECK-NEXT: str.w r0, [r4, #41] -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: lsrl r0, r5, #28 +; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: lsrl r0, r11, #28 ; CHECK-NEXT: str r0, [r4, #16] -; CHECK-NEXT: orr.w r0, r11, r8, lsl #4 +; CHECK-NEXT: orr.w r0, r9, r8, lsl #4 ; CHECK-NEXT: lsrl r8, r1, #28 ; CHECK-NEXT: str.w r0, [r4, #45] +; CHECK-NEXT: orr.w r0, r11, r10, lsl #4 +; CHECK-NEXT: str r0, [r4, #20] ; CHECK-NEXT: strb.w r8, [r4, #49] +; CHECK-NEXT: and r1, r6, #15 ; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: lsrl r10, r1, #28 ; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 ; CHECK-NEXT: str.w r0, [r4, #37] -; CHECK-NEXT: orr.w r0, r5, r6, lsl #4 -; CHECK-NEXT: str r0, [r4, #20] -; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r6, r1, #28 -; CHECK-NEXT: strb r6, [r4, #24] +; CHECK-NEXT: strb.w r10, [r4, #24] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs 
r3, #0 ; CHECK-NEXT: and r0, r3, #15 -; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 ; CHECK-NEXT: str r0, [r4, #12] ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9, d10, d11} @@ -2154,122 +2138,141 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { define arm_aapcs_vfpcc <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i128: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vmov r5, s18 ; CHECK-NEXT: vldr s22, .LCPI31_0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vldr s20, .LCPI31_1 -; CHECK-NEXT: add.w r12, r4, #48 -; CHECK-NEXT: vcmp.f32 s19, s22 +; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: vcmp.f32 s17, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt.w r11, #-2147483648 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s19, s19 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r11, #-2147483648 +; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: csinv 
r10, r7, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: movvs.w r10, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: vmov r7, s16 -; CHECK-NEXT: vmov r6, s17 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: add.w r12, r4, #32 +; CHECK-NEXT: mov r6, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt.w r6, #-2147483648 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r6, #-2147483648 ; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: csinv r9, r0, zr, gt +; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: csinv r5, r2, zr, gt +; CHECK-NEXT: csinv r8, r1, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: add.w r12, r4, #16 +; CHECK-NEXT: vcmp.f32 s19, s22 +; CHECK-NEXT: mov r7, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s19, s20 ; 
CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: movlt.w r7, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: vcmp.f32 s19, s19 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r7, #-2147483648 +; CHECK-NEXT: add.w r3, r4, #48 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: ittt vs ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: stm r3!, {r0, r1, r2} +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: strd r9, r8, [r4, #32] +; CHECK-NEXT: str r5, [r4, #40] +; CHECK-NEXT: str.w r10, [r4, #16] +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: str r1, [r4, #20] +; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-NEXT: str r1, [r4, #24] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: it gt ; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: csinv r2, 
r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [r4, #8] +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [r4, #4] +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} +; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: str r7, [r4, #60] +; CHECK-NEXT: str r6, [r4, #44] +; CHECK-NEXT: str.w r11, [r4, #28] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [r4, #12] +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI31_0: @@ -2303,66 +2306,62 @@ define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI32_0 -; CHECK-NEXT: vmov r8, r7, d8 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: vmov r8, r4, d8 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: vmov r11, r3, d0 -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2iz ; CHECK-NEXT: vldr d0, .LCPI32_1 -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: csinv r7, r0, zr, eq ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: vmov r10, r3, 
d0 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: vmov r6, r5, d9 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r9, #-1 -; CHECK-NEXT: mov r10, r3 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r9, r3 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #0 +; CHECK-NEXT: movne r7, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #0 -; CHECK-NEXT: and r0, r9, #1 -; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: and r0, r7, #1 +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: bfi r4, r0, #0, #1 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: bfi r7, r0, #0, #1 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2iz -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r7, #-1 -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: csinv r4, r0, zr, eq +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r6 @@ -2370,17 +2369,17 @@ define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) { ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: movne r4, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; 
CHECK-NEXT: and r0, r7, #1 +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: and r0, r4, #1 ; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: bfi r4, r0, #1, #1 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: strb r4, [r0] -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: bfi r7, r0, #1, #1 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: strb r7, [r0] +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2405,82 +2404,80 @@ define arm_aapcs_vfpcc <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI33_0 -; CHECK-NEXT: vmov r9, r8, d9 -; CHECK-NEXT: vmov r7, r3, d0 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: vmov r8, r7, d9 +; CHECK-NEXT: vmov r11, r5, d0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vldr d0, .LCPI33_1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: csinv r9, r1, zr, eq ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: vmov r11, r10, d8 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill -; CHECK-NEXT: itt eq -; CHECK-NEXT: moveq.w r4, #-1 -; CHECK-NEXT: mvneq r5, #127 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r6, r10, d8 +; 
CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: it eq +; CHECK-NEXT: mvneq r4, #127 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r5, #127 -; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: movne r4, #127 +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: itt eq +; CHECK-NEXT: it eq ; CHECK-NEXT: mvneq r7, #127 -; CHECK-NEXT: moveq.w r6, #-1 -; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: csinv r5, r1, zr, eq +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movne r5, #0 ; CHECK-NEXT: movne r7, #127 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r5, #0 ; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: vmov q0[2], q0[0], 
r7, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r5, r9 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2505,84 +2502,82 @@ define arm_aapcs_vfpcc <2 x i13> @test_signed_v2f64_v2i13(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI34_0 -; CHECK-NEXT: vmov r9, r8, d9 -; CHECK-NEXT: vmov r7, r3, d0 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: vmov r8, r7, d9 +; CHECK-NEXT: vmov r11, r5, d0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vldr d0, .LCPI34_1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: csinv r9, r1, zr, eq ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: vmov r11, r10, d8 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill -; CHECK-NEXT: ittt eq -; CHECK-NEXT: movweq r5, #61440 -; CHECK-NEXT: movteq r5, #65535 -; CHECK-NEXT: moveq.w r4, #-1 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r6, r10, d8 +; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: itt eq +; CHECK-NEXT: movweq r4, #61440 +; CHECK-NEXT: movteq r4, #65535 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; 
CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r5, #4095 -; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: movwne r4, #4095 +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: ittt eq +; CHECK-NEXT: itt eq ; CHECK-NEXT: movweq r7, #61440 ; CHECK-NEXT: movteq r7, #65535 -; CHECK-NEXT: moveq.w r6, #-1 -; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: csinv r5, r1, zr, eq +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movne r5, #0 ; CHECK-NEXT: movwne r7, #4095 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movne r5, #0 ; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r7, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r4 +; 
CHECK-NEXT: vmov q0[3], q0[1], r5, r9 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2607,84 +2602,82 @@ define arm_aapcs_vfpcc <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI35_0 -; CHECK-NEXT: vmov r9, r8, d9 -; CHECK-NEXT: vmov r7, r3, d0 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: vmov r8, r7, d9 +; CHECK-NEXT: vmov r11, r5, d0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vldr d0, .LCPI35_1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: csinv r9, r1, zr, eq ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: vmov r11, r10, d8 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill -; CHECK-NEXT: ittt eq -; CHECK-NEXT: movweq r5, #32768 -; CHECK-NEXT: movteq r5, #65535 -; CHECK-NEXT: moveq.w r4, #-1 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r6, r10, d8 +; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: itt eq +; CHECK-NEXT: movweq r4, #32768 +; CHECK-NEXT: movteq r4, #65535 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r0, 
r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r5, #32767 -; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: movwne r4, #32767 +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: ittt eq +; CHECK-NEXT: itt eq ; CHECK-NEXT: movweq r7, #32768 ; CHECK-NEXT: movteq r7, #65535 -; CHECK-NEXT: moveq.w r6, #-1 -; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: csinv r5, r1, zr, eq +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movne r5, #0 ; CHECK-NEXT: movwne r7, #32767 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movne r5, #0 ; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r7, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r5, r9 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; 
CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2709,94 +2702,92 @@ define arm_aapcs_vfpcc <2 x i19> @test_signed_v2f64_v2i19(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI36_0 -; CHECK-NEXT: vmov r5, r4, d9 -; CHECK-NEXT: vmov r7, r6, d0 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: vmov r9, r4, d9 +; CHECK-NEXT: vmov r6, r10, d0 +; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: strd r5, r4, [sp, #12] @ 8-byte Folded Spill +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vldr d0, .LCPI36_1 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r11, r5, d8 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: vmov r10, r0, d0 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: ittt eq -; CHECK-NEXT: movweq r8, #0 -; CHECK-NEXT: movteq r8, #65532 -; CHECK-NEXT: moveq.w r9, #-1 -; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: vmov r8, r11, d8 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: vmov r0, r7, d0 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: strd r7, r0, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: csinv r0, r1, zr, eq +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq r4, #0 +; CHECK-NEXT: movteq r4, #65532 +; 
CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: ittt eq -; CHECK-NEXT: moveq r7, #0 -; CHECK-NEXT: movteq r7, #65532 -; CHECK-NEXT: moveq.w r6, #-1 -; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq r6, #0 +; CHECK-NEXT: movteq r6, #65532 +; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: csinv r10, r1, zr, eq +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: ittt ne -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: movwne r7, #65535 -; CHECK-NEXT: movtne r7, #3 -; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: movne.w r10, #0 +; CHECK-NEXT: movwne r6, #65535 +; CHECK-NEXT: movtne r6, #3 +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: ittt ne -; CHECK-NEXT: movne.w r9, #0 -; CHECK-NEXT: movwne r8, #65535 -; CHECK-NEXT: movtne r8, #3 +; CHECK-NEXT: movne 
r5, #0 +; CHECK-NEXT: movwne r4, #65535 +; CHECK-NEXT: movtne r4, #3 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movne.w r9, #0 -; CHECK-NEXT: movne.w r8, #0 -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: movne r5, #0 +; CHECK-NEXT: movne r4, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r10, #0 ; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r7, r8 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r9 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r10, r5 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2821,82 +2812,80 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32_duplicate(<2 x double> ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI37_0 -; CHECK-NEXT: vmov r9, r8, d9 -; CHECK-NEXT: vmov r7, r3, d0 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: vmov r8, r7, d9 +; CHECK-NEXT: vmov r11, r5, d0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vldr d0, .LCPI37_1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r4, r1 
-; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: csinv r9, r1, zr, eq ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: vmov r11, r10, d8 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill -; CHECK-NEXT: itt eq -; CHECK-NEXT: moveq.w r4, #-1 -; CHECK-NEXT: moveq.w r5, #-2147483648 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r6, r10, d8 +; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq.w r4, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: itt ne -; CHECK-NEXT: mvnne r5, #-2147483648 -; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: mvnne r4, #-2147483648 +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: itt eq +; CHECK-NEXT: it eq ; CHECK-NEXT: moveq.w r7, #-2147483648 -; CHECK-NEXT: moveq.w r6, #-1 -; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: csinv r5, r1, zr, eq +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 ; 
CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movne r5, #0 ; CHECK-NEXT: mvnne r7, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r5, #0 ; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r7, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r5, r9 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2925,67 +2914,72 @@ define arm_aapcs_vfpcc <2 x i50> @test_signed_v2f64_v2i50(<2 x double> %f) { ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI38_0 -; CHECK-NEXT: vmov r5, r4, d9 +; CHECK-NEXT: vmov r9, r4, d9 ; CHECK-NEXT: vmov r6, r7, d0 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: strd r4, r5, [sp, #8] @ 8-byte Folded Spill ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vmov r5, r10, d8 ; CHECK-NEXT: vldr d0, .LCPI38_1 -; CHECK-NEXT: cmp.w r11, #0 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vmov r9, r8, d0 -; CHECK-NEXT: csel r11, r0, r11, ne +; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: vmov r8, r1, d0 +; CHECK-NEXT: csel r0, r0, r11, ne ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp] @ 4-byte Spill ; CHECK-NEXT: itt eq ; 
CHECK-NEXT: moveq r4, #0 ; CHECK-NEXT: movteq r4, #65534 +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: csel r7, r0, r7, ne -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: csel r11, r0, r7, ne ; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r6, #0 ; CHECK-NEXT: movteq r6, #65534 +; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ittt ne -; CHECK-NEXT: movne.w r7, #-1 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: csinv r0, r11, zr, ne +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: itt ne ; CHECK-NEXT: movwne r6, #65535 ; CHECK-NEXT: movtne r6, #1 -; CHECK-NEXT: ldrd r9, r0, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: ldr.w r11, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ittt ne -; CHECK-NEXT: movne.w r11, #-1 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: csinv r7, r0, zr, ne +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: itt ne ; CHECK-NEXT: movwne r4, #65535 ; CHECK-NEXT: movtne r4, #1 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r2, r0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r5 @@ -2993,13 +2987,14 @@ define 
arm_aapcs_vfpcc <2 x i50> @test_signed_v2f64_v2i50(<2 x double> %f) { ; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movne.w r11, #0 +; CHECK-NEXT: movne r7, #0 ; CHECK-NEXT: movne r4, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r7, r11 +; CHECK-NEXT: movne r0, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r6, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 @@ -3028,16 +3023,16 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI39_0 ; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: vmov r11, r5, d0 +; CHECK-NEXT: vmov r11, r3, d0 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: mov r0, r8 @@ -3046,12 +3041,12 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) { ; CHECK-NEXT: vldr d0, .LCPI39_1 ; CHECK-NEXT: cmp.w r9, #0 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: csel r9, r0, r9, ne +; CHECK-NEXT: csel r5, r0, r9, ne ; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: vmov r6, r10, d8 -; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill ; CHECK-NEXT: it eq ; CHECK-NEXT: moveq.w r4, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpgt @@ -3060,18 +3055,18 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) { ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: mov r2, r8 ; CHECK-NEXT: mov r3, r7 -; 
CHECK-NEXT: itt ne -; CHECK-NEXT: movne.w r9, #-1 +; CHECK-NEXT: it ne ; CHECK-NEXT: mvnne r4, #-2147483648 +; CHECK-NEXT: csinv r9, r5, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r9, #0 +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r6 @@ -3081,7 +3076,7 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) { ; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: moveq.w r7, #-2147483648 -; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload ; CHECK-NEXT: csel r5, r0, r5, ne ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r10 @@ -3091,9 +3086,9 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) { ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: itt ne +; CHECK-NEXT: it ne ; CHECK-NEXT: mvnne r7, #-2147483648 -; CHECK-NEXT: movne.w r5, #-1 +; CHECK-NEXT: csinv r5, r5, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: itt ne @@ -3101,7 +3096,7 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) { ; CHECK-NEXT: movne r7, #0 ; CHECK-NEXT: vmov q0[2], q0[0], r5, r9 ; CHECK-NEXT: vmov q0[3], q0[1], r7, r4 -; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -3126,105 +3121,108 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #48 -; CHECK-NEXT: sub sp, #48 +; 
CHECK-NEXT: .pad #40 +; CHECK-NEXT: sub sp, #40 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI40_0 -; CHECK-NEXT: vmov r7, r6, d8 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r10, r9, d0 +; CHECK-NEXT: vmov r7, r5, d8 +; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: vmov r9, r8, d0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: vldr d0, .LCPI40_1 ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: csel r4, r2, r4, ne -; CHECK-NEXT: vmov r5, r11, d0 -; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: vmov r6, r10, d0 +; CHECK-NEXT: strd r0, r3, [sp, #20] @ 8-byte Folded Spill ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: str r6, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: csinv r4, r4, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str.w r8, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: str.w r4, [r8, #8] -; CHECK-NEXT: str.w r9, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str.w r11, [sp, #36] @ 4-byte 
Spill +; CHECK-NEXT: mov r6, r11 +; CHECK-NEXT: str.w r4, [r11, #8] +; CHECK-NEXT: mov r11, r9 +; CHECK-NEXT: str.w r9, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r9, r8 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr.w r8, [sp, #32] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: str.w r11, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r8 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: csinv r4, r4, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str.w r4, [r8, #4] +; CHECK-NEXT: str r4, [r6, #4] +; CHECK-NEXT: mov r6, r9 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r11, r8 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r7, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: str r6, [sp, #28] @ 4-byte 
Spill +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: csinv r4, r4, zr, ne +; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: str r5, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: vmov r9, r8, d9 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: str r4, [r0] -; CHECK-NEXT: ldr.w r11, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: ldr r5, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_dcmpge @@ -3233,101 +3231,99 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: strd r2, r3, [sp, #12] @ 8-byte Folded Spill -; CHECK-NEXT: csel r7, r1, r4, ne -; CHECK-NEXT: mov r4, r5 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: ldr r5, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: csel r4, r1, r4, ne +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: mov r11, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 +; CHECK-NEXT: csinv r4, r4, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: movne r4, #0 +; 
CHECK-NEXT: mov r7, r4 +; CHECK-NEXT: mov r10, r5 ; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: csel r6, r6, r0, ne +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 +; CHECK-NEXT: csinv r4, r4, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: lsrl r0, r7, #28 ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: str r0, [r1, #16] ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: str.w r10, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mov r6, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr.w r11, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r5, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: csinv r4, r4, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpun ; 
CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: ldr.w r11, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: orr.w r0, r7, r4, lsl #4 -; CHECK-NEXT: ldr r7, [sp, #44] @ 4-byte Reload ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: str r0, [r7, #20] +; CHECK-NEXT: str.w r0, [r11, #20] ; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: str r6, [sp] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: it eq ; CHECK-NEXT: mvneq r6, #7 +; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: mov r10, r5 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r9 @@ -3343,28 +3339,28 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: movne r0, #0 ; CHECK-NEXT: and r1, r0, #15 ; CHECK-NEXT: lsrl r4, r1, #28 -; CHECK-NEXT: strb r4, [r7, #24] -; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: strb.w r4, [r11, #24] +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: ldr r4, [sp, #24] @ 4-byte 
Reload +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: it eq ; CHECK-NEXT: mvneq r4, #7 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #7 ; CHECK-NEXT: bl __aeabi_dcmpun @@ -3375,8 +3371,8 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: and r0, r0, #15 ; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 -; CHECK-NEXT: str r0, [r7, #12] -; CHECK-NEXT: add sp, #48 +; CHECK-NEXT: str.w r0, [r11, #12] +; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -3401,243 +3397,246 @@ define arm_aapcs_vfpcc <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: .pad #48 +; CHECK-NEXT: sub sp, #48 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI41_0 -; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: vmov r6, r5, d9 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: vldr d0, .LCPI41_1 -; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: vmov r10, r11, d0 -; CHECK-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: cmp r6, #0 -; 
CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r5, #-2147483648 -; CHECK-NEXT: str.w r11, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r5, #-2147483648 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: str.w r5, [r9, #28] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: str r4, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: csel r5, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: str.w r10, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-1 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: str.w r5, [r9, #24] -; CHECK-NEXT: mov r11, r6 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 
-; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str.w r4, [r9, #20] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: vmov r6, r5, d8 -; CHECK-NEXT: mov r10, r9 -; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r9, r11 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr.w r11, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str.w r4, [r10, #16] +; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r11, r3 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r9, r3 -; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: vldr d0, .LCPI41_1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r9, #-2147483648 -; CHECK-NEXT: ldr.w r10, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: csel r7, r2, r7, ne +; CHECK-NEXT: vmov r9, r10, d0 +; CHECK-NEXT: strd r0, r3, [sp, #24] @ 8-byte Folded Spill ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: 
mov r2, r10 +; CHECK-NEXT: str.w r9, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: str.w r10, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r9, #-2147483648 +; CHECK-NEXT: csinv r7, r7, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #0 -; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: str.w r7, [r8, #24] +; CHECK-NEXT: mov r4, r8 +; CHECK-NEXT: ldr.w r8, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: str.w r11, [sp, #40] @ 4-byte Spill ; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: str.w r9, [r7, #12] -; CHECK-NEXT: ldr.w r9, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: csel r4, r4, r0, ne +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: csel r7, r1, r0, ne ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: csinv r7, r7, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str r4, [r7, #8] +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: str r7, [r4, #20] +; CHECK-NEXT: mov r9, r4 +; CHECK-NEXT: mov r10, r8 ; CHECK-NEXT: bl __aeabi_dcmpge -; 
CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: csel r4, r1, r0, ne +; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: csel r7, r1, r0, ne ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: csinv r7, r7, zr, ne +; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: vmov r11, r8, d8 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str r4, [r7, #4] -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: str.w r7, [r9, #16] +; CHECK-NEXT: ldr r3, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: ldr.w r10, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: strd r1, r0, [sp, #12] @ 8-byte Folded Spill +; CHECK-NEXT: csel r7, r2, r7, ne +; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: mov r5, r4 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 +; 
CHECK-NEXT: csinv r7, r7, zr, ne +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: str.w r7, [r9, #8] +; CHECK-NEXT: mov r6, r9 +; CHECK-NEXT: ldr.w r9, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: ldr r4, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: csel r7, r1, r0, ne +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csinv r7, r7, zr, ne +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: str r6, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r7, [r6, #4] +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr.w r9, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r5, r10 +; CHECK-NEXT: csel r7, r1, r0, ne +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csinv r7, r7, zr, ne +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: 
it ne +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: str r7, [r6] +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr.w r10, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldr r4, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq.w r4, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: mvnne r4, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str r4, [r7] -; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: str r4, [r5, #28] +; CHECK-NEXT: ldr r3, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r4, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq.w r4, #-2147483648 +; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: it ne +; CHECK-NEXT: mvnne r4, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #0 +; CHECK-NEXT: str r0, [r5, #12] +; CHECK-NEXT: add sp, #48 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w 
{r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -4288,101 +4287,98 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: vcvtt.f32.f16 s24, s16 ; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s26, s17 -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: vmov r0, s26 ; CHECK-NEXT: vldr s22, .LCPI48_0 +; CHECK-NEXT: vcvtb.f32.f16 s26, s17 ; CHECK-NEXT: vldr s20, .LCPI48_1 +; CHECK-NEXT: vmov r2, s26 ; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movtlt r1, #65534 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #1 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtt.f32.f16 s24, s17 -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: vcvtt.f32.f16 s24, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vmov r2, s24 +; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; 
CHECK-NEXT: vcmp.f32 s26, s26 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s26, s18 -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: vmov r0, s26 ; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movtlt r1, #65534 ; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movtlt r5, #65534 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcvtb.f32.f16 s26, s18 +; CHECK-NEXT: vmov r1, s26 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r1, #65535 -; CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #1 ; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp] @ 4-byte Spill +; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s26, s22 ; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: ittt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: movtlt r6, #65534 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: vcvtt.f32.f16 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt +; 
CHECK-NEXT: vcmp.f32 s26, s26 +; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r6, #65535 ; CHECK-NEXT: movtgt r6, #1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s26, s26 +; CHECK-NEXT: vcvtt.f32.f16 s18, s18 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 @@ -4392,124 +4388,122 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: ittt lt -; CHECK-NEXT: movwlt r9, #0 -; CHECK-NEXT: movtlt r9, #65534 -; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movtlt r7, #65534 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r7, #65535 +; CHECK-NEXT: movtgt r7, #1 ; CHECK-NEXT: vcvtb.f32.f16 s18, s19 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movwgt r9, #65535 -; CHECK-NEXT: movtgt r9, #1 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: ittt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movtlt r5, #65534 -; CHECK-NEXT: movlt.w r11, #0 +; CHECK-NEXT: movwlt r9, #0 +; CHECK-NEXT: movtlt r9, #65534 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 
s18, s18 ; CHECK-NEXT: vcvtt.f32.f16 s18, s19 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r9, #65535 +; CHECK-NEXT: movtgt r9, #1 +; CHECK-NEXT: csinv r8, r0, zr, gt ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r11, #0 -; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs.w r9, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movtlt r7, #65534 -; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movwlt r11, #0 +; CHECK-NEXT: movtlt r11, #65534 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movwgt r7, #65535 -; CHECK-NEXT: movtgt r7, #1 +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r11, #65535 +; CHECK-NEXT: movtgt r11, #1 ; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: csinv r10, r0, zr, gt +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: movvs.w r11, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: bfc r5, #18, #14 +; CHECK-NEXT: bfc r9, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: ittt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: itt gt ; 
CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: bfc r7, #18, #14 +; CHECK-NEXT: lsrl r2, r9, #28 +; CHECK-NEXT: bfc r6, #18, #14 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: lsr.w r0, r7, #10 -; CHECK-NEXT: bfc r7, #18, #14 -; CHECK-NEXT: bfc r9, #18, #14 -; CHECK-NEXT: lsll r10, r7, #22 -; CHECK-NEXT: bfc r6, #18, #14 -; CHECK-NEXT: orr.w r3, r5, r7 +; CHECK-NEXT: lsr.w r0, r11, #10 +; CHECK-NEXT: bfc r11, #18, #14 +; CHECK-NEXT: lsll r10, r11, #22 +; CHECK-NEXT: orr.w r3, r9, r11 ; CHECK-NEXT: str.w r3, [r4, #45] ; CHECK-NEXT: orr.w r2, r2, r10 ; CHECK-NEXT: str.w r2, [r4, #41] ; CHECK-NEXT: strb.w r0, [r4, #49] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: lsrl r0, r9, #14 -; CHECK-NEXT: orr.w r2, r9, r11, lsl #4 +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: lsrl r0, r7, #14 +; CHECK-NEXT: orr.w r2, r7, r8, lsl #4 ; CHECK-NEXT: str.w r2, [r4, #37] ; CHECK-NEXT: str.w r0, [r4, #33] -; CHECK-NEXT: orr.w r0, r6, r8, lsl #18 +; CHECK-NEXT: orr.w r0, r6, r3, lsl #18 ; CHECK-NEXT: str.w r0, [r4, #29] -; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: lsr.w r7, r5, #10 +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: bfc r5, #18, #14 ; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: lsr.w r5, r3, #10 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: bfc r3, #18, #14 -; CHECK-NEXT: lsll r0, r3, #22 -; CHECK-NEXT: mov r7, r3 -; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: bfc r3, #18, #14 +; CHECK-NEXT: lsll r0, r5, #22 ; CHECK-NEXT: lsrl r2, r3, #28 -; CHECK-NEXT: orr.w r3, r3, r7 +; CHECK-NEXT: orr.w r3, r3, r5 ; CHECK-NEXT: str r3, [r4, #20] ; 
CHECK-NEXT: orr.w r2, r2, r0 ; CHECK-NEXT: str r2, [r4, #16] -; CHECK-NEXT: strb r5, [r4, #24] +; CHECK-NEXT: strb r7, [r4, #24] ; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: bfc r3, #18, #14 @@ -4546,176 +4540,175 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s19 -; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: vcvtt.f32.f16 s22, s16 +; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s26, s19 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r0, s26 ; CHECK-NEXT: vldr s28, .LCPI49_0 -; CHECK-NEXT: vldr s30, .LCPI49_1 +; CHECK-NEXT: vcvtb.f32.f16 s20, s18 +; CHECK-NEXT: vcvtt.f32.f16 s24, s18 +; CHECK-NEXT: vldr s18, .LCPI49_1 +; CHECK-NEXT: vcmp.f32 s22, s28 ; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: vcmp.f32 s24, s28 -; CHECK-NEXT: vcvtt.f32.f16 s20, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcvtt.f32.f16 s26, s19 +; CHECK-NEXT: vcmp.f32 s22, s18 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r8, #-2147483648 -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s24, s30 -; CHECK-NEXT: vcvtt.f32.f16 s22, s18 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: vmov r1, s26 +; CHECK-NEXT: it gt ; CHECK-NEXT: mvngt r8, #-2147483648 -; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: vcmp.f32 s22, s22 +; CHECK-NEXT: vmov r4, s20 +; CHECK-NEXT: vmov r5, s24 +; CHECK-NEXT: csinv r7, r0, zr, gt +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r6, s20 -; CHECK-NEXT: vmov r4, s22 ; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vcmp.f32 
s26, s28 -; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s26, s18 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt.w r11, #-2147483648 -; CHECK-NEXT: vcmp.f32 s26, s30 +; CHECK-NEXT: movlt.w r9, #-2147483648 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: itt gt -; CHECK-NEXT: mvngt r11, #-2147483648 -; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r9, #-2147483648 ; CHECK-NEXT: vcmp.f32 s26, s26 +; CHECK-NEXT: csinv r6, r0, zr, gt +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vcmp.f32 s22, s28 -; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: vcmp.f32 s24, s28 +; CHECK-NEXT: mov r10, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s18 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r4, #-2147483648 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s22, s30 +; CHECK-NEXT: movlt.w r10, #-2147483648 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: mvngt r4, #-2147483648 -; CHECK-NEXT: vcmp.f32 s22, s22 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r10, #-2147483648 +; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: csinv r5, r0, zr, gt +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: movvs.w r10, #0 ; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vcmp.f32 s20, s28 +; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 
s20, s18 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r6, #-2147483648 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s20, s30 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt.w r11, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: mvngt r6, #-2147483648 +; CHECK-NEXT: vcvtb.f32.f16 s24, s19 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r11, #-2147483648 ; CHECK-NEXT: vcmp.f32 s20, s20 +; CHECK-NEXT: csinv r1, r0, zr, gt +; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vmov q5[2], q5[0], r1, r5 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s28 +; CHECK-NEXT: vcmp.f32 s24, s28 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s18 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt.w r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: movlt.w r4, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: itt gt -; CHECK-NEXT: mvngt r1, #-2147483648 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r4, #-2147483648 +; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: vmov.f32 s19, s28 +; CHECK-NEXT: csinv r1, r0, zr, gt +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vmov q6[2], q6[0], r1, r6 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s16, s19 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s18 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt.w r5, #-2147483648 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s16 ; 
CHECK-NEXT: vcvtt.f32.f16 s16, s17 -; CHECK-NEXT: vmov q5[2], q5[0], r0, r7 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r5, #-2147483648 +; CHECK-NEXT: csinv r1, r0, zr, gt ; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: it vs +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q5[3], q5[1], r1, r6 +; CHECK-NEXT: vmov q7[2], q7[0], r1, r7 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s19 ; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s18 ; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt.w r6, #-2147483648 -; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: vcvtb.f32.f16 s16, s17 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: it gt ; CHECK-NEXT: mvngt r6, #-2147483648 +; CHECK-NEXT: csinv r7, r0, zr, gt ; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s28 +; CHECK-NEXT: vcmp.f32 s16, s19 +; CHECK-NEXT: vmov q7[3], q7[1], r5, r8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s18 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt.w r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: itt gt +; CHECK-NEXT: vmov q5[3], q5[1], r11, r10 +; CHECK-NEXT: vmov q6[3], q6[1], r4, r9 +; CHECK-NEXT: it gt ; CHECK-NEXT: mvngt r1, #-2147483648 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: vmov q0, q7 +; CHECK-NEXT: vmov q2, q5 +; CHECK-NEXT: vmov q3, q6 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; 
CHECK-NEXT: vcvtb.f32.f16 s16, s18 -; CHECK-NEXT: vmov q6[2], q6[0], r0, r7 -; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vmov q1[2], q1[0], r0, r7 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q6[3], q6[1], r1, r6 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: vmov q3[2], q3[0], r10, r9 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt.w r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s30 -; CHECK-NEXT: vmov q3[3], q3[1], r11, r8 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: mvngt r1, #-2147483648 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: vmov q0, q5 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q2[2], q2[0], r0, r5 -; CHECK-NEXT: vmov q1, q6 -; CHECK-NEXT: vmov q2[3], q2[1], r1, r4 +; CHECK-NEXT: vmov q1[3], q1[1], r1, r6 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -4738,88 +4731,84 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: .pad #48 -; CHECK-NEXT: sub sp, #48 +; CHECK-NEXT: .pad #56 +; CHECK-NEXT: sub sp, #56 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vcvtb.f32.f16 s24, s17 ; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcvtb.f32.f16 s26, s18 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r0, s26 ; CHECK-NEXT: vldr s22, .LCPI50_0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vldr s20, .LCPI50_1 -; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: vcvtb.f32.f16 s26, s18 ; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: mov r10, r2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: 
itttt lt ; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: vmov r0, s26 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: csinv r8, r2, zr, gt +; CHECK-NEXT: csinv r10, r1, zr, gt +; CHECK-NEXT: csinv r9, r7, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #48] @ 4-byte Spill ; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs.w r8, #0 ; CHECK-NEXT: movvs.w r9, #0 ; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: movvs.w r8, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcvtb.f32.f16 s24, s19 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: mov r7, r2 +; CHECK-NEXT: vcvtb.f32.f16 s24, s19 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vcmp.f32 s26, s26 +; CHECK-NEXT: csinv r7, r0, zr, gt +; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: csinv r6, r2, zr, gt +; CHECK-NEXT: csinv r5, r1, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill +; 
CHECK-NEXT: str r3, [sp, #44] @ 4-byte Spill ; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: itttt gt +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: vcvtt.f32.f16 s24, s16 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 @@ -4829,154 +4818,150 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: str.w r1, [r4, #79] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s16 ; CHECK-NEXT: str.w r0, [r4, #75] ; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: str.w r7, [r4, #58] -; CHECK-NEXT: str.w r6, [r4, #54] -; CHECK-NEXT: str.w r5, [r4, #50] -; CHECK-NEXT: str.w r10, [r4, #33] -; CHECK-NEXT: str.w r9, [r4, #29] -; CHECK-NEXT: str.w r8, [r4, #25] +; CHECK-NEXT: str.w r6, [r4, #58] +; CHECK-NEXT: str.w r5, [r4, #54] +; CHECK-NEXT: str.w r7, [r4, #50] +; CHECK-NEXT: str.w r8, [r4, #33] +; CHECK-NEXT: str.w r10, [r4, #29] +; CHECK-NEXT: str.w r9, [r4, #25] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r3, #7 ; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: 
mvnlt r3, #7 ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: vcvtt.f32.f16 s24, s17 +; CHECK-NEXT: csinv r7, r1, zr, gt +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r1, r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s17 -; CHECK-NEXT: str r0, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #52] @ 4-byte Spill ; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: str r7, [sp, #28] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vcvtt.f32.f16 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r3, #7 ; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: mvnlt r3, #7 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: csinv r7, r1, zr, gt +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r1, r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; 
CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: str r7, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r11, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: mov r8, r2 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: mvnlt r11, #7 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r11, #7 ; CHECK-NEXT: vcvtt.f32.f16 s18, s19 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r10, r2, zr, gt +; CHECK-NEXT: csinv r7, r1, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcvtb.f32.f16 
s16, s16 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r10, r2 -; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r11, #7 -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: mvnlt r5, #7 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movgt.w r11, #7 +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r5, #7 ; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: csinv r8, r0, zr, gt +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: csinv r9, r1, zr, gt +; CHECK-NEXT: csinv r6, r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs.w r11, #0 -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: str.w r8, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: mov r12, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: mvnlt r12, #7 -; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r12, #7 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: lsrl r8, r9, #28 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r12, #7 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; 
CHECK-NEXT: csinv r2, r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 @@ -4987,66 +4972,66 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: lsrl r0, r9, #28 -; CHECK-NEXT: str.w r0, [r4, #91] -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: str.w r8, [r4, #91] +; CHECK-NEXT: ldr.w lr, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r0, lr ; CHECK-NEXT: lsrl r0, r7, #28 ; CHECK-NEXT: str.w r0, [r4, #66] -; CHECK-NEXT: ldr.w lr, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r0, lr +; CHECK-NEXT: ldr.w r8, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: lsrl r0, r3, #28 ; CHECK-NEXT: str.w r0, [r4, #41] -; CHECK-NEXT: ldrd r0, r1, [sp, #40] @ 8-byte Folded Reload +; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: and r1, r11, #15 ; CHECK-NEXT: str r0, [r4, #16] -; CHECK-NEXT: orr.w r0, r9, r10, lsl #4 -; CHECK-NEXT: lsrl r10, r1, #28 +; CHECK-NEXT: orr.w r0, r9, r6, lsl #4 ; CHECK-NEXT: str.w r0, [r4, #95] -; CHECK-NEXT: strb.w r10, [r4, #99] -; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r7, r10, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #70] +; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r3, r7, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #45] +; CHECK-NEXT: ldr r2, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r1, r2, lsl #4 +; CHECK-NEXT: and r1, r5, #15 +; CHECK-NEXT: lsrl r6, r1, #28 +; CHECK-NEXT: str r0, [r4, #20] +; CHECK-NEXT: strb.w r6, [r4, #99] +; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: 
orr.w r0, r0, r6, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 +; CHECK-NEXT: and r1, r11, #15 +; CHECK-NEXT: lsrl r10, r1, #28 ; CHECK-NEXT: str.w r0, [r4, #87] -; CHECK-NEXT: orr.w r0, r7, r8, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #70] -; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r8, r1, #28 -; CHECK-NEXT: strb.w r8, [r4, #74] -; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: strb.w r10, [r4, #74] +; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload ; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 +; CHECK-NEXT: orr.w r0, r0, lr, lsl #4 ; CHECK-NEXT: str.w r0, [r4, #62] -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: orr.w r0, r3, r2, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #45] -; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r4, #49] -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: strb.w r0, [r4, #49] +; CHECK-NEXT: ldr r0, [sp, #48] @ 4-byte Reload ; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, lr, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r8, lsl #4 ; CHECK-NEXT: str.w r0, [r4, #37] -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 -; CHECK-NEXT: str r0, [r4, #20] -; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb r2, [r4, #24] +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: strb r0, [r4, #24] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs.w r12, #0 -; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #52] @ 4-byte Reload ; CHECK-NEXT: and r0, r12, #15 ; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 ; 
CHECK-NEXT: str r0, [r4, #12] -; CHECK-NEXT: add sp, #48 +; CHECK-NEXT: add sp, #56 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -5063,244 +5048,308 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i128: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: .pad #72 +; CHECK-NEXT: sub sp, #72 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtt.f32.f16 s30, s19 -; CHECK-NEXT: vcvtb.f32.f16 s20, s16 -; CHECK-NEXT: vmov r0, s30 -; CHECK-NEXT: vcvtb.f32.f16 s26, s19 -; CHECK-NEXT: vldr s22, .LCPI51_0 -; CHECK-NEXT: vmov r5, s20 -; CHECK-NEXT: vmov r7, s26 -; CHECK-NEXT: vcvtt.f32.f16 s28, s18 +; CHECK-NEXT: vcvtt.f32.f16 s24, s16 +; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vldr s24, .LCPI51_1 -; CHECK-NEXT: add.w r12, r4, #112 -; CHECK-NEXT: vmov r6, s28 -; CHECK-NEXT: vcvtb.f32.f16 s18, s18 -; CHECK-NEXT: vcmp.f32 s30, s24 +; CHECK-NEXT: vldr s22, .LCPI51_0 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vldr s20, .LCPI51_1 +; CHECK-NEXT: vcvtb.f32.f16 s26, s17 +; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 
s30, s22 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: vmov r0, s26 +; CHECK-NEXT: it gt ; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s30, s30 +; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: csinv r7, r7, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r3, [sp, #68] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: str r7, [sp, #60] @ 4-byte Spill +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: str r1, [sp, #56] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [sp, #52] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s26, s24 -; CHECK-NEXT: add.w r12, r4, #96 +; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: vcvtt.f32.f16 s24, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: it gt ; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcmp.f32 s26, s26 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs 
r2, #0 -; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r3, [sp, #64] @ 4-byte Spill +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r7, s18 -; CHECK-NEXT: vcvtt.f32.f16 s26, s17 +; CHECK-NEXT: str r0, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s28, s24 -; CHECK-NEXT: add.w r12, r4, #80 +; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s28, s22 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: it gt ; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcvtb.f32.f16 s24, s18 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r3, [sp, #48] @ 4-byte Spill +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmov r6, s26 -; CHECK-NEXT: vcvtb.f32.f16 s28, s17 +; CHECK-NEXT: str r0, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs 
r2, #0 +; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s18, s24 -; CHECK-NEXT: add.w r12, r4, #64 +; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: mov r11, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 +; CHECK-NEXT: movlt.w r11, #-2147483648 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r11, #-2147483648 +; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: vcvtt.f32.f16 s18, s18 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r7, s28 +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s26, s24 -; CHECK-NEXT: add.w r12, r4, #48 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r6, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: 
movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: movlt.w r6, #-2147483648 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s26, s26 +; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r6, #-2147483648 +; CHECK-NEXT: vcvtb.f32.f16 s18, s19 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmov r6, s16 +; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s28, s24 -; CHECK-NEXT: add.w r12, r4, #32 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r7, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s28, s22 +; CHECK-NEXT: movlt.w r7, #-2147483648 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s28, s28 +; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: vcvtt.f32.f16 s18, s19 +; CHECK-NEXT: it gt +; 
CHECK-NEXT: mvngt r7, #-2147483648 +; CHECK-NEXT: csinv r10, r0, zr, gt +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: csinv r8, r2, zr, gt +; CHECK-NEXT: csinv r9, r1, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs.w r8, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s16, s24 -; CHECK-NEXT: add.w r12, r4, #16 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s22 +; CHECK-NEXT: movlt.w r5, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: add.w r3, r4, #112 +; CHECK-NEXT: it gt +; CHECK-NEXT: mvngt r5, #-2147483648 +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: ittt vs ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: stm r3!, {r0, r1, r2} +; CHECK-NEXT: strd r10, r9, [r4, #96] +; CHECK-NEXT: str.w r8, [r4, #104] +; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #80] +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, 
#84] +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #88] +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #64] +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #68] +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #72] +; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #48] +; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #52] +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #56] +; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #32] +; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #36] +; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #40] +; CHECK-NEXT: ldr r0, [sp, #60] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #16] +; CHECK-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #20] +; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #24] +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s20, s24 +; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s20, s22 +; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: it gt ; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s20, s20 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 +; 
CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [r4, #8] +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [r4, #4] +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: str r5, [r4, #124] +; CHECK-NEXT: str r7, [r4, #108] +; CHECK-NEXT: str r6, [r4, #92] +; CHECK-NEXT: str.w r11, [r4, #76] +; CHECK-NEXT: ldr r0, [sp, #48] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #60] +; CHECK-NEXT: ldr r0, [sp, #64] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #44] +; CHECK-NEXT: ldr r0, [sp, #68] @ 4-byte Reload +; CHECK-NEXT: str r0, [r4, #28] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [r4, #12] +; CHECK-NEXT: add sp, #72 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI51_0: -; CHECK-NEXT: .long 0x7effffff @ float 1.70141173E+38 -; CHECK-NEXT: .LCPI51_1: ; CHECK-NEXT: .long 0xff000000 @ float -1.70141183E+38 +; CHECK-NEXT: .LCPI51_1: +; CHECK-NEXT: .long 0x7effffff @ float 1.70141173E+38 %x = call <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half> %f) ret <8 x i128> %x } diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll index ee040feca4240..bd109d0e92457 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -35,31 +35,31 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f32_v2i32(<2 x float> %f) { ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vldr s18, .LCPI1_0 ; CHECK-NEXT: vcmp.f32 s17, #0 ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, 
fpscr +; CHECK-NEXT: vcmp.f32 s17, s18 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vcmp.f32 s17, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt +; CHECK-NEXT: vmov r1, s16 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r4, #0 -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: csinv r5, r0, zr, gt +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s18 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s18 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r1, #0 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9} @@ -288,9 +288,7 @@ define arm_aapcs_vfpcc <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) { ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: csinv r0, r6, zr, ne ; CHECK-NEXT: pop {r4, r5, r6, pc} ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: @@ -340,13 +338,13 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) { ; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: itt ne +; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r8, #0 -; CHECK-NEXT: movne.w r9, #-1 ; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: csinv r9, r9, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r5 @@ -354,18 +352,18 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) { ; CHECK-NEXT: bl __aeabi_d2ulz ; 
CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r7, r1, r6, ne -; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: csel r7, r0, r6, ne +; CHECK-NEXT: csel r6, r1, r6, ne ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r6, r9 -; CHECK-NEXT: vmov q0[3], q0[1], r7, r8 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: csinv r0, r7, zr, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r9 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r8 ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 @@ -397,75 +395,74 @@ define arm_aapcs_vfpcc <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) { ; CHECK-NEXT: vmov.f32 s19, s1 ; CHECK-NEXT: vldr d0, .LCPI10_0 ; CHECK-NEXT: vmov r8, r9, d1 -; CHECK-NEXT: vmov r5, r4, d0 +; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: vmov.f32 s16, s4 ; CHECK-NEXT: vmov.f32 s17, s5 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vldr d0, .LCPI10_1 -; CHECK-NEXT: vmov r11, r1, d9 ; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: vmov r7, r6, d8 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: csel r10, r0, r10, ne +; CHECK-NEXT: csel r4, r0, r10, ne ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: mov r9, r2 -; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: vmov r7, r6, d8 +; CHECK-NEXT: vmov r5, r11, d9 +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r10, 
r3 ; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: ldr.w r9, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldr.w r8, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: csinv r0, r4, zr, ne +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #-1 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: csel r5, r0, r4, ne +; CHECK-NEXT: ldr r6, [sp] @ 4-byte Reload +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: csel r4, r0, r4, ne ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r7, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-1 -; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csinv r10, r4, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: csel r4, r0, r4, ne -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vmov.32 q0[1], r10 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w 
r4, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r4, r5 +; CHECK-NEXT: csinv r0, r4, zr, ne +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r10 ; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 @@ -491,109 +488,105 @@ define arm_aapcs_vfpcc <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #32 +; CHECK-NEXT: sub sp, #32 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI11_0 +; CHECK-NEXT: vmov r7, r6, d9 ; CHECK-NEXT: vmov q5, q1 -; CHECK-NEXT: vmov r8, r9, d10 -; CHECK-NEXT: vmov r2, r11, d0 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: vmov r11, r4, d0 +; CHECK-NEXT: str r7, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r6, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: vmov r5, r0, d11 ; CHECK-NEXT: vldr d0, .LCPI11_1 -; CHECK-NEXT: vmov r5, r1, d11 +; CHECK-NEXT: vmov r9, r8, d10 +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: vmov r1, r0, d0 +; CHECK-NEXT: strd r1, r0, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: vmov r6, r7, d8 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: csel r4, r0, r10, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: strd r5, r1, [sp, #4] @ 8-byte Folded Spill -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-NEXT: mov r8, r2 -; CHECK-NEXT: mov r9, r3 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: csel r0, r0, r10, ne +; CHECK-NEXT: ldr.w r10, 
[sp] @ 4-byte Reload +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: ldr.w r9, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: mov r5, r7 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: csinv r8, r6, zr, ne +; CHECK-NEXT: vmov r6, r7, d8 +; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: mov r10, r11 -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: csel r4, r0, r4, ne ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r8, r9 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: 
movne.w r4, #-1 -; CHECK-NEXT: ldr.w r11, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: ldr.w r9, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: ldr.w r9, [sp] @ 4-byte Reload -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r5, r0, r4, ne -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: vmov r7, r6, d9 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: csinv r4, r4, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-1 -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: csel r4, r0, r4, ne -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: vmov q4[2], q4[0], r4, r8 ; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csinv r4, r0, zr, ne +; CHECK-NEXT: ldrd r0, r1, [sp, #20] @ 8-byte Folded Reload +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: ldrd r1, r0, [sp, #12] @ 8-byte Folded Reload -; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 -; CHECK-NEXT: vmov q0[3], q0[1], r4, r5 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: csinv r0, r0, zr, ne +; CHECK-NEXT: vmov q4[3], q4[1], r0, r4 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: 
add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -620,52 +613,85 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) { ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: .pad #32 ; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov.f32 s17, s1 +; CHECK-NEXT: vmov.f32 s18, s0 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: vmov.f32 s19, s1 ; CHECK-NEXT: vldr d0, .LCPI12_0 -; CHECK-NEXT: vmov r6, r11, d4 +; CHECK-NEXT: vmov r6, r4, d4 ; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: vmov.f32 s18, s6 -; CHECK-NEXT: vmov.f32 s20, s4 -; CHECK-NEXT: vmov.f32 s22, s2 -; CHECK-NEXT: vmov.f32 s19, s7 -; CHECK-NEXT: vmov.f32 s21, s5 -; CHECK-NEXT: vmov.f32 s23, s3 +; CHECK-NEXT: vmov.f32 s20, s6 +; CHECK-NEXT: vmov.f32 s22, s4 +; CHECK-NEXT: vmov.f32 s16, s2 +; CHECK-NEXT: vmov.f32 s21, s7 +; CHECK-NEXT: vmov.f32 s23, s5 +; CHECK-NEXT: vmov.f32 s17, s3 ; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: vmov r8, r1, d11 ; CHECK-NEXT: vldr d0, .LCPI12_1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: vmov r10, r9, d9 +; CHECK-NEXT: vmov r5, r10, d10 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: csel r7, r0, r7, ne ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: vmov r5, r1, d10 -; CHECK-NEXT: strd r5, r1, [sp, #12] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r8, r11, d11 +; CHECK-NEXT: str r5, 
[sp, #12] @ 4-byte Spill +; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: csinv r0, r7, zr, ne +; CHECK-NEXT: mov r7, r10 +; CHECK-NEXT: str.w r0, [r9, #16] +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: str.w r10, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr.w r10, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r11, r2 -; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: csel r0, r0, r4, ne +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: mov r5, r11 +; CHECK-NEXT: mov r11, r10 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: vmov r10, r9, d9 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r7, #16] ; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: csinv r8, r4, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r10 @@ -675,82 +701,43 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f64_v5i32(<5 x 
double> %f) { ; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: csel r4, r0, r4, ne ; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: mov r10, r11 -; CHECK-NEXT: mov r11, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: ldrd r0, r1, [sp, #12] @ 8-byte Folded Reload ; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r9, r7 +; CHECK-NEXT: csinv r6, r4, zr, ne +; CHECK-NEXT: mov r7, r5 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: vmov r4, r5, d8 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: vmov q4[2], q4[0], r6, r8 +; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: csinv r8, r0, zr, ne +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: csel r9, r0, r4, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: mov r8, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #-1 -; CHECK-NEXT: ldr.w r11, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: ldr r5, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: csel r7, r0, r4, ne -; CHECK-NEXT: mov r0, r11 -; 
CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vmov r4, r5, d8 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 -; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: csel r6, r0, r6, ne ; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r6, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r9, r0 +; CHECK-NEXT: csinv r0, r6, zr, ne +; CHECK-NEXT: vmov q4[3], q4[1], r0, r8 ; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vstrw.32 q4, [r0] ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: add sp, #4 @@ -778,141 +765,140 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov.f32 s16, s0 +; CHECK-NEXT: vmov.f32 s18, s0 ; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill -; CHECK-NEXT: vmov.f32 s17, s1 +; CHECK-NEXT: vmov.f32 s19, s1 ; CHECK-NEXT: vldr d0, .LCPI13_0 ; CHECK-NEXT: vmov r5, r6, d5 -; CHECK-NEXT: vmov r10, r3, d0 +; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: vmov.f32 s20, s8 ; CHECK-NEXT: vmov.f32 s22, s6 -; CHECK-NEXT: vmov.f32 s18, s4 -; CHECK-NEXT: vmov.f32 s24, s2 +; CHECK-NEXT: vmov.f32 s24, s4 +; CHECK-NEXT: vmov.f32 s16, s2 ; CHECK-NEXT: vmov.f32 s21, s9 ; CHECK-NEXT: vmov.f32 s23, s7 -; CHECK-NEXT: vmov.f32 
s19, s5 -; CHECK-NEXT: vmov.f32 s25, s3 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: vmov.f32 s25, s5 +; CHECK-NEXT: vmov.f32 s17, s3 +; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: vmov r9, r1, d11 +; CHECK-NEXT: vmov r2, r1, d11 ; CHECK-NEXT: vldr d0, .LCPI13_1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: vmov r8, r11, d10 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: vmov r10, r9, d10 +; CHECK-NEXT: vmov r11, r4, d0 +; CHECK-NEXT: csel r7, r0, r8, ne ; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill -; CHECK-NEXT: vmov r7, r1, d12 -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: strd r7, r1, [sp, #24] @ 8-byte Folded Spill +; CHECK-NEXT: strd r2, r1, [sp, #28] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r2, r1, d12 +; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: strd r2, r1, [sp, #12] @ 8-byte Folded Spill ; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: ldr r6, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: ldr r7, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: str r4, [r7, #20] -; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: csinv r0, r7, zr, ne +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: str r0, [r6, #20] +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: mov r2, r7 +; 
CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: vmov r2, r1, d9 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r4, r0, r4, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r8, r5 -; CHECK-NEXT: strd r2, r1, [sp, #16] @ 8-byte Folded Spill -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: ldr.w r11, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: csel r5, r0, r5, ne +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r7, #16] -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: csinv r0, r5, zr, ne +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: str r0, [r6, #16] +; CHECK-NEXT: mov r9, r7 +; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: ldr.w r10, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: csel r0, r0, r5, ne +; CHECK-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: csel r4, r0, r4, ne -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: 
mov r3, r11 -; CHECK-NEXT: str.w r8, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: str r4, [sp, #32] @ 4-byte Spill -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r7, r5 ; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: ldr.w r8, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: mov r0, r10 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: csel r9, r0, r4, ne -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: vmov r6, r5, d9 ; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: ldr.w r10, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #-1 -; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: csinv r0, r4, zr, ne ; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: mov r7, r9 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: csel r8, r0, r4, ne -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: mov r0, r6 +; 
CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldrd r0, r1, [sp, #28] @ 8-byte Folded Reload +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csinv r6, r4, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: vmov r4, r5, d8 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r8, #-1 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: vmov q4[2], q4[0], r6, r0 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: csinv r9, r0, zr, ne ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge @@ -924,17 +910,14 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) { ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: csel r6, r0, r6, ne ; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r6, r8 -; CHECK-NEXT: vmov q0[3], q0[1], r9, r0 +; CHECK-NEXT: csinv r0, r6, zr, ne +; CHECK-NEXT: vmov q4[3], q4[1], r0, r9 ; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vstrw.32 q4, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} ; CHECK-NEXT: add sp, #4 @@ -985,32 +968,32 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) { ; CHECK-NEXT: vcvtt.f32.f16 s18, s16 ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vldr s20, .LCPI15_0 ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; 
CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt +; CHECK-NEXT: vmov r1, s16 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r4, #0 -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: csinv r5, r0, zr, gt +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r1, #0 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9, d10} @@ -1425,91 +1408,87 @@ define arm_aapcs_vfpcc <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) { ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r7, s18 +; CHECK-NEXT: vmov r8, s18 ; CHECK-NEXT: vldr s20, .LCPI28_0 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r9, #0 ; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt.w r9, #0 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r10, r1 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt -; 
CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r7, #-1 ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #3 +; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: csinv r2, r0, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r10, #65535 -; CHECK-NEXT: movtgt r10, #3 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: movwgt r6, #65535 +; CHECK-NEXT: movtgt r6, #3 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r8, r2 ; CHECK-NEXT: bfc r3, #18, #14 -; CHECK-NEXT: mov r6, r7 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: lsll r4, r3, #22 -; CHECK-NEXT: lsrl r6, r1, #28 +; CHECK-NEXT: lsrl r8, r1, #28 +; CHECK-NEXT: csinv r4, r7, zr, gt ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: str.w r5, [r8] +; CHECK-NEXT: lsll r4, r3, #22 +; CHECK-NEXT: mov r7, r2 +; CHECK-NEXT: orr.w r1, r1, r3 +; CHECK-NEXT: csinv r0, r9, zr, gt +; CHECK-NEXT: str.w r0, [r10] +; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r9, #65535 -; CHECK-NEXT: movtgt r9, #3 -; CHECK-NEXT: orrs r1, r3 -; CHECK-NEXT: str.w r1, [r8, #20] +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #3 +; CHECK-NEXT: str.w r1, [r10, #20] ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: orr.w r2, r6, r4 +; CHECK-NEXT: orr.w r2, r8, r4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: 
vcmp.f32 s17, s20 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: bfc r9, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: str.w r2, [r8, #16] -; CHECK-NEXT: lsr.w r2, r10, #10 -; CHECK-NEXT: strb.w r2, [r8, #24] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: str.w r2, [r10, #16] +; CHECK-NEXT: lsr.w r2, r6, #10 ; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: strb.w r2, [r10, #24] +; CHECK-NEXT: bfc r5, #18, #14 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: orr.w r0, r5, r0, lsl #18 ; CHECK-NEXT: lsrl r2, r1, #14 -; CHECK-NEXT: orr.w r0, r9, r0, lsl #18 ; CHECK-NEXT: orr.w r1, r1, r7, lsl #4 -; CHECK-NEXT: strd r2, r1, [r8, #8] -; CHECK-NEXT: str.w r0, [r8, #4] +; CHECK-NEXT: strd r2, r1, [r10, #8] +; CHECK-NEXT: str.w r0, [r10, #4] ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} ; CHECK-NEXT: .p2align 2 @@ -1523,77 +1502,69 @@ define arm_aapcs_vfpcc <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) { define arm_aapcs_vfpcc <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10} -; CHECK-NEXT: vpush {d8, d9, d10} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: vldr s20, .LCPI29_0 -; CHECK-NEXT: vcmp.f32 s19, 
#0 -; CHECK-NEXT: mov r10, r1 +; CHECK-NEXT: vldr s24, .LCPI29_0 +; CHECK-NEXT: vcmp.f32 s17, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vcmp.f32 s17, s24 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r9, s17 -; CHECK-NEXT: vmov r8, s16 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: vmov r5, s19 +; CHECK-NEXT: vmov r8, s18 +; CHECK-NEXT: csinv r7, r0, zr, gt +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: csinv r9, r1, zr, gt ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: vcmp.f32 s16, s24 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: csinv r6, r0, zr, gt +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: csinv r4, r1, zr, gt ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: vcmp.f32 s19, #0 +; CHECK-NEXT: vmov q5[2], q5[0], r6, r7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: vcmp.f32 s19, s24 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: vmov q5[3], q5[1], r4, r9 +; CHECK-NEXT: csinv r5, r0, zr, gt +; CHECK-NEXT: mov r0, r8 +; 
CHECK-NEXT: csinv r4, r1, zr, gt ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: vmov q1[2], q1[0], r7, r11 +; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: vmov q0, q5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s24 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmov q1[3], q1[1], r6, r10 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 -; CHECK-NEXT: vpop {d8, d9, d10} +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: vmov q1[2], q1[0], r0, r5 +; CHECK-NEXT: csinv r0, r1, zr, gt +; CHECK-NEXT: vmov q1[3], q1[1], r0, r4 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI29_0: @@ -1611,124 +1582,116 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vmov r6, s17 ; CHECK-NEXT: vldr s20, .LCPI30_0 -; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: vcmp.f32 s17, #0 +; CHECK-NEXT: mov r9, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: movlt.w r9, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vmov r3, s18 ; CHECK-NEXT: it gt -; 
CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str.w r2, [r4, #33] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str.w r1, [r4, #29] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r4, #25] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: vmov r7, s19 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: movgt.w r9, #15 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: csinv r5, r1, zr, gt ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: mov r6, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: movgt r6, #15 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r11, r2, zr, gt +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r1, zr, gt +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: mov r8, r2 -; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: mov r7, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s19, s20 ; CHECK-NEXT: 
itttt lt -; CHECK-NEXT: movlt.w r9, #0 ; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r11, #15 -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: csinv r8, r0, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: csinv r10, r2, zr, gt +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: orr.w r1, r1, r10, lsl #4 +; CHECK-NEXT: str.w r1, [r4, #45] +; CHECK-NEXT: str.w r0, [r4, #41] +; CHECK-NEXT: str.w r11, [r4, #33] +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: str.w r0, [r4, #29] +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: str.w r0, [r4, #25] +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr.w r11, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: lsrl r0, r5, #28 +; CHECK-NEXT: orr.w r1, r5, r11, lsl #4 +; CHECK-NEXT: strd r0, r1, [r4, #16] +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r7, #15 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: str r2, [r4, #8] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: str r1, [r4, #4] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: and r1, r7, #15 ; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: lsrl r0, r9, #28 -; CHECK-NEXT: and r1, r11, #15 
-; CHECK-NEXT: str.w r0, [r4, #41] -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: lsrl r0, r5, #28 -; CHECK-NEXT: str r0, [r4, #16] -; CHECK-NEXT: orr.w r0, r9, r8, lsl #4 -; CHECK-NEXT: lsrl r8, r1, #28 -; CHECK-NEXT: str.w r0, [r4, #45] -; CHECK-NEXT: strb.w r8, [r4, #49] -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: and r0, r6, #15 +; CHECK-NEXT: lsrl r10, r1, #28 +; CHECK-NEXT: orr.w r0, r0, r8, lsl #4 +; CHECK-NEXT: strb.w r10, [r4, #49] +; CHECK-NEXT: and r1, r9, #15 ; CHECK-NEXT: str.w r0, [r4, #37] -; CHECK-NEXT: orr.w r0, r5, r6, lsl #4 -; CHECK-NEXT: str r0, [r4, #20] -; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r6, r1, #28 -; CHECK-NEXT: strb r6, [r4, #24] +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: strb r0, [r4, #24] ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: and r0, r3, #15 -; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 ; CHECK-NEXT: str r0, [r4, #12] -; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -1753,10 +1716,8 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vmov r5, s18 ; CHECK-NEXT: vldr s20, .LCPI31_0 ; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: add.w r12, r4, #48 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 @@ -1764,68 +1725,65 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: add.w r12, r4, #48 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; 
CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: vmov r5, s18 ; CHECK-NEXT: vmov r7, s16 ; CHECK-NEXT: vmov r6, s17 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: add.w r12, r4, #32 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s17, #0 ; CHECK-NEXT: add.w r12, r4, #16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: 
vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: add sp, #4 @@ -1888,16 +1846,15 @@ define arm_aapcs_vfpcc <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) { ; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #1 -; CHECK-NEXT: and r0, r7, #1 ; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload -; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: csinc r0, r7, zr, ne ; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: bfi r7, r0, #0, #1 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: and r0, r0, #1 ; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bfi r7, r0, #0, #1 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r6 @@ -1911,9 +1868,8 @@ define arm_aapcs_vfpcc <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) { ; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #1 -; CHECK-NEXT: and r0, r4, #1 +; CHECK-NEXT: csinc r0, r4, zr, ne +; CHECK-NEXT: and r0, r0, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r7, r0, #1, #1 ; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload @@ -2288,13 +2244,13 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x doubl ; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: itt 
ne +; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r8, #0 -; CHECK-NEXT: movne.w r9, #-1 ; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: csinv r9, r9, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r5 @@ -2302,18 +2258,18 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x doubl ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r7, r1, r6, ne -; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: csel r7, r0, r6, ne +; CHECK-NEXT: csel r6, r1, r6, ne ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r6, r9 -; CHECK-NEXT: vmov q0[3], q0[1], r7, r8 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: csinv r0, r7, zr, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r9 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r8 ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 @@ -2343,23 +2299,23 @@ define arm_aapcs_vfpcc <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) { ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI38_0 -; CHECK-NEXT: vmov r11, r5, d8 +; CHECK-NEXT: vmov r11, r10, d8 ; CHECK-NEXT: vmov r6, r7, d0 -; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vldr d0, .LCPI38_1 ; CHECK-NEXT: vmov r5, r8, d9 ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: vmov r3, r2, d0 -; CHECK-NEXT: csel r10, r0, r4, ne +; 
CHECK-NEXT: csel r0, r0, r4, ne +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: csel r9, r1, r4, ne ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r8 @@ -2382,21 +2338,22 @@ define arm_aapcs_vfpcc <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) { ; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ittt ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: movwne r6, #65535 -; CHECK-NEXT: movtne r6, #3 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: csinv r4, r4, zr, ne +; CHECK-NEXT: itt ne +; CHECK-NEXT: movwne r6, #65535 +; CHECK-NEXT: movtne r6, #3 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ittt ne +; CHECK-NEXT: itt ne ; CHECK-NEXT: movwne r9, #65535 ; CHECK-NEXT: movtne r9, #3 -; CHECK-NEXT: movne.w r10, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r10, r4 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: csinv r0, r0, zr, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 ; CHECK-NEXT: vmov q0[3], q0[1], r9, r6 ; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} @@ -2423,60 +2380,64 @@ define arm_aapcs_vfpcc <2 x i64> @test_unsigned_v2f64_v2i64(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI39_0 -; CHECK-NEXT: vmov r6, r7, d9 -; CHECK-NEXT: vmov r11, r3, d0 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: vmov r11, r10, d8 +; CHECK-NEXT: vmov r6, r7, d0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: 
mov r1, r7 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vldr d0, .LCPI39_1 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: csel r9, r0, r8, ne -; CHECK-NEXT: csel r8, r1, r8, ne -; CHECK-NEXT: vmov r10, r3, d0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: vmov r5, r4, d8 -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movne.w r8, #-1 -; CHECK-NEXT: movne.w r9, #-1 -; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vmov r5, r8, d9 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: vmov r9, r2, d0 +; CHECK-NEXT: csel r1, r1, r4, ne +; CHECK-NEXT: csel r0, r0, r4, ne +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r7, r1, r6, ne -; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r6, r0, r4, ne +; CHECK-NEXT: csel r4, r1, r4, ne ; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: movne.w r7, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r6, r9 -; CHECK-NEXT: vmov q0[3], q0[1], r7, r8 -; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: mov r0, r11 +; 
CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: csinv r4, r4, zr, ne +; CHECK-NEXT: csinv r5, r6, zr, ne +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: csinv r0, r0, zr, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: csinv r0, r0, zr, ne +; CHECK-NEXT: vmov q0[3], q0[1], r0, r4 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2501,191 +2462,188 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #48 -; CHECK-NEXT: sub sp, #48 +; CHECK-NEXT: .pad #56 +; CHECK-NEXT: sub sp, #56 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI40_0 -; CHECK-NEXT: vmov r11, r4, d8 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r9, r2 +; CHECK-NEXT: vmov r4, r5, d9 +; CHECK-NEXT: str r0, [sp, #52] @ 4-byte Spill +; CHECK-NEXT: vmov r8, r9, d0 +; CHECK-NEXT: str.w r8, [sp, #48] @ 4-byte Spill +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r5, r4 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: vldr d0, .LCPI40_1 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: csel r4, r2, r8, ne -; CHECK-NEXT: vmov r10, r3, d0 -; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded 
Spill -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: strd r2, r3, [sp, #28] @ 8-byte Folded Spill +; CHECK-NEXT: csel r10, r1, r7, ne +; CHECK-NEXT: vmov r6, r3, d0 +; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: str r5, [sp, #40] @ 4-byte Spill -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: str r6, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r7, r3 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r6, #8] -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: ldr r7, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: csinv r11, r10, zr, ne ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: csel r8, r1, r0, ne +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: str.w r10, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: mov r10, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: ldr r7, [sp, #48] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r6, #4] -; CHECK-NEXT: mov r5, r6 -; CHECK-NEXT: str r6, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: ldr r6, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: str.w r11, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: csinv r8, r8, zr, ne +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r2, r7 +; 
CHECK-NEXT: str.w r8, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: lsrl r8, r11, #28 +; CHECK-NEXT: mov r6, r9 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r6, r8 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: csel r9, r1, r0, ne +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: ldr r5, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vmov r8, r11, d9 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r5] -; CHECK-NEXT: mov r10, r9 -; CHECK-NEXT: str.w r9, [sp, #32] @ 4-byte Spill -; CHECK-NEXT: mov r5, r7 +; CHECK-NEXT: ldr r1, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: csinv r0, r9, zr, ne +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: orr.w r0, r11, r0, lsl #4 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r11, r7 +; CHECK-NEXT: strd r8, r0, [r1, #16] +; CHECK-NEXT: vmov r8, r4, d8 +; CHECK-NEXT: str r6, [sp, #44] @ 4-byte Spill ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: ldr r4, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: strd r3, r0, [sp, #16] @ 8-byte Folded Spill -; CHECK-NEXT: csel r7, r1, r9, ne -; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: csel r7, r2, r7, 
ne +; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: csinv r0, r7, zr, ne +; CHECK-NEXT: ldr r7, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: str r0, [r7, #8] ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: csel r9, r1, r0, ne +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: csel r11, r1, r0, ne ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r9, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr.w r9, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: lsrl r0, r7, #28 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: str.w r0, [r9, #16] +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: csinv r0, r11, zr, ne +; CHECK-NEXT: mov r10, r7 +; CHECK-NEXT: str r0, [r7, #4] ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: ldr r6, [sp, #48] @ 4-byte Reload +; CHECK-NEXT: ldr r5, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #40] @ 4-byte Reload ; CHECK-NEXT: cmp 
r0, #0 -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: csel r10, r1, r0, ne +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: csel r11, r1, r0, ne ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #-1 -; CHECK-NEXT: orr.w r0, r7, r10, lsl #4 -; CHECK-NEXT: str.w r0, [r9, #20] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: csinv r0, r11, zr, ne ; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str.w r0, [r10] +; CHECK-NEXT: ldr.w r11, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr.w r10, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r5, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #32] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: csel r7, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r8, r6 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: csel r4, r1, r0, ne +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r5, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #15 -; CHECK-NEXT: and r1, r7, #15 -; CHECK-NEXT: lsrl r10, r1, #28 -; CHECK-NEXT: strb.w r10, [r9, #24] -; CHECK-NEXT: ldr r6, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: ldr r4, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: movne r4, #15 +; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: and r1, r4, #15 +; 
CHECK-NEXT: ldr r7, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: strb r0, [r7, #24] +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldrd r3, r2, [sp, #44] @ 8-byte Folded Reload +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r7, r1, r0, ne -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: csel r4, r1, r0, ne +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #15 -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: and r0, r7, #15 +; CHECK-NEXT: movne r4, #15 +; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: and r0, r4, #15 ; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 -; CHECK-NEXT: str.w r0, [r9, #12] -; CHECK-NEXT: add sp, #48 +; CHECK-NEXT: str r0, [r7, #12] +; CHECK-NEXT: add sp, #56 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2710,17 +2668,16 @@ define arm_aapcs_vfpcc <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #40 +; CHECK-NEXT: sub sp, #40 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI41_0 ; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r2, r9, d0 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r11, r2 +; CHECK-NEXT: strd r2, r3, [sp, #32] @ 8-byte Folded Spill ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r8 @@ -2728,156 
+2685,152 @@ define arm_aapcs_vfpcc <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) { ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: vldr d0, .LCPI41_1 ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: strd r1, r0, [sp, #24] @ 8-byte Folded Spill +; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: vmov r5, r11, d0 ; CHECK-NEXT: csel r6, r3, r6, ne -; CHECK-NEXT: vmov r10, r5, d0 -; CHECK-NEXT: str r2, [sp] @ 4-byte Spill ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: str r5, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: str r6, [r4, #28] -; CHECK-NEXT: str.w r11, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r6, zr, ne +; CHECK-NEXT: str.w r0, [r9, #28] +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: ldr r6, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: csel r5, r1, r0, ne -; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: csel r10, r10, r0, ne +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-1 -; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: str r5, [r4, #24] -; CHECK-NEXT: mov r5, r4 -; CHECK-NEXT: mov r4, r9 +; CHECK-NEXT: csinv r0, r10, zr, ne +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: 
str.w r0, [r9, #24] +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r10, r9 +; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: csel r9, r1, r0, ne ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r6, r5 +; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str.w r11, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: vmov r1, r11, d8 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #-1 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: str.w r9, [r5, #20] +; CHECK-NEXT: csinv r0, r9, zr, ne +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: vmov r6, r11, d8 -; CHECK-NEXT: mov r9, r4 +; CHECK-NEXT: str.w r0, [r10, #20] +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: ldr r5, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr.w r9, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: csel r10, r1, r0, ne ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r5, #16] -; CHECK-NEXT: mov r0, r6 -; 
CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: csinv r0, r10, zr, ne ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str r0, [r7, #16] +; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __fixunsdfti +; CHECK-NEXT: ldr.w r10, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: strd r1, r0, [sp, #24] @ 8-byte Folded Spill ; CHECK-NEXT: csel r4, r3, r8, ne -; CHECK-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: mov r8, r7 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: csinv r0, r4, zr, ne +; CHECK-NEXT: str r0, [r7, #12] +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: ldr r4, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: str r4, [r7, #12] +; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r4, r1, r0, ne +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: csel r8, r1, r0, ne ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: csinv r0, r8, zr, ne ; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r9 -; 
CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r7, #8] +; CHECK-NEXT: str r0, [r7, #8] +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: mov r8, r4 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: csinv r0, r4, zr, ne ; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r7, #4] +; CHECK-NEXT: str r0, [r7, #4] +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r7] -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: csinv r0, r4, zr, ne +; CHECK-NEXT: str r0, [r7] +; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -3335,7 +3288,6 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vcvtb.f32.f16 s22, s18 -; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcvtt.f32.f16 s26, s17 @@ -3344,184 +3296,173 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: 
vcvtt.f32.f16 s20, s18 ; CHECK-NEXT: vcvtb.f32.f16 s24, s17 ; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmov r8, s20 +; CHECK-NEXT: vmov r9, s20 ; CHECK-NEXT: vldr s18, .LCPI48_0 -; CHECK-NEXT: vmov r9, s24 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: it lt +; CHECK-NEXT: vmov r8, s24 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: bl __aeabi_f2ulz +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vcmp.f32 s26, #0 +; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s24, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s18 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s24, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vcmp.f32 s26, s18 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: vcmp.f32 s22, s18 ; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r5, zr, gt +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #3 -; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: movwgt r7, #65535 +; CHECK-NEXT: movtgt r7, #3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: str.w 
r6, [r10, #25] +; CHECK-NEXT: str r7, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r6, zr, gt +; CHECK-NEXT: str.w r0, [r10, #25] +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r4, #65535 ; CHECK-NEXT: movtgt r4, #3 -; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s20, #0 -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s20, s18 -; CHECK-NEXT: vcvtb.f32.f16 s20, s19 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmov r0, s20 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: vcvtb.f32.f16 s20, s19 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s20 +; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r5, #65535 ; CHECK-NEXT: movtgt r5, #3 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s20, #0 -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s20, s18 -; CHECK-NEXT: vcvtt.f32.f16 s20, s19 -; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: vmov r0, s20 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: movwgt r11, #65535 -; CHECK-NEXT: movtgt r11, #3 +; CHECK-NEXT: vcvtt.f32.f16 s20, s19 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r7, #65535 +; CHECK-NEXT: movtgt r7, #3 +; CHECK-NEXT: csinv r6, r0, zr, gt +; CHECK-NEXT: vmov r0, s20 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s20, #0 -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s20, s18 -; CHECK-NEXT: vcvtb.f32.f16 s20, s16 -; CHECK-NEXT: mov r7, r1 -; 
CHECK-NEXT: vmov r0, s20 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movwgt r7, #65535 -; CHECK-NEXT: movtgt r7, #3 +; CHECK-NEXT: vcvtb.f32.f16 s20, s16 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r9, #65535 +; CHECK-NEXT: movtgt r9, #3 +; CHECK-NEXT: csinv r8, r0, zr, gt +; CHECK-NEXT: vmov r0, s20 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s20, #0 -; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s20, s18 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt.w r11, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s20, s18 -; CHECK-NEXT: bfc r11, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: lsrl r2, r11, #28 +; CHECK-NEXT: bfc r7, #18, #14 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: lsrl r2, r7, #28 ; CHECK-NEXT: bfc r5, #18, #14 +; CHECK-NEXT: bfc r4, #18, #14 ; CHECK-NEXT: vcvtt.f32.f16 s16, s16 -; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: lsr.w r0, r7, #10 -; CHECK-NEXT: bfc r7, #18, #14 -; CHECK-NEXT: lsll r10, r7, #22 -; CHECK-NEXT: orr.w r1, r11, r7 -; CHECK-NEXT: str.w r1, [r4, #45] -; CHECK-NEXT: orr.w r1, r2, r10 -; CHECK-NEXT: str.w r1, [r4, #41] -; CHECK-NEXT: strb.w r0, [r4, #49] -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: str.w r0, [r10] +; CHECK-NEXT: lsr.w r0, r9, #10 +; CHECK-NEXT: bfc r9, #18, #14 +; CHECK-NEXT: lsll r8, r9, #22 +; CHECK-NEXT: orr.w r1, r7, r9 +; CHECK-NEXT: str.w r1, [r10, #45] +; CHECK-NEXT: orr.w r1, r2, r8 +; CHECK-NEXT: str.w r1, [r10, #41] +; CHECK-NEXT: strb.w r0, [r10, #49] +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: lsrl r0, 
r5, #14 -; CHECK-NEXT: mov r7, r4 -; CHECK-NEXT: orr.w r1, r5, r9, lsl #4 -; CHECK-NEXT: str.w r1, [r4, #37] -; CHECK-NEXT: str.w r0, [r4, #33] -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: bfc r0, #18, #14 -; CHECK-NEXT: orr.w r0, r0, r6, lsl #18 -; CHECK-NEXT: str.w r0, [r4, #29] +; CHECK-NEXT: orr.w r1, r5, r6, lsl #4 +; CHECK-NEXT: str.w r1, [r10, #37] +; CHECK-NEXT: str.w r0, [r10, #33] +; CHECK-NEXT: orr.w r0, r4, r2, lsl #18 +; CHECK-NEXT: str.w r0, [r10, #29] ; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr.w r9, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: bfc r1, #18, #14 ; CHECK-NEXT: bfc r3, #18, #14 -; CHECK-NEXT: mov r6, r9 -; CHECK-NEXT: lsll r4, r1, #22 -; CHECK-NEXT: lsrl r6, r3, #28 +; CHECK-NEXT: mov r4, r5 +; CHECK-NEXT: lsll r6, r1, #22 +; CHECK-NEXT: lsrl r4, r3, #28 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r8, #65535 -; CHECK-NEXT: movtgt r8, #3 +; CHECK-NEXT: movwgt r11, #65535 +; CHECK-NEXT: movtgt r11, #3 ; CHECK-NEXT: orrs r1, r3 -; CHECK-NEXT: str r1, [r7, #20] +; CHECK-NEXT: str.w r1, [r10, #20] ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: orr.w r2, r6, r4 +; CHECK-NEXT: orr.w r2, r4, r6 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s18 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vcmp.f32 s16, s18 -; CHECK-NEXT: bfc r8, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: str r2, [r7, #16] -; CHECK-NEXT: lsr.w r2, r5, #10 -; CHECK-NEXT: strb r2, [r7, #24] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 
-; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: str.w r2, [r10, #16] +; CHECK-NEXT: lsr.w r2, r7, #10 ; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: strb.w r2, [r10, #24] +; CHECK-NEXT: bfc r11, #18, #14 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: orr.w r0, r11, r0, lsl #18 ; CHECK-NEXT: lsrl r2, r1, #14 -; CHECK-NEXT: orr.w r0, r8, r0, lsl #18 -; CHECK-NEXT: orr.w r1, r1, r9, lsl #4 -; CHECK-NEXT: strd r2, r1, [r7, #8] -; CHECK-NEXT: str r0, [r7, #4] +; CHECK-NEXT: orr.w r1, r1, r5, lsl #4 +; CHECK-NEXT: strd r2, r1, [r10, #8] +; CHECK-NEXT: str.w r0, [r10, #4] ; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 @@ -3541,139 +3482,125 @@ define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtt.f32.f16 s22, s19 -; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: vcvtb.f32.f16 s20, s18 +; CHECK-NEXT: vmov r0, s20 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcvtb.f32.f16 s26, s19 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r0, s26 ; CHECK-NEXT: vldr s28, .LCPI49_0 -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: vcmp.f32 s20, #0 +; CHECK-NEXT: vcvtb.f32.f16 s22, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcvtt.f32.f16 s20, s16 -; CHECK-NEXT: vcvtt.f32.f16 s24, s18 +; CHECK-NEXT: vcvtt.f32.f16 s16, s16 +; CHECK-NEXT: vcmp.f32 s20, s28 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: vcmp.f32 s22, s28 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r6, s20 -; 
CHECK-NEXT: vmov r4, s24 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vcvtt.f32.f16 s18, s18 +; CHECK-NEXT: vmov r11, s18 +; CHECK-NEXT: vmov r5, s22 +; CHECK-NEXT: csinv r9, r0, zr, gt +; CHECK-NEXT: csinv r8, r1, zr, gt +; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: vcmp.f32 s16, s28 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: vcmp.f32 s26, s28 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: csinv r7, r0, zr, gt +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: csinv r10, r1, zr, gt ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: vcvtt.f32.f16 s16, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: vcmp.f32 s22, s28 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vcmp.f32 s24, s28 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: vmov r5, s16 +; CHECK-NEXT: vcvtb.f32.f16 s24, s17 +; CHECK-NEXT: csinv r4, r0, zr, gt +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: csinv r6, r1, zr, gt ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vcmp.f32 s20, #0 +; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: vmov q5[2], q5[0], r4, r7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vcmp.f32 s20, s28 +; CHECK-NEXT: vcmp.f32 s18, s28 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: vmov q5[3], q5[1], r6, r10 +; CHECK-NEXT: vmov r4, s24 +; CHECK-NEXT: csinv r7, r0, zr, gt +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: csinv r10, r1, zr, gt ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s28 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcvtt.f32.f16 s16, s17 -; CHECK-NEXT: vmov q5[2], q5[0], r0, r7 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmov q5[3], q5[1], r1, r6 +; CHECK-NEXT: csinv r6, r0, zr, gt +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: csinv r5, r1, zr, gt ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: vcvtt.f32.f16 s16, s19 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: vcvtb.f32.f16 s16, s17 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vcmp.f32 s24, s28 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: csinv r2, r0, zr, gt +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vmov q6[2], q6[0], r2, r6 +; CHECK-NEXT: csinv r4, r1, zr, gt ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 +; CHECK-NEXT: vcvtb.f32.f16 s30, s19 ; 
CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s28 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: vmov r2, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcvtb.f32.f16 s16, s18 -; CHECK-NEXT: vmov q6[2], q6[0], r0, r7 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmov q6[3], q6[1], r1, r6 +; CHECK-NEXT: vmov q6[3], q6[1], r4, r5 +; CHECK-NEXT: vmov q4[2], q4[0], r9, r7 +; CHECK-NEXT: vmov q4[3], q4[1], r8, r10 +; CHECK-NEXT: csinv r5, r0, zr, gt +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: csinv r4, r1, zr, gt ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: vmov q3[2], q3[0], r10, r9 +; CHECK-NEXT: vcmp.f32 s30, #0 +; CHECK-NEXT: vmov q0, q5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s30, s28 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: vmov q3[3], q3[1], r11, r8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmov q2[2], q2[0], r0, r5 -; CHECK-NEXT: vmov q0, q5 -; CHECK-NEXT: vmov q2[3], q2[1], r1, r4 ; CHECK-NEXT: vmov q1, q6 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: vmov q2, q4 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: vmov q3[2], q3[0], r0, r5 +; CHECK-NEXT: csinv r0, r1, zr, gt +; CHECK-NEXT: vmov q3[3], q3[1], r0, r4 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 @@ -3693,258 +3620,248 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} -; CHECK-NEXT: .pad #56 -; 
CHECK-NEXT: sub sp, #56 +; CHECK-NEXT: .pad #80 +; CHECK-NEXT: sub sp, #80 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtb.f32.f16 s22, s17 +; CHECK-NEXT: vcvtt.f32.f16 s22, s16 ; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcvtb.f32.f16 s24, s18 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: vldr s20, .LCPI50_0 ; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: mov r10, r2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcvtb.f32.f16 s24, s17 +; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vmov r7, s24 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: str r3, [sp, #64] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: str r0, [sp, #72] @ 4-byte Spill +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: str r2, [sp, #56] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp, #52] @ 4-byte Spill ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcvtb.f32.f16 s22, s19 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: mov r7, r2 +; CHECK-NEXT: vcvtt.f32.f16 s22, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #48] @ 4-byte Spill -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: str r3, [sp, #60] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: str r0, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r1, zr, gt +; CHECK-NEXT: str r0, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r2, zr, gt +; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s22, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str.w r2, [r4, #83] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str.w r1, [r4, #79] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcvtt.f32.f16 s22, s16 -; CHECK-NEXT: str.w r0, [r4, #75] -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: str.w r7, [r4, #58] -; CHECK-NEXT: str.w r6, [r4, #54] -; CHECK-NEXT: str.w r5, [r4, #50] -; CHECK-NEXT: str.w r10, [r4, #33] -; CHECK-NEXT: str.w r9, [r4, #29] -; CHECK-NEXT: str.w r8, [r4, #25] +; CHECK-NEXT: vcvtb.f32.f16 s22, s18 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #48] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: str r0, [sp, #76] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: vcvtt.f32.f16 s18, s18 
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcvtt.f32.f16 s22, s17 -; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r1, zr, gt +; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: csinv r0, r2, zr, gt +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: mov r6, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: vcvtt.f32.f16 s18, s18 +; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcvtb.f32.f16 s18, s19 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: movgt r6, #15 +; CHECK-NEXT: csinv r0, r0, zr, gt +; 
CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: str r0, [sp, #68] @ 4-byte Spill ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: csinv r9, r1, zr, gt ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: mov r7, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vcvtt.f32.f16 s18, s19 -; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r7, #15 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r8, r1, zr, gt +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: csinv r11, r2, zr, gt ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r10, r2 -; CHECK-NEXT: mov r7, r3 +; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt r7, #15 -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movgt.w r6, #-1 -; 
CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: csinv r10, r2, zr, gt +; CHECK-NEXT: orr.w r1, r1, r10, lsl #4 +; CHECK-NEXT: str.w r1, [r4, #95] +; CHECK-NEXT: str.w r0, [r4, #91] +; CHECK-NEXT: str.w r11, [r4, #83] +; CHECK-NEXT: str.w r8, [r4, #79] +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: str.w r0, [r4, #75] +; CHECK-NEXT: ldr r0, [sp, #68] @ 4-byte Reload +; CHECK-NEXT: ldr.w r8, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: lsrl r0, r9, #28 +; CHECK-NEXT: orr.w r1, r9, r8, lsl #4 +; CHECK-NEXT: str.w r1, [r4, #70] +; CHECK-NEXT: str.w r0, [r4, #66] +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: str.w r0, [r4, #58] +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: str.w r0, [r4, #54] +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: str.w r0, [r4, #50] +; CHECK-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: ldr.w r11, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: orr.w r1, r1, r11, lsl #4 +; CHECK-NEXT: str.w r1, [r4, #45] +; CHECK-NEXT: str.w r0, [r4, #41] +; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: str.w r0, [r4, #33] +; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: str.w r0, [r4, #29] +; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: str.w r0, [r4, #25] +; CHECK-NEXT: ldr r0, [sp, #72] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: ldr.w r9, [sp, #56] @ 4-byte Reload +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: orr.w r1, r1, r9, lsl #4 +; CHECK-NEXT: strd r0, r1, [r4, #16] +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r5, #15 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; 
CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: str r2, [r4, #8] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: csinv r0, r0, zr, gt ; CHECK-NEXT: str r1, [r4, #4] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: lsrl r0, r11, #28 -; CHECK-NEXT: and r1, r7, #15 -; CHECK-NEXT: str.w r0, [r4, #91] -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: lsrl r0, r9, #28 -; CHECK-NEXT: str.w r0, [r4, #66] -; CHECK-NEXT: ldr.w lr, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r0, lr -; CHECK-NEXT: lsrl r0, r5, #28 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: str.w r0, [r4, #41] -; CHECK-NEXT: ldr.w r12, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: lsrl r0, r5, #28 -; CHECK-NEXT: str r0, [r4, #16] -; CHECK-NEXT: orr.w r0, r11, r10, lsl #4 +; CHECK-NEXT: and r1, r5, #15 ; CHECK-NEXT: lsrl r10, r1, #28 -; CHECK-NEXT: str.w r0, [r4, #95] +; CHECK-NEXT: str r0, [r4] ; CHECK-NEXT: strb.w r10, [r4, #99] -; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r6, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #87] -; CHECK-NEXT: orr.w r0, r9, r8, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #70] -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: and r0, r7, #15 +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 +; CHECK-NEXT: and r1, r6, #15 ; CHECK-NEXT: lsrl r8, r1, #28 +; CHECK-NEXT: str.w r0, [r4, #87] ; CHECK-NEXT: strb.w r8, [r4, #74] 
-; CHECK-NEXT: ldr r0, [sp, #48] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #68] @ 4-byte Reload ; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 ; CHECK-NEXT: str.w r0, [r4, #62] -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #45] -; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #48] @ 4-byte Reload ; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r4, #49] -; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: strb.w r0, [r4, #49] +; CHECK-NEXT: ldr r0, [sp, #60] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #76] @ 4-byte Reload ; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, lr, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 ; CHECK-NEXT: str.w r0, [r4, #37] -; CHECK-NEXT: ldr r2, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: orr.w r0, r5, r2, lsl #4 -; CHECK-NEXT: str r0, [r4, #20] -; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #64] @ 4-byte Reload ; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb r2, [r4, #24] +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: strb r0, [r4, #24] ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 +; CHECK-NEXT: ldr r1, [sp, #72] @ 4-byte Reload ; CHECK-NEXT: and r0, r3, #15 -; CHECK-NEXT: orr.w r0, r0, r12, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 ; CHECK-NEXT: str r0, [r4, #12] -; CHECK-NEXT: add sp, #56 +; CHECK-NEXT: add sp, #80 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -3979,44 +3896,40 @@ define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-NEXT: vcmp.f32 
s24, #0 ; CHECK-NEXT: add.w r12, r4, #112 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: vcvtb.f32.f16 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: vmov r6, s26 +; CHECK-NEXT: vcvtb.f32.f16 s18, s18 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmov r6, s26 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s28, #0 ; CHECK-NEXT: add.w r12, r4, #96 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: vcvtt.f32.f16 s24, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: vmov r7, s18 +; CHECK-NEXT: vcvtt.f32.f16 s24, s17 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r7, s18 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s26, #0 ; CHECK-NEXT: add.w r12, r4, #80 @@ -4028,108 +3941,99 @@ define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: 
it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: vmov r6, s24 ; CHECK-NEXT: vcvtb.f32.f16 s26, s17 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: add.w r12, r4, #64 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: vmov r7, s26 +; CHECK-NEXT: vcvtt.f32.f16 s16, s16 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r7, s26 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s24, #0 ; CHECK-NEXT: add.w r12, r4, #48 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: vmov r6, s16 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, 
zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmov r6, s16 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s26, #0 ; CHECK-NEXT: add.w r12, r4, #32 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: add.w r12, r4, #16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s22, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, 
#-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: csinv r0, r0, zr, gt +; CHECK-NEXT: csinv r3, r3, zr, gt +; CHECK-NEXT: csinv r2, r2, zr, gt +; CHECK-NEXT: csinv r1, r1, zr, gt ; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: add sp, #4 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index cba0f9cbba2ca..1b91e276c5c1b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -576,10 +576,10 @@ define i32 @arm_nn_mat_mul_core_4x_s8(i32 %row_elements, i32 %offset, ptr %row_b ; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: mov.w r10, #0 -; CHECK-NEXT: movs r6, #0 ; CHECK-NEXT: adds r7, r2, r1 ; CHECK-NEXT: add.w r1, r1, r1, lsl #1 ; CHECK-NEXT: add r1, r2 +; CHECK-NEXT: movs r6, #0 ; CHECK-NEXT: dlstp.8 lr, r0 ; CHECK-NEXT: .LBB4_2: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 29b56639bd769..546c39fe11f4b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -665,8 +665,7 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: lsrl r0, r1, #31 ; CHECK-NEXT: subs.w r3, r0, #-1 ; CHECK-NEXT: sbcs r1, r1, #0 -; CHECK-NEXT: it hs -; CHECK-NEXT: movhs.w r0, #-1 +; CHECK-NEXT: csinv r0, r0, zr, hs ; CHECK-NEXT: str r0, [r2], #4 ; CHECK-NEXT: le lr, .LBB3_7 ; CHECK-NEXT: .LBB3_8: @ %for.cond.cleanup @@ -832,8 +831,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: lsrl r0, r1, #31 ; CHECK-NEXT: subs.w r2, r0, #-1 ; CHECK-NEXT: sbcs r1, r1, #0 -; CHECK-NEXT: it hs -; CHECK-NEXT: movhs.w r0, #-1 +; CHECK-NEXT: csinv r0, r0, zr, hs ; CHECK-NEXT: str r0, [r11], #4 
; CHECK-NEXT: le lr, .LBB4_7 ; CHECK-NEXT: .LBB4_8: @ %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll index bf0d92b5e0303..7280093375f12 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll @@ -487,23 +487,20 @@ for.cond.cleanup: ; preds = %vector.body define <2 x i64> @large_i128(<2 x double> %x) { ; CHECK-LABEL: large_i128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: subs r7, r2, #1 -; CHECK-NEXT: mov.w r9, #1 -; CHECK-NEXT: sbcs r7, r3, #0 ; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs r7, r3, #0 ; CHECK-NEXT: cset r7, lt ; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: csel r0, r0, r7, ne ; CHECK-NEXT: csel r3, r3, r7, ne ; CHECK-NEXT: csel r1, r1, r7, ne -; CHECK-NEXT: csel r2, r2, r9, ne +; CHECK-NEXT: csinc r2, r2, zr, eq ; CHECK-NEXT: rsbs r7, r0, #0 ; CHECK-NEXT: sbcs.w r7, r4, r1 ; CHECK-NEXT: sbcs.w r2, r4, r2 @@ -522,7 +519,7 @@ define <2 x i64> @large_i128(<2 x double> %x) { ; CHECK-NEXT: csel r0, r0, r5, ne ; CHECK-NEXT: csel r3, r3, r5, ne ; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: csel r2, r2, r9, ne +; CHECK-NEXT: csinc r2, r2, zr, eq ; CHECK-NEXT: rsbs r5, r0, #0 ; CHECK-NEXT: sbcs.w r5, r4, r1 ; CHECK-NEXT: sbcs.w r2, r4, r2 @@ -533,8 +530,7 @@ define <2 x i64> @large_i128(<2 x double> %x) { ; CHECK-NEXT: csel r3, r1, r3, ne ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv,