@@ -465,49 +465,33 @@ define <2 x fp128> @v2f128_fp128(<2 x fp128> %a, <2 x fp128> %b, <2 x fp128> %d,
465465; CHECK-GI-NEXT: .cfi_offset w30, -16
466466; CHECK-GI-NEXT: stp q3, q1, [sp] // 32-byte Folded Spill
467467; CHECK-GI-NEXT: mov v1.16b, v2.16b
468- ; CHECK-GI-NEXT: stp q4, q5 , [sp, #32] // 32-byte Folded Spill
469- ; CHECK-GI-NEXT: stp q6, q7 , [sp, #64] // 32-byte Folded Spill
468+ ; CHECK-GI-NEXT: stp q6, q4 , [sp, #32] // 32-byte Folded Spill
469+ ; CHECK-GI-NEXT: stp q7, q5 , [sp, #64] // 32-byte Folded Spill
470470; CHECK-GI-NEXT: bl __lttf2
471471; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
472- ; CHECK-GI-NEXT: cmp w0, #0
473- ; CHECK-GI-NEXT: cset w19, lt
472+ ; CHECK-GI-NEXT: mov w19, w0
474473; CHECK-GI-NEXT: bl __lttf2
475- ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
476- ; CHECK-GI-NEXT: cmp w0, #0
477- ; CHECK-GI-NEXT: bfi x19, x8, #32, #32
478- ; CHECK-GI-NEXT: cset w8, lt
479- ; CHECK-GI-NEXT: fmov x10, d0
480- ; CHECK-GI-NEXT: mov x11, v0.d[1]
481- ; CHECK-GI-NEXT: bfi x8, x8, #32, #32
482- ; CHECK-GI-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload
483- ; CHECK-GI-NEXT: lsl x9, x19, #63
484- ; CHECK-GI-NEXT: lsl x8, x8, #63
474+ ; CHECK-GI-NEXT: ldp q3, q2, [sp, #32] // 32-byte Folded Reload
475+ ; CHECK-GI-NEXT: cmp w19, #0
485476; CHECK-GI-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload
486- ; CHECK-GI-NEXT: asr x9, x9, #63
487- ; CHECK-GI-NEXT: fmov x12, d0
488- ; CHECK-GI-NEXT: mov x13, v0.d[1]
489- ; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
490- ; CHECK-GI-NEXT: fmov x14, d1
491- ; CHECK-GI-NEXT: asr x8, x8, #63
492- ; CHECK-GI-NEXT: and x10, x10, x9
493- ; CHECK-GI-NEXT: fmov x15, d0
494- ; CHECK-GI-NEXT: mov x16, v1.d[1]
495- ; CHECK-GI-NEXT: mov x17, v0.d[1]
496- ; CHECK-GI-NEXT: and x12, x12, x8
497- ; CHECK-GI-NEXT: bic x14, x14, x9
498- ; CHECK-GI-NEXT: bic x15, x15, x8
499- ; CHECK-GI-NEXT: orr x10, x10, x14
500- ; CHECK-GI-NEXT: orr x12, x12, x15
501- ; CHECK-GI-NEXT: mov v0.d[0], x10
502- ; CHECK-GI-NEXT: and x10, x11, x9
503- ; CHECK-GI-NEXT: mov v1.d[0], x12
504- ; CHECK-GI-NEXT: and x11, x13, x8
505- ; CHECK-GI-NEXT: bic x9, x16, x9
506- ; CHECK-GI-NEXT: bic x8, x17, x8
507- ; CHECK-GI-NEXT: orr x9, x10, x9
508- ; CHECK-GI-NEXT: orr x8, x11, x8
509- ; CHECK-GI-NEXT: mov v0.d[1], x9
510- ; CHECK-GI-NEXT: mov v1.d[1], x8
477+ ; CHECK-GI-NEXT: mov d0, v2.d[1]
478+ ; CHECK-GI-NEXT: mov d1, v3.d[1]
479+ ; CHECK-GI-NEXT: fcsel d2, d2, d3, lt
480+ ; CHECK-GI-NEXT: fmov x8, d2
481+ ; CHECK-GI-NEXT: fcsel d3, d0, d1, lt
482+ ; CHECK-GI-NEXT: ldp q5, q0, [sp, #64] // 32-byte Folded Reload
483+ ; CHECK-GI-NEXT: cmp w0, #0
484+ ; CHECK-GI-NEXT: mov d1, v0.d[1]
485+ ; CHECK-GI-NEXT: mov d4, v5.d[1]
486+ ; CHECK-GI-NEXT: fcsel d0, d0, d5, lt
487+ ; CHECK-GI-NEXT: fmov x9, d0
488+ ; CHECK-GI-NEXT: mov v0.d[0], x8
489+ ; CHECK-GI-NEXT: fmov x8, d3
490+ ; CHECK-GI-NEXT: fcsel d2, d1, d4, lt
491+ ; CHECK-GI-NEXT: mov v1.d[0], x9
492+ ; CHECK-GI-NEXT: fmov x9, d2
493+ ; CHECK-GI-NEXT: mov v0.d[1], x8
494+ ; CHECK-GI-NEXT: mov v1.d[1], x9
511495; CHECK-GI-NEXT: add sp, sp, #112
512496; CHECK-GI-NEXT: ret
513497entry:
@@ -567,77 +551,52 @@ define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d,
567551; CHECK-GI-NEXT: mov v1.16b, v3.16b
568552; CHECK-GI-NEXT: stp q5, q2, [sp, #32] // 32-byte Folded Spill
569553; CHECK-GI-NEXT: ldr q2, [sp, #192]
570- ; CHECK-GI-NEXT: str q7, [sp, #64] // 16-byte Folded Spill
571- ; CHECK-GI-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill
554+ ; CHECK-GI-NEXT: str q2, [sp, #144] // 16-byte Folded Spill
572555; CHECK-GI-NEXT: ldr q2, [sp, #208]
573- ; CHECK-GI-NEXT: str q2, [sp, #112 ] // 16 -byte Folded Spill
556+ ; CHECK-GI-NEXT: stp q2, q6, [sp, #64 ] // 32 -byte Folded Spill
574557; CHECK-GI-NEXT: ldr q2, [sp, #224]
575- ; CHECK-GI-NEXT: str q2, [sp, #128 ] // 16 -byte Folded Spill
558+ ; CHECK-GI-NEXT: stp q7, q2, [sp, #96 ] // 32 -byte Folded Spill
576559; CHECK-GI-NEXT: ldr q2, [sp, #240]
577- ; CHECK-GI-NEXT: str q2, [sp, #144 ] // 16-byte Folded Spill
560+ ; CHECK-GI-NEXT: str q2, [sp, #128 ] // 16-byte Folded Spill
578561; CHECK-GI-NEXT: bl __lttf2
579562; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
580- ; CHECK-GI-NEXT: cmp w0, #0
581- ; CHECK-GI-NEXT: cset w19, lt
563+ ; CHECK-GI-NEXT: mov w19, w0
582564; CHECK-GI-NEXT: bl __lttf2
583565; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
584- ; CHECK-GI-NEXT: cmp w0, #0
585- ; CHECK-GI-NEXT: cset w20, lt
566+ ; CHECK-GI-NEXT: mov w20, w0
586567; CHECK-GI-NEXT: bl __lttf2
587- ; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
588- ; CHECK-GI-NEXT: bfi x19, x8, #32, #32
589- ; CHECK-GI-NEXT: bfi x20, x8, #32, #32
590- ; CHECK-GI-NEXT: cmp w0, #0
568+ ; CHECK-GI-NEXT: ldp q5, q4, [sp, #64] // 32-byte Folded Reload
569+ ; CHECK-GI-NEXT: cmp w19, #0
570+ ; CHECK-GI-NEXT: ldp q7, q6, [sp, #96] // 32-byte Folded Reload
591571; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload
592- ; CHECK-GI-NEXT: fmov x8, d0
593- ; CHECK-GI-NEXT: mov x10, v0.d[1]
594- ; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
595- ; CHECK-GI-NEXT: cset w9, lt
596- ; CHECK-GI-NEXT: lsl x13, x19, #63
597- ; CHECK-GI-NEXT: lsl x14, x20, #63
598- ; CHECK-GI-NEXT: fmov x11, d0
599- ; CHECK-GI-NEXT: mov x12, v0.d[1]
600- ; CHECK-GI-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
601- ; CHECK-GI-NEXT: bfi x9, x8, #32, #32
602- ; CHECK-GI-NEXT: asr x13, x13, #63
603- ; CHECK-GI-NEXT: asr x14, x14, #63
604- ; CHECK-GI-NEXT: fmov x15, d0
605- ; CHECK-GI-NEXT: mov x16, v0.d[1]
606- ; CHECK-GI-NEXT: ldp q0, q1, [sp, #112] // 32-byte Folded Reload
607- ; CHECK-GI-NEXT: lsl x9, x9, #63
608- ; CHECK-GI-NEXT: and x8, x8, x13
609- ; CHECK-GI-NEXT: and x11, x11, x14
610- ; CHECK-GI-NEXT: asr x9, x9, #63
572+ ; CHECK-GI-NEXT: mov d0, v4.d[1]
573+ ; CHECK-GI-NEXT: mov d1, v5.d[1]
574+ ; CHECK-GI-NEXT: fcsel d4, d4, d5, lt
575+ ; CHECK-GI-NEXT: mov d2, v7.d[1]
576+ ; CHECK-GI-NEXT: mov d3, v6.d[1]
577+ ; CHECK-GI-NEXT: fmov x8, d4
578+ ; CHECK-GI-NEXT: fcsel d5, d0, d1, lt
579+ ; CHECK-GI-NEXT: cmp w20, #0
580+ ; CHECK-GI-NEXT: fcsel d1, d7, d6, lt
581+ ; CHECK-GI-NEXT: ldp q7, q0, [sp, #128] // 32-byte Folded Reload
582+ ; CHECK-GI-NEXT: fcsel d3, d2, d3, lt
583+ ; CHECK-GI-NEXT: cmp w0, #0
611584; CHECK-GI-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
612- ; CHECK-GI-NEXT: fmov x17, d0
613- ; CHECK-GI-NEXT: mov x18, v0.d[1]
614- ; CHECK-GI-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
615- ; CHECK-GI-NEXT: fmov x0, d1
616- ; CHECK-GI-NEXT: and x15, x15, x9
617- ; CHECK-GI-NEXT: mov x2, v1.d[1]
618- ; CHECK-GI-NEXT: fmov x1, d0
619- ; CHECK-GI-NEXT: mov x3, v0.d[1]
620- ; CHECK-GI-NEXT: bic x17, x17, x13
621- ; CHECK-GI-NEXT: bic x0, x0, x14
622- ; CHECK-GI-NEXT: orr x8, x8, x17
623- ; CHECK-GI-NEXT: bic x1, x1, x9
624- ; CHECK-GI-NEXT: orr x11, x11, x0
585+ ; CHECK-GI-NEXT: mov d2, v0.d[1]
586+ ; CHECK-GI-NEXT: mov d6, v7.d[1]
587+ ; CHECK-GI-NEXT: fcsel d7, d0, d7, lt
625588; CHECK-GI-NEXT: mov v0.d[0], x8
626- ; CHECK-GI-NEXT: orr x15, x15, x1
627- ; CHECK-GI-NEXT: mov v1.d[0], x11
628- ; CHECK-GI-NEXT: and x8, x10, x13
629- ; CHECK-GI-NEXT: mov v2.d[0], x15
630- ; CHECK-GI-NEXT: and x10, x12, x14
631- ; CHECK-GI-NEXT: and x11, x16, x9
632- ; CHECK-GI-NEXT: bic x12, x18, x13
633- ; CHECK-GI-NEXT: bic x13, x2, x14
634- ; CHECK-GI-NEXT: bic x9, x3, x9
635- ; CHECK-GI-NEXT: orr x8, x8, x12
636- ; CHECK-GI-NEXT: orr x10, x10, x13
637- ; CHECK-GI-NEXT: orr x9, x11, x9
589+ ; CHECK-GI-NEXT: fmov x8, d1
590+ ; CHECK-GI-NEXT: fmov x9, d7
591+ ; CHECK-GI-NEXT: fcsel d4, d2, d6, lt
592+ ; CHECK-GI-NEXT: mov v1.d[0], x8
593+ ; CHECK-GI-NEXT: fmov x8, d5
594+ ; CHECK-GI-NEXT: mov v2.d[0], x9
595+ ; CHECK-GI-NEXT: fmov x9, d3
596+ ; CHECK-GI-NEXT: fmov x10, d4
638597; CHECK-GI-NEXT: mov v0.d[1], x8
639- ; CHECK-GI-NEXT: mov v1.d[1], x10
640- ; CHECK-GI-NEXT: mov v2.d[1], x9
598+ ; CHECK-GI-NEXT: mov v1.d[1], x9
599+ ; CHECK-GI-NEXT: mov v2.d[1], x10
641600; CHECK-GI-NEXT: add sp, sp, #192
642601; CHECK-GI-NEXT: ret
643602entry:
0 commit comments