@@ -441,6 +441,291 @@ define i64 @fneg_fabs_select_i64_2(i64 %cond, i64 %a, i64 %b) {
441441 %select = select i1 %cmp , i64 %b , i64 %neg.a
442442 ret i64 %select
443443}
444+
; Uniform (all-inreg) form of the i64 "fneg feeding a select" pattern:
; returns %a with bit 63 flipped when %cond == 0, otherwise %b.
; The checks show the flip being applied to the high 32-bit half only and the
; 64-bit select being emitted as two scalar s_cselect_b32.
define i64 @s_fneg_select_i64_1(i64 inreg %cond, i64 inreg %a, i64 inreg %b) {
; GFX7-LABEL: s_fneg_select_i64_1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[16:17], 0
; GFX7-NEXT:    s_xor_b32 s6, s19, 0x80000000
; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], exec
; GFX7-NEXT:    s_cselect_b32 s4, s18, s20
; GFX7-NEXT:    s_cselect_b32 s5, s6, s21
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_fneg_select_i64_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_xor_b32 s4, s19, 0x80000000
; GFX9-NEXT:    s_cmp_eq_u64 s[16:17], 0
; GFX9-NEXT:    s_cselect_b32 s5, s18, s20
; GFX9-NEXT:    s_cselect_b32 s4, s4, s21
; GFX9-NEXT:    v_mov_b32_e32 v0, s5
; GFX9-NEXT:    v_mov_b32_e32 v1, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_fneg_select_i64_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_xor_b32 s3, s3, 0x80000000
; GFX11-NEXT:    s_cmp_eq_u64 s[0:1], 0
; GFX11-NEXT:    s_cselect_b32 s0, s2, s16
; GFX11-NEXT:    s_cselect_b32 s1, s3, s17
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %a.signflip = xor i64 %a, u0x8000000000000000
  %cond.is.zero = icmp eq i64 %cond, 0
  %result = select i1 %cond.is.zero, i64 %a.signflip, i64 %b
  ret i64 %result
}
484+
; Same as the _1 variant but with the select operands swapped:
; returns %b when %cond == 0, otherwise %a with bit 63 flipped.
; All arguments are inreg; the checks expect the sign flip on the high half
; only and a pair of s_cselect_b32 for the 64-bit select.
define i64 @s_fneg_select_i64_2(i64 inreg %cond, i64 inreg %a, i64 inreg %b) {
; GFX7-LABEL: s_fneg_select_i64_2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[16:17], 0
; GFX7-NEXT:    s_xor_b32 s6, s19, 0x80000000
; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], exec
; GFX7-NEXT:    s_cselect_b32 s4, s20, s18
; GFX7-NEXT:    s_cselect_b32 s5, s21, s6
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_fneg_select_i64_2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_xor_b32 s4, s19, 0x80000000
; GFX9-NEXT:    s_cmp_eq_u64 s[16:17], 0
; GFX9-NEXT:    s_cselect_b32 s5, s20, s18
; GFX9-NEXT:    s_cselect_b32 s4, s21, s4
; GFX9-NEXT:    v_mov_b32_e32 v0, s5
; GFX9-NEXT:    v_mov_b32_e32 v1, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_fneg_select_i64_2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_xor_b32 s3, s3, 0x80000000
; GFX11-NEXT:    s_cmp_eq_u64 s[0:1], 0
; GFX11-NEXT:    s_cselect_b32 s0, s16, s2
; GFX11-NEXT:    s_cselect_b32 s1, s17, s3
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %a.signflip = xor i64 %a, u0x8000000000000000
  %cond.is.zero = icmp eq i64 %cond, 0
  %result = select i1 %cond.is.zero, i64 %b, i64 %a.signflip
  ret i64 %result
}
524+
; Both select arms carry a sign-bit operation: when %cond == 0 the result is
; %a with bit 63 flipped (xor), otherwise %b with bit 63 cleared (and).
; All arguments are inreg. The checks expect s_xor_b32 / s_bitset0_b32 on the
; high halves, followed by two s_cselect_b32.
define i64 @s_fneg_1_fabs_2_select_i64(i64 inreg %cond, i64 inreg %a, i64 inreg %b) {
; GFX7-LABEL: s_fneg_1_fabs_2_select_i64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[16:17], 0
; GFX7-NEXT:    s_xor_b32 s6, s19, 0x80000000
; GFX7-NEXT:    s_bitset0_b32 s21, 31
; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], exec
; GFX7-NEXT:    s_cselect_b32 s4, s18, s20
; GFX7-NEXT:    s_cselect_b32 s5, s6, s21
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_fneg_1_fabs_2_select_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_xor_b32 s4, s19, 0x80000000
; GFX9-NEXT:    s_bitset0_b32 s21, 31
; GFX9-NEXT:    s_cmp_eq_u64 s[16:17], 0
; GFX9-NEXT:    s_cselect_b32 s5, s18, s20
; GFX9-NEXT:    s_cselect_b32 s4, s4, s21
; GFX9-NEXT:    v_mov_b32_e32 v0, s5
; GFX9-NEXT:    v_mov_b32_e32 v1, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_fneg_1_fabs_2_select_i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_xor_b32 s3, s3, 0x80000000
; GFX11-NEXT:    s_bitset0_b32 s17, 31
; GFX11-NEXT:    s_cmp_eq_u64 s[0:1], 0
; GFX11-NEXT:    s_cselect_b32 s0, s2, s16
; GFX11-NEXT:    s_cselect_b32 s1, s3, s17
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %a.signflip = xor i64 %a, u0x8000000000000000
  %b.signclear = and i64 %b, u0x7fffffffffffffff
  %cond.is.zero = icmp eq i64 %cond, 0
  %result = select i1 %cond.is.zero, i64 %a.signflip, i64 %b.signclear
  ret i64 %result
}
568+
; Uniform (all-inreg) form of the i64 "fabs feeding a select" pattern:
; returns %a with bit 63 cleared when %cond == 0, otherwise %b.
; The checks expect the clear as s_bitset0_b32 on the high half and the
; 64-bit select as two s_cselect_b32.
define i64 @s_fabs_select_i64_1(i64 inreg %cond, i64 inreg %a, i64 inreg %b) {
; GFX7-LABEL: s_fabs_select_i64_1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[16:17], 0
; GFX7-NEXT:    s_bitset0_b32 s19, 31
; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], exec
; GFX7-NEXT:    s_cselect_b32 s4, s18, s20
; GFX7-NEXT:    s_cselect_b32 s5, s19, s21
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_fabs_select_i64_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_bitset0_b32 s19, 31
; GFX9-NEXT:    s_cmp_eq_u64 s[16:17], 0
; GFX9-NEXT:    s_cselect_b32 s4, s18, s20
; GFX9-NEXT:    s_cselect_b32 s5, s19, s21
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_fabs_select_i64_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_bitset0_b32 s3, 31
; GFX11-NEXT:    s_cmp_eq_u64 s[0:1], 0
; GFX11-NEXT:    s_cselect_b32 s0, s2, s16
; GFX11-NEXT:    s_cselect_b32 s1, s3, s17
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; The mask clears the sign bit, so the value is named abs, not neg.
  %abs.a = and i64 %a, u0x7fffffffffffffff
  %cmp = icmp eq i64 %cond, zeroinitializer
  %select = select i1 %cmp, i64 %abs.a, i64 %b
  ret i64 %select
}
608+
; Same as the _1 variant but with the select operands swapped:
; returns %b when %cond == 0, otherwise %a with bit 63 cleared.
; All arguments are inreg; the checks expect s_bitset0_b32 on the high half
; and two s_cselect_b32 for the 64-bit select.
define i64 @s_fabs_select_i64_2(i64 inreg %cond, i64 inreg %a, i64 inreg %b) {
; GFX7-LABEL: s_fabs_select_i64_2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[16:17], 0
; GFX7-NEXT:    s_bitset0_b32 s19, 31
; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], exec
; GFX7-NEXT:    s_cselect_b32 s4, s20, s18
; GFX7-NEXT:    s_cselect_b32 s5, s21, s19
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_fabs_select_i64_2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_bitset0_b32 s19, 31
; GFX9-NEXT:    s_cmp_eq_u64 s[16:17], 0
; GFX9-NEXT:    s_cselect_b32 s4, s20, s18
; GFX9-NEXT:    s_cselect_b32 s5, s21, s19
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_fabs_select_i64_2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_bitset0_b32 s3, 31
; GFX11-NEXT:    s_cmp_eq_u64 s[0:1], 0
; GFX11-NEXT:    s_cselect_b32 s0, s16, s2
; GFX11-NEXT:    s_cselect_b32 s1, s17, s3
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; The mask clears the sign bit, so the value is named abs, not neg.
  %abs.a = and i64 %a, u0x7fffffffffffffff
  %cmp = icmp eq i64 %cond, zeroinitializer
  %select = select i1 %cmp, i64 %b, i64 %abs.a
  ret i64 %select
}
648+
; Uniform (all-inreg) form of the i64 "fneg(fabs) feeding a select" pattern:
; returns %a with bit 63 forced to 1 when %cond == 0, otherwise %b.
; The checks expect the set as s_bitset1_b32 on the high half and the
; 64-bit select as two s_cselect_b32.
define i64 @s_fneg_fabs_select_i64_1(i64 inreg %cond, i64 inreg %a, i64 inreg %b) {
; GFX7-LABEL: s_fneg_fabs_select_i64_1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[16:17], 0
; GFX7-NEXT:    s_bitset1_b32 s19, 31
; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], exec
; GFX7-NEXT:    s_cselect_b32 s4, s18, s20
; GFX7-NEXT:    s_cselect_b32 s5, s19, s21
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_fneg_fabs_select_i64_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_bitset1_b32 s19, 31
; GFX9-NEXT:    s_cmp_eq_u64 s[16:17], 0
; GFX9-NEXT:    s_cselect_b32 s4, s18, s20
; GFX9-NEXT:    s_cselect_b32 s5, s19, s21
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_fneg_fabs_select_i64_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_bitset1_b32 s3, 31
; GFX11-NEXT:    s_cmp_eq_u64 s[0:1], 0
; GFX11-NEXT:    s_cselect_b32 s0, s2, s16
; GFX11-NEXT:    s_cselect_b32 s1, s3, s17
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.abs.a = or i64 %a, u0x8000000000000000
  %cond.is.zero = icmp eq i64 %cond, 0
  %result = select i1 %cond.is.zero, i64 %neg.abs.a, i64 %b
  ret i64 %result
}
688+
; Same as the _1 variant but with the select operands swapped:
; returns %b when %cond == 0, otherwise %a with bit 63 forced to 1.
; All arguments are inreg; the checks expect s_bitset1_b32 on the high half
; and two s_cselect_b32 for the 64-bit select.
define i64 @s_fneg_fabs_select_i64_2(i64 inreg %cond, i64 inreg %a, i64 inreg %b) {
; GFX7-LABEL: s_fneg_fabs_select_i64_2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[16:17], 0
; GFX7-NEXT:    s_bitset1_b32 s19, 31
; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], exec
; GFX7-NEXT:    s_cselect_b32 s4, s20, s18
; GFX7-NEXT:    s_cselect_b32 s5, s21, s19
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_fneg_fabs_select_i64_2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_bitset1_b32 s19, 31
; GFX9-NEXT:    s_cmp_eq_u64 s[16:17], 0
; GFX9-NEXT:    s_cselect_b32 s4, s20, s18
; GFX9-NEXT:    s_cselect_b32 s5, s21, s19
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_fneg_fabs_select_i64_2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_bitset1_b32 s3, 31
; GFX11-NEXT:    s_cmp_eq_u64 s[0:1], 0
; GFX11-NEXT:    s_cselect_b32 s0, s16, s2
; GFX11-NEXT:    s_cselect_b32 s1, s17, s3
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.abs.a = or i64 %a, u0x8000000000000000
  %cond.is.zero = icmp eq i64 %cond, 0
  %result = select i1 %cond.is.zero, i64 %b, i64 %neg.abs.a
  ret i64 %result
}
728+
444729define i16 @fneg_select_i16_1 (i16 %cond , i16 %a , i16 %b ) {
445730; GFX7-LABEL: fneg_select_i16_1:
446731; GFX7: ; %bb.0:
@@ -609,4 +894,4 @@ define i16 @fneg_1_fabs_2_select_i16(i16 %cond, i16 %a, i16 %b) {
609894 %cmp = icmp eq i16 %cond , zeroinitializer
610895 %select = select i1 %cmp , i16 %neg.a , i16 %abs.b
611896 ret i16 %select
612- }
897+ }
0 commit comments