@@ -834,6 +834,300 @@ define amdgpu_kernel void @s_and_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
834834 store i64 %and , ptr addrspace (1 ) %out , align 8
835835 ret void
836836}
837+
; Test: element-wise `and` of two <2 x i128> function arguments.
; Each check prefix below (GCN, GFX10PLUS, GFX12) expects eight
; v_and_b32_e32 instructions pairing v0-v7 with v8-v15, followed by the
; s_setpc_b64 return. Only the leading wait instructions differ between the
; prefixes (a single s_waitcnt vs. the split s_wait_* sequence under GFX12).
; NOTE(review): the assertion lines look tool-generated; regenerate them
; rather than editing by hand - TODO confirm against the file header.
838+ define <2 x i128 > @v_and_v2i128 (<2 x i128 > %a , <2 x i128 > %b ) {
839+ ; GCN-LABEL: v_and_v2i128:
840+ ; GCN: ; %bb.0:
841+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v8
843+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v9
844+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v10
845+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v11
846+ ; GCN-NEXT: v_and_b32_e32 v4, v4, v12
847+ ; GCN-NEXT: v_and_b32_e32 v5, v5, v13
848+ ; GCN-NEXT: v_and_b32_e32 v6, v6, v14
849+ ; GCN-NEXT: v_and_b32_e32 v7, v7, v15
850+ ; GCN-NEXT: s_setpc_b64 s[30:31]
851+ ;
852+ ; GFX10PLUS-LABEL: v_and_v2i128:
853+ ; GFX10PLUS: ; %bb.0:
854+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v8
856+ ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v9
857+ ; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v10
858+ ; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v11
859+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v12
860+ ; GFX10PLUS-NEXT: v_and_b32_e32 v5, v5, v13
861+ ; GFX10PLUS-NEXT: v_and_b32_e32 v6, v6, v14
862+ ; GFX10PLUS-NEXT: v_and_b32_e32 v7, v7, v15
863+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
864+ ;
865+ ; GFX12-LABEL: v_and_v2i128:
866+ ; GFX12: ; %bb.0:
867+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
868+ ; GFX12-NEXT: s_wait_expcnt 0x0
869+ ; GFX12-NEXT: s_wait_samplecnt 0x0
870+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
871+ ; GFX12-NEXT: s_wait_kmcnt 0x0
872+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v8
873+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v9
874+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v10
875+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v11
876+ ; GFX12-NEXT: v_and_b32_e32 v4, v4, v12
877+ ; GFX12-NEXT: v_and_b32_e32 v5, v5, v13
878+ ; GFX12-NEXT: v_and_b32_e32 v6, v6, v14
879+ ; GFX12-NEXT: v_and_b32_e32 v7, v7, v15
880+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
881+ %and = and <2 x i128 > %a , %b
882+ ret <2 x i128 > %and
883+ }
884+
; Test: `and` of a <2 x i128> argument with the constant vector
; <i128 64, i128 64>.
; The checks expect only the lowest dword of each i128 lane (v0 and v4) to be
; ANDed, with 64 written directly as the instruction operand; the remaining
; six result registers (v1-v3, v5-v7) are expected to be set to 0.
; Under the GCN and GFX10 prefixes this is two v_and_b32_e32 plus six
; v_mov_b32_e32; under GFX11 and GFX12 the same eight operations are expected
; as four paired v_dual_* instructions.
885+ define <2 x i128 > @v_and_v2i128_inline_imm (<2 x i128 > %a ) {
886+ ; GCN-LABEL: v_and_v2i128_inline_imm:
887+ ; GCN: ; %bb.0:
888+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889+ ; GCN-NEXT: v_and_b32_e32 v0, 64, v0
890+ ; GCN-NEXT: v_and_b32_e32 v4, 64, v4
891+ ; GCN-NEXT: v_mov_b32_e32 v1, 0
892+ ; GCN-NEXT: v_mov_b32_e32 v2, 0
893+ ; GCN-NEXT: v_mov_b32_e32 v3, 0
894+ ; GCN-NEXT: v_mov_b32_e32 v5, 0
895+ ; GCN-NEXT: v_mov_b32_e32 v6, 0
896+ ; GCN-NEXT: v_mov_b32_e32 v7, 0
897+ ; GCN-NEXT: s_setpc_b64 s[30:31]
898+ ;
899+ ; GFX10-LABEL: v_and_v2i128_inline_imm:
900+ ; GFX10: ; %bb.0:
901+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
902+ ; GFX10-NEXT: v_and_b32_e32 v0, 64, v0
903+ ; GFX10-NEXT: v_and_b32_e32 v4, 64, v4
904+ ; GFX10-NEXT: v_mov_b32_e32 v1, 0
905+ ; GFX10-NEXT: v_mov_b32_e32 v2, 0
906+ ; GFX10-NEXT: v_mov_b32_e32 v3, 0
907+ ; GFX10-NEXT: v_mov_b32_e32 v5, 0
908+ ; GFX10-NEXT: v_mov_b32_e32 v6, 0
909+ ; GFX10-NEXT: v_mov_b32_e32 v7, 0
910+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
911+ ;
912+ ; GFX11-LABEL: v_and_v2i128_inline_imm:
913+ ; GFX11: ; %bb.0:
914+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
915+ ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0
916+ ; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4
917+ ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0
918+ ; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0
919+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
920+ ;
921+ ; GFX12-LABEL: v_and_v2i128_inline_imm:
922+ ; GFX12: ; %bb.0:
923+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
924+ ; GFX12-NEXT: s_wait_expcnt 0x0
925+ ; GFX12-NEXT: s_wait_samplecnt 0x0
926+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
927+ ; GFX12-NEXT: s_wait_kmcnt 0x0
928+ ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0
929+ ; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4
930+ ; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0
931+ ; GFX12-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0
932+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
933+ %and = and <2 x i128 > %a , <i128 64 , i128 64 >
934+ ret <2 x i128 > %and
935+ }
936+
; Test: element-wise `and` of two <3 x i128> function arguments
; (a non-power-of-two element count).
; Each check prefix (GCN, GFX10PLUS, GFX12) expects twelve v_and_b32_e32
; instructions pairing v0-v11 with v12-v23, followed by the s_setpc_b64
; return. Only the leading wait instructions differ under GFX12.
937+ define <3 x i128 > @v_and_v3i128 (<3 x i128 > %a , <3 x i128 > %b ) {
938+ ; GCN-LABEL: v_and_v3i128:
939+ ; GCN: ; %bb.0:
940+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
941+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v12
942+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v13
943+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v14
944+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v15
945+ ; GCN-NEXT: v_and_b32_e32 v4, v4, v16
946+ ; GCN-NEXT: v_and_b32_e32 v5, v5, v17
947+ ; GCN-NEXT: v_and_b32_e32 v6, v6, v18
948+ ; GCN-NEXT: v_and_b32_e32 v7, v7, v19
949+ ; GCN-NEXT: v_and_b32_e32 v8, v8, v20
950+ ; GCN-NEXT: v_and_b32_e32 v9, v9, v21
951+ ; GCN-NEXT: v_and_b32_e32 v10, v10, v22
952+ ; GCN-NEXT: v_and_b32_e32 v11, v11, v23
953+ ; GCN-NEXT: s_setpc_b64 s[30:31]
954+ ;
955+ ; GFX10PLUS-LABEL: v_and_v3i128:
956+ ; GFX10PLUS: ; %bb.0:
957+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
958+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v12
959+ ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v13
960+ ; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v14
961+ ; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v15
962+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v16
963+ ; GFX10PLUS-NEXT: v_and_b32_e32 v5, v5, v17
964+ ; GFX10PLUS-NEXT: v_and_b32_e32 v6, v6, v18
965+ ; GFX10PLUS-NEXT: v_and_b32_e32 v7, v7, v19
966+ ; GFX10PLUS-NEXT: v_and_b32_e32 v8, v8, v20
967+ ; GFX10PLUS-NEXT: v_and_b32_e32 v9, v9, v21
968+ ; GFX10PLUS-NEXT: v_and_b32_e32 v10, v10, v22
969+ ; GFX10PLUS-NEXT: v_and_b32_e32 v11, v11, v23
970+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
971+ ;
972+ ; GFX12-LABEL: v_and_v3i128:
973+ ; GFX12: ; %bb.0:
974+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
975+ ; GFX12-NEXT: s_wait_expcnt 0x0
976+ ; GFX12-NEXT: s_wait_samplecnt 0x0
977+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
978+ ; GFX12-NEXT: s_wait_kmcnt 0x0
979+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v12
980+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v13
981+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v14
982+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v15
983+ ; GFX12-NEXT: v_and_b32_e32 v4, v4, v16
984+ ; GFX12-NEXT: v_and_b32_e32 v5, v5, v17
985+ ; GFX12-NEXT: v_and_b32_e32 v6, v6, v18
986+ ; GFX12-NEXT: v_and_b32_e32 v7, v7, v19
987+ ; GFX12-NEXT: v_and_b32_e32 v8, v8, v20
988+ ; GFX12-NEXT: v_and_b32_e32 v9, v9, v21
989+ ; GFX12-NEXT: v_and_b32_e32 v10, v10, v22
990+ ; GFX12-NEXT: v_and_b32_e32 v11, v11, v23
991+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
992+ %and = and <3 x i128 > %a , %b
993+ ret <3 x i128 > %and
994+ }
995+
; Test: `and` of two single-element <1 x i128> function arguments.
; Each check prefix (GCN, GFX10PLUS, GFX12) expects four v_and_b32_e32
; instructions pairing v0-v3 with v4-v7, followed by the s_setpc_b64 return.
; Only the leading wait instructions differ under GFX12.
996+ define <1 x i128 > @v_and_v1i128 (<1 x i128 > %a , <1 x i128 > %b ) {
997+ ; GCN-LABEL: v_and_v1i128:
998+ ; GCN: ; %bb.0:
999+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1000+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v4
1001+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v5
1002+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v6
1003+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v7
1004+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1005+ ;
1006+ ; GFX10PLUS-LABEL: v_and_v1i128:
1007+ ; GFX10PLUS: ; %bb.0:
1008+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1009+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v4
1010+ ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v5
1011+ ; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v6
1012+ ; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v7
1013+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1014+ ;
1015+ ; GFX12-LABEL: v_and_v1i128:
1016+ ; GFX12: ; %bb.0:
1017+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1018+ ; GFX12-NEXT: s_wait_expcnt 0x0
1019+ ; GFX12-NEXT: s_wait_samplecnt 0x0
1020+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
1021+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1022+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v4
1023+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v5
1024+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v6
1025+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v7
1026+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
1027+ %and = and <1 x i128 > %a , %b
1028+ ret <1 x i128 > %and
1029+ }
1030+
; Test: element-wise `and` of two <2 x i256> function arguments.
; The checks expect sixteen v_and_b32_e32 instructions. Fifteen of them pair
; v0-v14 with v16-v30 directly. The second operand of the sixteenth (the one
; writing v15) is first loaded from memory addressed by s32: via
; buffer_load_dword under GCN/GFX10, via scratch_load_b32 under GFX11/GFX12.
; A vmcnt(0) / s_wait_loadcnt wait is expected immediately before that final
; AND so the loaded value is available.
; NOTE(review): presumably that load fetches the last dword of %b, which no
; longer fits in argument registers and is passed on the stack - confirm
; against the AMDGPU calling convention.
; Under GCN the load reuses v16 as its destination (after v16 has been
; consumed by the first AND); the other prefixes load into v31.
1031+ define <2 x i256 > @v_and_v2i256 (<2 x i256 > %a , <2 x i256 > %b ) {
1032+ ; GCN-LABEL: v_and_v2i256:
1033+ ; GCN: ; %bb.0:
1034+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1035+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v16
1036+ ; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32
1037+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v17
1038+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v18
1039+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v19
1040+ ; GCN-NEXT: v_and_b32_e32 v4, v4, v20
1041+ ; GCN-NEXT: v_and_b32_e32 v5, v5, v21
1042+ ; GCN-NEXT: v_and_b32_e32 v6, v6, v22
1043+ ; GCN-NEXT: v_and_b32_e32 v7, v7, v23
1044+ ; GCN-NEXT: v_and_b32_e32 v8, v8, v24
1045+ ; GCN-NEXT: v_and_b32_e32 v9, v9, v25
1046+ ; GCN-NEXT: v_and_b32_e32 v10, v10, v26
1047+ ; GCN-NEXT: v_and_b32_e32 v11, v11, v27
1048+ ; GCN-NEXT: v_and_b32_e32 v12, v12, v28
1049+ ; GCN-NEXT: v_and_b32_e32 v13, v13, v29
1050+ ; GCN-NEXT: v_and_b32_e32 v14, v14, v30
1051+ ; GCN-NEXT: s_waitcnt vmcnt(0)
1052+ ; GCN-NEXT: v_and_b32_e32 v15, v15, v16
1053+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1054+ ;
1055+ ; GFX10-LABEL: v_and_v2i256:
1056+ ; GFX10: ; %bb.0:
1057+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1058+ ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
1059+ ; GFX10-NEXT: v_and_b32_e32 v0, v0, v16
1060+ ; GFX10-NEXT: v_and_b32_e32 v1, v1, v17
1061+ ; GFX10-NEXT: v_and_b32_e32 v2, v2, v18
1062+ ; GFX10-NEXT: v_and_b32_e32 v3, v3, v19
1063+ ; GFX10-NEXT: v_and_b32_e32 v4, v4, v20
1064+ ; GFX10-NEXT: v_and_b32_e32 v5, v5, v21
1065+ ; GFX10-NEXT: v_and_b32_e32 v6, v6, v22
1066+ ; GFX10-NEXT: v_and_b32_e32 v7, v7, v23
1067+ ; GFX10-NEXT: v_and_b32_e32 v8, v8, v24
1068+ ; GFX10-NEXT: v_and_b32_e32 v9, v9, v25
1069+ ; GFX10-NEXT: v_and_b32_e32 v10, v10, v26
1070+ ; GFX10-NEXT: v_and_b32_e32 v11, v11, v27
1071+ ; GFX10-NEXT: v_and_b32_e32 v12, v12, v28
1072+ ; GFX10-NEXT: v_and_b32_e32 v13, v13, v29
1073+ ; GFX10-NEXT: v_and_b32_e32 v14, v14, v30
1074+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
1075+ ; GFX10-NEXT: v_and_b32_e32 v15, v15, v31
1076+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
1077+ ;
1078+ ; GFX11-LABEL: v_and_v2i256:
1079+ ; GFX11: ; %bb.0:
1080+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1081+ ; GFX11-NEXT: scratch_load_b32 v31, off, s32
1082+ ; GFX11-NEXT: v_and_b32_e32 v0, v0, v16
1083+ ; GFX11-NEXT: v_and_b32_e32 v1, v1, v17
1084+ ; GFX11-NEXT: v_and_b32_e32 v2, v2, v18
1085+ ; GFX11-NEXT: v_and_b32_e32 v3, v3, v19
1086+ ; GFX11-NEXT: v_and_b32_e32 v4, v4, v20
1087+ ; GFX11-NEXT: v_and_b32_e32 v5, v5, v21
1088+ ; GFX11-NEXT: v_and_b32_e32 v6, v6, v22
1089+ ; GFX11-NEXT: v_and_b32_e32 v7, v7, v23
1090+ ; GFX11-NEXT: v_and_b32_e32 v8, v8, v24
1091+ ; GFX11-NEXT: v_and_b32_e32 v9, v9, v25
1092+ ; GFX11-NEXT: v_and_b32_e32 v10, v10, v26
1093+ ; GFX11-NEXT: v_and_b32_e32 v11, v11, v27
1094+ ; GFX11-NEXT: v_and_b32_e32 v12, v12, v28
1095+ ; GFX11-NEXT: v_and_b32_e32 v13, v13, v29
1096+ ; GFX11-NEXT: v_and_b32_e32 v14, v14, v30
1097+ ; GFX11-NEXT: s_waitcnt vmcnt(0)
1098+ ; GFX11-NEXT: v_and_b32_e32 v15, v15, v31
1099+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
1100+ ;
1101+ ; GFX12-LABEL: v_and_v2i256:
1102+ ; GFX12: ; %bb.0:
1103+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1104+ ; GFX12-NEXT: s_wait_expcnt 0x0
1105+ ; GFX12-NEXT: s_wait_samplecnt 0x0
1106+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
1107+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1108+ ; GFX12-NEXT: scratch_load_b32 v31, off, s32
1109+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v16
1110+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v17
1111+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v18
1112+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v19
1113+ ; GFX12-NEXT: v_and_b32_e32 v4, v4, v20
1114+ ; GFX12-NEXT: v_and_b32_e32 v5, v5, v21
1115+ ; GFX12-NEXT: v_and_b32_e32 v6, v6, v22
1116+ ; GFX12-NEXT: v_and_b32_e32 v7, v7, v23
1117+ ; GFX12-NEXT: v_and_b32_e32 v8, v8, v24
1118+ ; GFX12-NEXT: v_and_b32_e32 v9, v9, v25
1119+ ; GFX12-NEXT: v_and_b32_e32 v10, v10, v26
1120+ ; GFX12-NEXT: v_and_b32_e32 v11, v11, v27
1121+ ; GFX12-NEXT: v_and_b32_e32 v12, v12, v28
1122+ ; GFX12-NEXT: v_and_b32_e32 v13, v13, v29
1123+ ; GFX12-NEXT: v_and_b32_e32 v14, v14, v30
1124+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1125+ ; GFX12-NEXT: v_and_b32_e32 v15, v15, v31
1126+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
1127+ %and = and <2 x i256 > %a , %b
1128+ ret <2 x i256 > %and
1129+ }
1130+
8371131;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
8381132; GFX11-FAKE16: {{.*}}
8391133; GFX11-TRUE16: {{.*}}
0 commit comments