@@ -34,19 +34,11 @@ define i8 @test_vector_reduce_smax_v2i8(<2 x i8> %v) {
3434; GFX7-GISEL-NEXT: v_max_i32_e32 v0, v0, v1
3535; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
3636;
37- ; GFX8-SDAG-LABEL: test_vector_reduce_smax_v2i8:
38- ; GFX8-SDAG: ; %bb.0: ; %entry
39- ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40- ; GFX8-SDAG-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
41- ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
42- ;
43- ; GFX8-GISEL-LABEL: test_vector_reduce_smax_v2i8:
44- ; GFX8-GISEL: ; %bb.0: ; %entry
45- ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
47- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
48- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
49- ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
37+ ; GFX8-LABEL: test_vector_reduce_smax_v2i8:
38+ ; GFX8: ; %bb.0: ; %entry
39+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40+ ; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
41+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
5042;
5143; GFX9-LABEL: test_vector_reduce_smax_v2i8:
5244; GFX9: ; %bb.0: ; %entry
@@ -173,11 +165,8 @@ define i8 @test_vector_reduce_smax_v3i8(<3 x i8> %v) {
173165; GFX8-GISEL-LABEL: test_vector_reduce_smax_v3i8:
174166; GFX8-GISEL: ; %bb.0: ; %entry
175167; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
177- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
178- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
179- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v2
180- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
168+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
169+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, v0, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
181170; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
182171;
183172; GFX9-SDAG-LABEL: test_vector_reduce_smax_v3i8:
@@ -350,23 +339,20 @@ define i8 @test_vector_reduce_smax_v4i8(<4 x i8> %v) {
350339; GFX8-GISEL-LABEL: test_vector_reduce_smax_v4i8:
351340; GFX8-GISEL: ; %bb.0: ; %entry
352341; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
354- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v2, 8, v2
355- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
356- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v3, 8, v3
357- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
358- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
359- ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0
360- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
361- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
342+ ; GFX8-GISEL-NEXT: s_sext_i32_i8 s4, s4
343+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
344+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
345+ ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, s4
346+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
347+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
362348; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v1
363- ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, 0 , v1
349+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, s4 , v1
364350; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8
365- ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0 , v2
351+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4 , v2
366352; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
367353; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
368354; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2
369- ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0 , v3
355+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4 , v3
370356; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
371357; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
372358; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v3
@@ -675,30 +661,23 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
675661; GFX8-GISEL-LABEL: test_vector_reduce_smax_v8i8:
676662; GFX8-GISEL: ; %bb.0: ; %entry
677663; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
679- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4
680- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
681- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
682- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v5
683- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
684- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v2, 8, v2
685- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v6
686- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
687- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v3, 8, v3
688- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v7
689- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
664+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
665+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
666+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v6) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
667+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v7) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
668+ ; GFX8-GISEL-NEXT: s_sext_i32_i8 s4, s4
690669; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v2
691670; GFX8-GISEL-NEXT: v_max_i16_e32 v1, v1, v3
692- ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0 , v2
671+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4 , v2
693672; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v1
694- ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, 0 , v1
673+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, s4 , v1
695674; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8
696- ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0 , v2
675+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4 , v2
697676; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
698- ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0 , v3
677+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4 , v3
699678; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
700679; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2
701- ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0 , v3
680+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4 , v3
702681; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
703682; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
704683; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v3
@@ -1135,46 +1114,31 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
11351114; GFX8-GISEL-LABEL: test_vector_reduce_smax_v16i8:
11361115; GFX8-GISEL: ; %bb.0: ; %entry
11371116; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1138- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1139- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v8
1140- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1141- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1142- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v9
1143- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1144- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1145- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v10
1146- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1147- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1148- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v11
1149- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1150- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4
1151- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v12
1152- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v4, sext(v4), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1153- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v5, 8, v5
1154- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v13
1155- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v5, sext(v5), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1156- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v6, 8, v6
1157- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v14
1158- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1159- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v7, 8, v7
1160- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v15
1161- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1117+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1118+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1119+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1120+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1121+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v4, sext(v4), sext(v12) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1122+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v5, sext(v5), sext(v13) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1123+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1124+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
11621125; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v4
11631126; GFX8-GISEL-NEXT: v_max_i16_e32 v1, v1, v5
11641127; GFX8-GISEL-NEXT: v_max_i16_e32 v2, v2, v6
11651128; GFX8-GISEL-NEXT: v_max_i16_e32 v3, v3, v7
1129+ ; GFX8-GISEL-NEXT: s_sext_i32_i8 s4, s4
11661130; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v2
11671131; GFX8-GISEL-NEXT: v_max_i16_e32 v1, v1, v3
1168- ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0 , v2
1132+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4 , v2
11691133; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v1
1170- ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, 0 , v1
1134+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, s4 , v1
11711135; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8
1172- ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0 , v2
1136+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4 , v2
11731137; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1174- ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0 , v3
1138+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4 , v3
11751139; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
11761140; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2
1177- ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0 , v3
1141+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4 , v3
11781142; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
11791143; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
11801144; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v3
0 commit comments