Skip to content

Commit 063cee7

Browse files
authored
[AMDGPU][MC] Allow opsel for v_max_i16 etc in GFX10 (#143982)
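On GFX10, several 16-bit instructions that use the VOP3 encoding should accept the op_sel modifier: V_MAX_I16, V_MAX_U16, V_MIN_I16, V_MIN_U16, V_MUL_LO_U16, V_LSHLREV_B16, V_LSHRREV_B16, and V_ASHRREV_I16. This change adds GFX10-only `_opsel_e64` variants of these instructions.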
1 parent 1dfdd1e commit 063cee7

File tree

10 files changed

+197
-32
lines changed

10 files changed

+197
-32
lines changed

llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -735,7 +735,9 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
735735
case AMDGPU::V_ASHRREV_I16_e32:
736736
case AMDGPU::V_LSHLREV_B16_e32:
737737
case AMDGPU::V_LSHRREV_B16_e64:
738+
case AMDGPU::V_LSHRREV_B16_opsel_e64:
738739
case AMDGPU::V_ASHRREV_I16_e64:
740+
case AMDGPU::V_LSHLREV_B16_opsel_e64:
739741
case AMDGPU::V_LSHLREV_B16_e64: {
740742
// from: v_lshrrev_b16_e32 v1, 8, v0
741743
// to SDWA src:v0 src_sel:BYTE_1
@@ -758,11 +760,13 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
758760
break;
759761

760762
if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
763+
Opcode == AMDGPU::V_LSHLREV_B16_opsel_e64 ||
761764
Opcode == AMDGPU::V_LSHLREV_B16_e64)
762765
return std::make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
763766
return std::make_unique<SDWASrcOperand>(
764767
Src1, Dst, BYTE_1, false, false,
765768
Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
769+
Opcode != AMDGPU::V_LSHRREV_B16_opsel_e64 &&
766770
Opcode != AMDGPU::V_LSHRREV_B16_e64);
767771
break;
768772
}

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -211,6 +211,11 @@ multiclass VOP2Inst_e64_t16<string opName,
211211
string revOp = opName> {
212212
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
213213
defm NAME : VOP2Inst<opName, P, node, revOp>;
214+
let SubtargetPredicate = isGFX10Only in {
215+
// V_MAX_I16 etc use VOP3 encoding and allow OP_SEL
216+
def _opsel_e64 : VOP3InstBase <opName#"_vop3", VOP3_Profile<P, VOP3_OPSEL>, node, 1>,
217+
Commutable_REV<revOp#"_vop3_e64", !eq(revOp, opName)>;
218+
}
214219
}
215220
let SubtargetPredicate = UseRealTrue16Insts in {
216221
defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16">;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2016,16 +2016,14 @@ defm V_DIV_FIXUP_F16 :
20162016
defm V_ADD_NC_U16 : VOP3OpSel_Real_gfx10<0x303>;
20172017
defm V_SUB_NC_U16 : VOP3OpSel_Real_gfx10<0x304>;
20182018

2019-
// FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these
2020-
// (they do not support SDWA or DPP).
2021-
defm V_MUL_LO_U16 : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16", "v_mul_lo_u16">;
2022-
defm V_LSHRREV_B16 : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16", "v_lshrrev_b16">;
2023-
defm V_ASHRREV_I16 : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16", "v_ashrrev_i16">;
2024-
defm V_MAX_U16 : VOP3_Real_gfx10_with_name<0x309, "V_MAX_U16", "v_max_u16">;
2025-
defm V_MAX_I16 : VOP3_Real_gfx10_with_name<0x30a, "V_MAX_I16", "v_max_i16">;
2026-
defm V_MIN_U16 : VOP3_Real_gfx10_with_name<0x30b, "V_MIN_U16", "v_min_u16">;
2027-
defm V_MIN_I16 : VOP3_Real_gfx10_with_name<0x30c, "V_MIN_I16", "v_min_i16">;
2028-
defm V_LSHLREV_B16 : VOP3_Real_gfx10_with_name<0x314, "V_LSHLREV_B16", "v_lshlrev_b16">;
2019+
defm V_MUL_LO_U16 : VOP3OpSel_Real_gfx10_with_name<0x305, "V_MUL_LO_U16_opsel", "v_mul_lo_u16">;
2020+
defm V_LSHRREV_B16 : VOP3OpSel_Real_gfx10_with_name<0x307, "V_LSHRREV_B16_opsel", "v_lshrrev_b16">;
2021+
defm V_ASHRREV_I16 : VOP3OpSel_Real_gfx10_with_name<0x308, "V_ASHRREV_I16_opsel", "v_ashrrev_i16">;
2022+
defm V_MAX_U16 : VOP3OpSel_Real_gfx10_with_name<0x309, "V_MAX_U16_opsel", "v_max_u16">;
2023+
defm V_MAX_I16 : VOP3OpSel_Real_gfx10_with_name<0x30a, "V_MAX_I16_opsel", "v_max_i16">;
2024+
defm V_MIN_U16 : VOP3OpSel_Real_gfx10_with_name<0x30b, "V_MIN_U16_opsel", "v_min_u16">;
2025+
defm V_MIN_I16 : VOP3OpSel_Real_gfx10_with_name<0x30c, "V_MIN_I16_opsel", "v_min_i16">;
2026+
defm V_LSHLREV_B16 : VOP3OpSel_Real_gfx10_with_name<0x314, "V_LSHLREV_B16_opsel", "v_lshlrev_b16">;
20292027
defm V_PERMLANE16_B32 : VOP3OpSel_Real_gfx10<0x377>;
20302028
defm V_PERMLANEX16_B32 : VOP3OpSel_Real_gfx10<0x378>;
20312029

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ body: |
100100
; GFX10-NEXT: {{ $}}
101101
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
102102
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
103-
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
103+
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_opsel_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
104104
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]]
105105
; GFX11-LABEL: name: ashr_s16_s16_vs
106106
; GFX11: liveins: $sgpr0, $vgpr0
@@ -193,7 +193,7 @@ body: |
193193
; GFX10-NEXT: {{ $}}
194194
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
195195
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
196-
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
196+
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_opsel_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
197197
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]]
198198
; GFX11-LABEL: name: ashr_s16_s16_vv
199199
; GFX11: liveins: $vgpr0, $vgpr1
@@ -238,7 +238,7 @@ body: |
238238
; GFX10-NEXT: {{ $}}
239239
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
240240
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
241-
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
241+
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_opsel_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
242242
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
243243
; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ASHRREV_I16_e64_]], implicit $exec
244244
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
@@ -292,7 +292,7 @@ body: |
292292
; GFX10-NEXT: {{ $}}
293293
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
294294
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
295-
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
295+
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_opsel_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
296296
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
297297
; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
298298
; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
@@ -442,7 +442,7 @@ body: |
442442
; GFX10-NEXT: {{ $}}
443443
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
444444
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
445-
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
445+
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_opsel_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
446446
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]]
447447
; GFX11-LABEL: name: ashr_s16_s16_sv
448448
; GFX11: liveins: $sgpr0, $vgpr0

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ body: |
9898
; GFX10-NEXT: {{ $}}
9999
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
100100
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
101-
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
101+
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_opsel_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
102102
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]]
103103
; GFX11-LABEL: name: lshr_s16_s16_vs
104104
; GFX11: liveins: $sgpr0, $vgpr0
@@ -191,7 +191,7 @@ body: |
191191
; GFX10-NEXT: {{ $}}
192192
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
193193
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
194-
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
194+
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_opsel_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
195195
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]]
196196
; GFX11-LABEL: name: lshr_s16_s16_vv
197197
; GFX11: liveins: $vgpr0, $vgpr1
@@ -236,7 +236,7 @@ body: |
236236
; GFX10-NEXT: {{ $}}
237237
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
238238
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
239-
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
239+
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_opsel_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
240240
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
241241
; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHRREV_B16_e64_]], implicit $exec
242242
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
@@ -290,7 +290,7 @@ body: |
290290
; GFX10-NEXT: {{ $}}
291291
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
292292
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
293-
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
293+
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_opsel_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
294294
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
295295
; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
296296
; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
@@ -440,7 +440,7 @@ body: |
440440
; GFX10-NEXT: {{ $}}
441441
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
442442
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
443-
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
443+
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_opsel_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
444444
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]]
445445
; GFX11-LABEL: name: lshr_s16_s16_sv
446446
; GFX11: liveins: $sgpr0, $vgpr0

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir

Lines changed: 41 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
33
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
4-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
4+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
55
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s
66

77
---
@@ -34,6 +34,15 @@ body: |
3434
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
3535
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]]
3636
;
37+
; GFX10-LABEL: name: smed3_s16_vvv
38+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
39+
; GFX10-NEXT: {{ $}}
40+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
41+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
42+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
43+
; GFX10-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
44+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]]
45+
;
3746
; GFX11-LABEL: name: smed3_s16_vvv
3847
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
3948
; GFX11-NEXT: {{ $}}
@@ -88,6 +97,16 @@ body: |
8897
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
8998
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
9099
;
100+
; GFX10-LABEL: name: smed3_s16_vvv_multiuse0
101+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
102+
; GFX10-NEXT: {{ $}}
103+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
104+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
105+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
106+
; GFX10-NEXT: [[V_MAX_I16_opsel_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_opsel_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
107+
; GFX10-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
108+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_opsel_e64_]]
109+
;
91110
; GFX11-LABEL: name: smed3_s16_vvv_multiuse0
92111
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
93112
; GFX11-NEXT: {{ $}}
@@ -143,6 +162,16 @@ body: |
143162
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
144163
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_e64_]]
145164
;
165+
; GFX10-LABEL: name: smed3_s16_vvv_multiuse1
166+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
167+
; GFX10-NEXT: {{ $}}
168+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
169+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
170+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
171+
; GFX10-NEXT: [[V_MIN_I16_opsel_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_opsel_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
172+
; GFX10-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
173+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_opsel_e64_]]
174+
;
146175
; GFX11-LABEL: name: smed3_s16_vvv_multiuse1
147176
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
148177
; GFX11-NEXT: {{ $}}
@@ -199,6 +228,17 @@ body: |
199228
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
200229
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
201230
;
231+
; GFX10-LABEL: name: smed3_s16_vvv_multiuse2
232+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
233+
; GFX10-NEXT: {{ $}}
234+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
235+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
236+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
237+
; GFX10-NEXT: [[V_MIN_I16_opsel_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_opsel_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
238+
; GFX10-NEXT: [[V_MAX_I16_opsel_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_opsel_e64 0, [[V_MIN_I16_opsel_e64_]], 0, [[COPY2]], 0, 0, implicit $exec
239+
; GFX10-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
240+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_opsel_e64_]]
241+
;
202242
; GFX11-LABEL: name: smed3_s16_vvv_multiuse2
203243
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
204244
; GFX11-NEXT: {{ $}}

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir

Lines changed: 41 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
33
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
4-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
4+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
55
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s
66

77
---
@@ -34,6 +34,15 @@ body: |
3434
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
3535
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]]
3636
;
37+
; GFX10-LABEL: name: umed3_s16_vvv
38+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
39+
; GFX10-NEXT: {{ $}}
40+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
41+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
42+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
43+
; GFX10-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
44+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]]
45+
;
3746
; GFX11-LABEL: name: umed3_s16_vvv
3847
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
3948
; GFX11-NEXT: {{ $}}
@@ -88,6 +97,16 @@ body: |
8897
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
8998
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
9099
;
100+
; GFX10-LABEL: name: umed3_s16_vvv_multiuse0
101+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
102+
; GFX10-NEXT: {{ $}}
103+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
104+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
105+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
106+
; GFX10-NEXT: [[V_MAX_U16_opsel_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_opsel_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
107+
; GFX10-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
108+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_opsel_e64_]]
109+
;
91110
; GFX11-LABEL: name: umed3_s16_vvv_multiuse0
92111
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
93112
; GFX11-NEXT: {{ $}}
@@ -143,6 +162,16 @@ body: |
143162
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
144163
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_e64_]]
145164
;
165+
; GFX10-LABEL: name: umed3_s16_vvv_multiuse1
166+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
167+
; GFX10-NEXT: {{ $}}
168+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
169+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
170+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
171+
; GFX10-NEXT: [[V_MIN_U16_opsel_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_opsel_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
172+
; GFX10-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
173+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_opsel_e64_]]
174+
;
146175
; GFX11-LABEL: name: umed3_s16_vvv_multiuse1
147176
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
148177
; GFX11-NEXT: {{ $}}
@@ -199,6 +228,17 @@ body: |
199228
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
200229
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
201230
;
231+
; GFX10-LABEL: name: umed3_s16_vvv_multiuse2
232+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
233+
; GFX10-NEXT: {{ $}}
234+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
235+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
236+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
237+
; GFX10-NEXT: [[V_MIN_U16_opsel_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_opsel_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
238+
; GFX10-NEXT: [[V_MAX_U16_opsel_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_opsel_e64 0, [[V_MIN_U16_opsel_e64_]], 0, [[COPY2]], 0, 0, implicit $exec
239+
; GFX10-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
240+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_opsel_e64_]]
241+
;
202242
; GFX11-LABEL: name: umed3_s16_vvv_multiuse2
203243
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
204244
; GFX11-NEXT: {{ $}}

0 commit comments

Comments
 (0)