Skip to content

Commit 49357b2

Browse files
authored
[AMDGPU][True16][CodeGen] true16 codegen pattern for v_med3_u/i16 (#121850)
True16 codegen pattern for v_med3_u/i16
1 parent 478648e commit 49357b2

File tree

3 files changed

+20
-0
lines changed

3 files changed

+20
-0
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3726,6 +3726,10 @@ let True16Predicate = NotHasTrue16BitInsts in {
37263726
defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax, VSrc_b16>;
37273727
defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax, VSrc_b16>;
37283728
}
3729+
let True16Predicate = UseRealTrue16Insts in {
3730+
defm : Int16Med3Pat<V_MED3_I16_t16_e64, smin, smax, VSrcT_b16>;
3731+
defm : Int16Med3Pat<V_MED3_U16_t16_e64, umin, umax, VSrcT_b16>;
3732+
}
37293733
let True16Predicate = UseFakeTrue16Insts in {
37303734
defm : Int16Med3Pat<V_MED3_I16_fake16_e64, smin, smax, VSrc_b16>;
37313735
defm : Int16Med3Pat<V_MED3_U16_fake16_e64, umin, umax, VSrc_b16>;

llvm/test/CodeGen/AMDGPU/smed3.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
22
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
33
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s
46

57
declare i32 @llvm.amdgcn.workitem.id.x() #0
68

@@ -98,6 +100,8 @@ declare i64 @llvm.smin.i64(i64, i64)
98100
; VI: v_max_i16_e32 [[MAX:v[0-9]]], 12, {{v[0-9]}}
99101
; VI: v_min_i16_e32 {{v[0-9]}}, 17, [[MAX]]
100102
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
103+
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, 12, 17
104+
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
101105
define amdgpu_kernel void @v_test_smed3_r_i_i_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
102106
%tid = call i32 @llvm.amdgcn.workitem.id.x()
103107
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
@@ -686,6 +690,8 @@ bb:
686690
; VI: v_max_i16
687691

688692
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
693+
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
694+
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
689695
define amdgpu_kernel void @v_test_smed3_i16_pat_0(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
690696
bb:
691697
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -707,6 +713,8 @@ bb:
707713

708714
; GCN-LABEL: {{^}}v_test_smed3_i16_pat_1:
709715
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
716+
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
717+
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
710718

711719
define amdgpu_kernel void @v_test_smed3_i16_pat_1(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
712720
bb:

llvm/test/CodeGen/AMDGPU/umed3.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
22
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
33
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-FAKE16 %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-TRUE16 %s
46

57
declare i32 @llvm.amdgcn.workitem.id.x() #0
68

@@ -84,6 +86,8 @@ define amdgpu_kernel void @v_test_umed3_r_i_i_i64(ptr addrspace(1) %out, ptr add
8486
; VI: v_max_u16_e32 [[MAX:v[0-9]]], 12, {{v[0-9]}}
8587
; VI: v_min_u16_e32 {{v[0-9]}}, 17, [[MAX]]
8688
; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
89+
; GFX11-TRUE16: v_med3_u16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, 12, 17
90+
; GFX11-FAKE16: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
8791
define amdgpu_kernel void @v_test_umed3_r_i_i_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
8892
%tid = call i32 @llvm.amdgcn.workitem.id.x()
8993
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
@@ -707,6 +711,8 @@ bb:
707711
; VI: v_max_u16
708712

709713
; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
714+
; GFX11-TRUE16: v_med3_u16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
715+
; GFX11-FAKE16: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
710716
define amdgpu_kernel void @v_test_umed3_i16_pat_0(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
711717
bb:
712718
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -728,6 +734,8 @@ bb:
728734

729735
; GCN-LABEL: {{^}}v_test_umed3_i16_pat_1:
730736
; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
737+
; GFX11-TRUE16: v_med3_u16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
738+
; GFX11-FAKE16: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
731739
define amdgpu_kernel void @v_test_umed3_i16_pat_1(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
732740
bb:
733741
%tid = call i32 @llvm.amdgcn.workitem.id.x()

0 commit comments

Comments
 (0)