Skip to content

Commit dabc8e2

Browse files
shiltian and rampitec authored
[AMDGPU] Add support for v_rcp_bf16 on gfx1250 (#148916)
Co-authored-by: Mekhanoshin, Stanislav <[email protected]>
1 parent 859dcfc commit dabc8e2

23 files changed

+823
-0
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
669669
TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
670670

671671
TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts")
672+
TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts")
672673

673674
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
674675
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
411411
case AMDGPU::BI__builtin_amdgcn_rcp:
412412
case AMDGPU::BI__builtin_amdgcn_rcpf:
413413
case AMDGPU::BI__builtin_amdgcn_rcph:
414+
case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
414415
return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
415416
case AMDGPU::BI__builtin_amdgcn_sqrt:
416417
case AMDGPU::BI__builtin_amdgcn_sqrtf:

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,25 @@ void test_tanh_bf16(global __bf16* out, __bf16 a)
6161
*out = __builtin_amdgcn_tanh_bf16(a);
6262
}
6363

64+
// CHECK-LABEL: @test_rcp_bf16(
65+
// CHECK-NEXT: entry:
66+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
67+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
68+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
69+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
70+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
71+
// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
72+
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
73+
// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.rcp.bf16(bfloat [[TMP0]])
74+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
75+
// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
76+
// CHECK-NEXT: ret void
77+
//
78+
// Codegen test for the bf16 reciprocal builtin: passes `a` to
// __builtin_amdgcn_rcp_bf16 and stores the result through `out`.
// The CHECK lines preceding this function expect the call to lower to
// @llvm.amdgcn.rcp.bf16.
void test_rcp_bf16(global __bf16* out, __bf16 a)
79+
{
80+
*out = __builtin_amdgcn_rcp_bf16(a);
81+
}
82+
6483
// CHECK-LABEL: @test_cvt_f16_fp8(
6584
// CHECK-NEXT: entry:
6685
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,7 @@ defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
529529

530530
// bf16 VOP1 instructions gated on the HasBF16TransInsts subtarget predicate.
// Both use the VOP_BF16_BF16 profile via VOP1Inst_t16. V_TANH_BF16 is paired
// with the int_amdgcn_tanh intrinsic, while V_RCP_BF16 is paired with
// AMDGPUrcp (presumably the selection pattern operand -- confirm against the
// VOP1Inst_t16 multiclass).
let SubtargetPredicate = HasBF16TransInsts in {
531531
defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>;
532+
defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
532533
}
533534
} // End TRANS = 1, SchedRW = [WriteTrans32]
534535
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
@@ -1137,6 +1138,7 @@ defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
11371138
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
11381139
defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>;
11391140
defm V_CVT_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x078>;
1141+
defm V_RCP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x079>;
11401142

11411143
//===----------------------------------------------------------------------===//
11421144
// GFX10.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; xUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefix=SDAG-TRUE16 %s
3+
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefix=SDAG-FAKE16 %s
4+
; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefix=GI-TRUE16 %s
5+
; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefix=GI-FAKE16 %s
6+
7+
; FIXME: t16 doesn't work at the moment because the store of s16 under t16 mode fails to select.
8+
; FIXME: GlobalISel does not work with bf16
9+
10+
declare bfloat @llvm.amdgcn.rcp.bf16(bfloat) #0
11+
12+
; Kernel under test: calls @llvm.amdgcn.rcp.bf16 on %src and stores the bfloat
; result to the addrspace(1) pointer %out.
; NOTE: only the SDAG-FAKE16 prefix has an enabled RUN line in this file; the
; SDAG-TRUE16 assertions below belong to a disabled ("xUN") configuration and
; are not currently verified by FileCheck.
define amdgpu_kernel void @rcp_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
13+
; SDAG-TRUE16-LABEL: rcp_bf16:
14+
; SDAG-TRUE16: ; %bb.0:
15+
; SDAG-TRUE16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
16+
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
17+
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
18+
; SDAG-TRUE16-NEXT: v_rcp_bf16_e32 v0.l, s2
19+
; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
20+
; SDAG-TRUE16-NEXT: s_endpgm
21+
;
22+
; SDAG-FAKE16-LABEL: rcp_bf16:
23+
; SDAG-FAKE16: ; %bb.0:
24+
; SDAG-FAKE16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
25+
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
26+
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
27+
; SDAG-FAKE16-NEXT: v_rcp_bf16_e32 v0, s2
28+
; SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1]
29+
; SDAG-FAKE16-NEXT: s_endpgm
30+
%rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat %src) #0
31+
store bfloat %rcp, ptr addrspace(1) %out, align 2
32+
ret void
33+
}
34+
35+
attributes #0 = { nounwind readnone }
36+
attributes #1 = { nounwind }

llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,51 @@ v_tanh_bf16 v5, src_scc
7373
v_tanh_bf16 v127, 0x8000
7474
// GFX1250: v_tanh_bf16_e32 v127, 0x8000 ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00]
7575

76+
v_rcp_bf16 v5, v1
77+
// GFX1250: v_rcp_bf16_e32 v5, v1 ; encoding: [0x01,0xf3,0x0a,0x7e]
78+
79+
v_rcp_bf16 v5, v127
80+
// GFX1250: v_rcp_bf16_e32 v5, v127 ; encoding: [0x7f,0xf3,0x0a,0x7e]
81+
82+
v_rcp_bf16 v5, s1
83+
// GFX1250: v_rcp_bf16_e32 v5, s1 ; encoding: [0x01,0xf2,0x0a,0x7e]
84+
85+
v_rcp_bf16 v5, s105
86+
// GFX1250: v_rcp_bf16_e32 v5, s105 ; encoding: [0x69,0xf2,0x0a,0x7e]
87+
88+
v_rcp_bf16 v5, vcc_lo
89+
// GFX1250: v_rcp_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf2,0x0a,0x7e]
90+
91+
v_rcp_bf16 v5, vcc_hi
92+
// GFX1250: v_rcp_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf2,0x0a,0x7e]
93+
94+
v_rcp_bf16 v5, ttmp15
95+
// GFX1250: v_rcp_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf2,0x0a,0x7e]
96+
97+
v_rcp_bf16 v5, m0
98+
// GFX1250: v_rcp_bf16_e32 v5, m0 ; encoding: [0x7d,0xf2,0x0a,0x7e]
99+
100+
v_rcp_bf16 v5, exec_lo
101+
// GFX1250: v_rcp_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf2,0x0a,0x7e]
102+
103+
v_rcp_bf16 v5, exec_hi
104+
// GFX1250: v_rcp_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf2,0x0a,0x7e]
105+
106+
v_rcp_bf16 v5, null
107+
// GFX1250: v_rcp_bf16_e32 v5, null ; encoding: [0x7c,0xf2,0x0a,0x7e]
108+
109+
v_rcp_bf16 v5, -1
110+
// GFX1250: v_rcp_bf16_e32 v5, -1 ; encoding: [0xc1,0xf2,0x0a,0x7e]
111+
112+
v_rcp_bf16 v5, 0.5
113+
// GFX1250: v_rcp_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf2,0x0a,0x7e]
114+
115+
v_rcp_bf16 v5, src_scc
116+
// GFX1250: v_rcp_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf2,0x0a,0x7e]
117+
118+
v_rcp_bf16 v127, 0x8000
119+
// GFX1250: v_rcp_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf2,0xfe,0x7e,0x00,0x80,0x00,0x00]
120+
76121
v_cvt_f32_bf16 v5, v1
77122
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
78123

llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,54 @@ v_tanh_bf16 v127, 0x8000
7676
v_tanh_bf16 v5.h, v1.h
7777
// GFX1250: v_tanh_bf16_e32 v5.h, v1.h ; encoding: [0x81,0x95,0x0a,0x7f]
7878

79+
v_rcp_bf16 v5, v1
80+
// GFX1250: v_rcp_bf16_e32 v5, v1 ; encoding: [0x01,0xf3,0x0a,0x7e]
81+
82+
v_rcp_bf16 v5, v127
83+
// GFX1250: v_rcp_bf16_e32 v5, v127 ; encoding: [0x7f,0xf3,0x0a,0x7e]
84+
85+
v_rcp_bf16 v5, s1
86+
// GFX1250: v_rcp_bf16_e32 v5, s1 ; encoding: [0x01,0xf2,0x0a,0x7e]
87+
88+
v_rcp_bf16 v5, s105
89+
// GFX1250: v_rcp_bf16_e32 v5, s105 ; encoding: [0x69,0xf2,0x0a,0x7e]
90+
91+
v_rcp_bf16 v5, vcc_lo
92+
// GFX1250: v_rcp_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf2,0x0a,0x7e]
93+
94+
v_rcp_bf16 v5, vcc_hi
95+
// GFX1250: v_rcp_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf2,0x0a,0x7e]
96+
97+
v_rcp_bf16 v5, ttmp15
98+
// GFX1250: v_rcp_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf2,0x0a,0x7e]
99+
100+
v_rcp_bf16 v5, m0
101+
// GFX1250: v_rcp_bf16_e32 v5, m0 ; encoding: [0x7d,0xf2,0x0a,0x7e]
102+
103+
v_rcp_bf16 v5, exec_lo
104+
// GFX1250: v_rcp_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf2,0x0a,0x7e]
105+
106+
v_rcp_bf16 v5, exec_hi
107+
// GFX1250: v_rcp_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf2,0x0a,0x7e]
108+
109+
v_rcp_bf16 v5, null
110+
// GFX1250: v_rcp_bf16_e32 v5, null ; encoding: [0x7c,0xf2,0x0a,0x7e]
111+
112+
v_rcp_bf16 v5, -1
113+
// GFX1250: v_rcp_bf16_e32 v5, -1 ; encoding: [0xc1,0xf2,0x0a,0x7e]
114+
115+
v_rcp_bf16 v5, 0.5
116+
// GFX1250: v_rcp_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf2,0x0a,0x7e]
117+
118+
v_rcp_bf16 v5, src_scc
119+
// GFX1250: v_rcp_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf2,0x0a,0x7e]
120+
121+
v_rcp_bf16 v127, 0x8000
122+
// GFX1250: v_rcp_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf2,0xfe,0x7e,0x00,0x80,0x00,0x00]
123+
124+
v_rcp_bf16 v5.h, v1.h
125+
// GFX1250: v_rcp_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf3,0x0a,0x7f]
126+
79127
v_cvt_f32_bf16 v5, v1
80128
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
81129

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,62 @@ v_tanh_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 f
5858
// GFX1250: v_tanh_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
5959
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
6060

61+
v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
62+
// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
63+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
64+
65+
v_rcp_bf16 v5, v1 quad_perm:[0,1,2,3]
66+
// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0xe4,0x00,0xff]
67+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
68+
69+
v_rcp_bf16 v5, v1 row_mirror
70+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x40,0x01,0xff]
71+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
72+
73+
v_rcp_bf16 v5, v1 row_half_mirror
74+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x41,0x01,0xff]
75+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
76+
77+
v_rcp_bf16 v5, v1 row_shl:1
78+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x01,0x01,0xff]
79+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
80+
81+
v_rcp_bf16 v5, v1 row_shl:15
82+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x0f,0x01,0xff]
83+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
84+
85+
v_rcp_bf16 v5, v1 row_shr:1
86+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x11,0x01,0xff]
87+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
88+
89+
v_rcp_bf16 v5, v1 row_shr:15
90+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1f,0x01,0xff]
91+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
92+
93+
v_rcp_bf16 v5, v1 row_ror:1
94+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x21,0x01,0xff]
95+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
96+
97+
v_rcp_bf16 v5, v1 row_ror:15
98+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x2f,0x01,0xff]
99+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
100+
101+
v_rcp_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
102+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x50,0x01,0xff]
103+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
104+
105+
v_rcp_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
106+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x5f,0x01,0x01]
107+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
108+
109+
v_rcp_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
110+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x60,0x09,0x13]
111+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
112+
113+
v_rcp_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
114+
// GFX1250: v_rcp_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf2,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
115+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
116+
61117
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
62118
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
63119
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,66 @@ v_tanh_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
6262
// GFX1250: v_tanh_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7f,0x81,0x1b,0x00,0xff]
6363
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
6464

65+
v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
66+
// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
67+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
68+
69+
v_rcp_bf16 v5, v1 quad_perm:[0,1,2,3]
70+
// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0xe4,0x00,0xff]
71+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
72+
73+
v_rcp_bf16 v5, v1 row_mirror
74+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x40,0x01,0xff]
75+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
76+
77+
v_rcp_bf16 v5, v1 row_half_mirror
78+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x41,0x01,0xff]
79+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
80+
81+
v_rcp_bf16 v5, v1 row_shl:1
82+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x01,0x01,0xff]
83+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
84+
85+
v_rcp_bf16 v5, v1 row_shl:15
86+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x0f,0x01,0xff]
87+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
88+
89+
v_rcp_bf16 v5, v1 row_shr:1
90+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x11,0x01,0xff]
91+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
92+
93+
v_rcp_bf16 v5, v1 row_shr:15
94+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1f,0x01,0xff]
95+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
96+
97+
v_rcp_bf16 v5, v1 row_ror:1
98+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x21,0x01,0xff]
99+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
100+
101+
v_rcp_bf16 v5, v1 row_ror:15
102+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x2f,0x01,0xff]
103+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
104+
105+
v_rcp_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
106+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x50,0x01,0xff]
107+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
108+
109+
v_rcp_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
110+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x5f,0x01,0x01]
111+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
112+
113+
v_rcp_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
114+
// GFX1250: v_rcp_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x60,0x09,0x13]
115+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
116+
117+
v_rcp_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
118+
// GFX1250: v_rcp_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf2,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
119+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
120+
121+
v_rcp_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
122+
// GFX1250: v_rcp_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7f,0x81,0x1b,0x00,0xff]
123+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
124+
65125
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
66126
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
67127
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,18 @@ v_tanh_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
1414
// GFX1250: v_tanh_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00]
1515
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
1616

17+
v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
18+
// GFX1250: v_rcp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7e,0x01,0x77,0x39,0x05]
19+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
20+
21+
v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
22+
// GFX1250: v_rcp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf2,0x0a,0x7e,0x01,0x77,0x39,0x05]
23+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
24+
25+
v_rcp_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
26+
// GFX1250: v_rcp_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf2,0xfe,0x7e,0x7f,0x00,0x00,0x00]
27+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
28+
1729
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
1830
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
1931
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

0 commit comments

Comments
 (0)