Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 31 additions & 8 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,13 @@ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16",
defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;


// Operand profiles for the BF16 transcendental VOP1 instructions
// (v_tanh_bf16, v_rcp_bf16, v_sqrt_bf16, v_rsq_bf16, v_log_bf16, v_exp_bf16,
// v_sin_bf16, v_cos_bf16). All three are bf16 -> bf16 with the clamp and
// output-modifier (omod) flags forced off via the enclosing `let`, so the
// instructions built from them do not take `clamp` or `mul:`/`div:` operands.
//   V_TRANS_BF16_Profile        - base profile
//   V_TRANS_BF16_t16_Profile    - True16 variant
//   V_TRANS_BF16_fake16_Profile - fake16 variant
// NOTE(review): the base profile spells out [bf16, bf16, untyped, untyped]
// while the other two wrap VOP_BF16_BF16; presumably these describe the same
// operand types - confirm against the VOP_BF16_BF16 definition.
let HasClamp = 0, HasOMod = 0 in {
def V_TRANS_BF16_Profile : VOPProfile <[bf16, bf16, untyped, untyped]>;
def V_TRANS_BF16_t16_Profile : VOPProfile_True16 <VOP_BF16_BF16>;
def V_TRANS_BF16_fake16_Profile : VOPProfile_Fake16 <VOP_BF16_BF16>;
}

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
Expand All @@ -527,14 +534,30 @@ defm V_TANH_F16 : VOP1Inst_t16 <"v_tanh_f16", VOP_F16_F16, int_amdgcn_tanh>;
}

let SubtargetPredicate = HasBF16TransInsts in {
defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>;
defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>;
defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>;
defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>;
defm V_EXP_BF16 : VOP1Inst_t16 <"v_exp_bf16", VOP_BF16_BF16, AMDGPUexpf16>;
defm V_SIN_BF16 : VOP1Inst_t16 <"v_sin_bf16", VOP_BF16_BF16, AMDGPUsin>;
defm V_COS_BF16 : VOP1Inst_t16 <"v_cos_bf16", VOP_BF16_BF16, AMDGPUcos>;
defm V_TANH_BF16 : VOP1Inst_t16_with_profiles<"v_tanh_bf16", V_TRANS_BF16_Profile,
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
int_amdgcn_tanh>;
defm V_RCP_BF16 : VOP1Inst_t16_with_profiles<"v_rcp_bf16", V_TRANS_BF16_Profile,
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
AMDGPUrcp>;
defm V_SQRT_BF16 : VOP1Inst_t16_with_profiles<"v_sqrt_bf16", V_TRANS_BF16_Profile,
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
any_amdgcn_sqrt>;
defm V_RSQ_BF16 : VOP1Inst_t16_with_profiles<"v_rsq_bf16", V_TRANS_BF16_Profile,
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
AMDGPUrsq>;
defm V_LOG_BF16 : VOP1Inst_t16_with_profiles<"v_log_bf16", V_TRANS_BF16_Profile,
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
AMDGPUlogf16>;
defm V_EXP_BF16 : VOP1Inst_t16_with_profiles<"v_exp_bf16", V_TRANS_BF16_Profile,
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
AMDGPUexpf16>;
defm V_SIN_BF16 : VOP1Inst_t16_with_profiles<"v_sin_bf16", V_TRANS_BF16_Profile,
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
AMDGPUsin>;
defm V_COS_BF16 : VOP1Inst_t16_with_profiles<"v_cos_bf16", V_TRANS_BF16_Profile,
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
AMDGPUcos>;
}
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
Expand Down
8 changes: 6 additions & 2 deletions llvm/lib/Target/AMDGPU/VOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1357,8 +1357,12 @@ class VOPBinOpClampPat<SDPatternOperator node, Instruction inst, ValueType vt> :

class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
dag src0 = !if(P.HasOMod,
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
!if(P.HasClamp,
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i32:$omod)),
!if(P.HasClamp,
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers)));

list<dag> ret3 = [(set P.DstVT:$vdst,
(DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
Expand Down
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/AMDGPU/bf16-math.ll
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,10 @@ define amdgpu_ps float @test_clamp_v2bf16_s(<2 x bfloat> inreg %src) {
define amdgpu_ps bfloat @test_clamp_bf16_folding(bfloat %src) {
; GCN-LABEL: test_clamp_bf16_folding:
; GCN: ; %bb.0:
; GCN-NEXT: v_exp_bf16_e64 v0, v0 clamp
; GCN-NEXT: v_exp_bf16_e32 v0, v0
; GCN-NEXT: v_nop
; GCN-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GCN-NEXT: v_pk_max_num_bf16 v0, v0, v0 clamp
; GCN-NEXT: ; return to shader part epilog
%exp = call bfloat @llvm.exp2.bf16(bfloat %src)
%max = call bfloat @llvm.maxnum.bf16(bfloat %exp, bfloat 0.0)
Expand Down
80 changes: 80 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,83 @@ v_cvt_f32_bf16 v5, v1 div:2
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 div:2
// GFX1250-ERR-NEXT:{{^}} ^

v_cos_bf16 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_cos_bf16 v1, v2 clamp
// GFX1250-ERR-NEXT:{{^}} ^

v_cos_bf16 v1, v2 mul:2
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX1250-ERR-NEXT:{{^}}v_cos_bf16 v1, v2 mul:2
// GFX1250-ERR-NEXT:{{^}} ^

v_exp_bf16 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_exp_bf16 v1, v2 clamp
// GFX1250-ERR-NEXT:{{^}} ^

v_exp_bf16 v1, v2 mul:2
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX1250-ERR-NEXT:{{^}}v_exp_bf16 v1, v2 mul:2
// GFX1250-ERR-NEXT:{{^}} ^

v_log_bf16 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_log_bf16 v1, v2 clamp
// GFX1250-ERR-NEXT:{{^}} ^

v_log_bf16 v1, v2 mul:2
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX1250-ERR-NEXT:{{^}}v_log_bf16 v1, v2 mul:2
// GFX1250-ERR-NEXT:{{^}} ^

v_rcp_bf16 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_rcp_bf16 v1, v2 clamp
// GFX1250-ERR-NEXT:{{^}} ^

v_rcp_bf16 v1, v2 mul:2
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX1250-ERR-NEXT:{{^}}v_rcp_bf16 v1, v2 mul:2
// GFX1250-ERR-NEXT:{{^}} ^

v_rsq_bf16 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_rsq_bf16 v1, v2 clamp
// GFX1250-ERR-NEXT:{{^}} ^

v_rsq_bf16 v1, v2 mul:2
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX1250-ERR-NEXT:{{^}}v_rsq_bf16 v1, v2 mul:2
// GFX1250-ERR-NEXT:{{^}} ^

v_sin_bf16 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_sin_bf16 v1, v2 clamp
// GFX1250-ERR-NEXT:{{^}} ^

v_sin_bf16 v1, v2 mul:2
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX1250-ERR-NEXT:{{^}}v_sin_bf16 v1, v2 mul:2
// GFX1250-ERR-NEXT:{{^}} ^

v_sqrt_bf16 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_sqrt_bf16 v1, v2 clamp
// GFX1250-ERR-NEXT:{{^}} ^

v_sqrt_bf16 v1, v2 mul:2
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX1250-ERR-NEXT:{{^}}v_sqrt_bf16 v1, v2 mul:2
// GFX1250-ERR-NEXT:{{^}} ^

v_tanh_bf16 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_tanh_bf16 v1, v2 clamp
// GFX1250-ERR-NEXT:{{^}} ^

v_tanh_bf16 v1, v2 mul:2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically you could just move all those deleted instructions to this file, and regenerate the check lines.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that would work. I think we just don't need that many here.

// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX1250-ERR-NEXT:{{^}}v_tanh_bf16 v1, v2 mul:2
// GFX1250-ERR-NEXT:{{^}} ^
72 changes: 0 additions & 72 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -3781,15 +3781,6 @@ v_tanh_bf16_e64 v5, null
v_tanh_bf16_e64 v5, -1
// GFX1250: v_tanh_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xca,0xd5,0xc1,0x00,0x00,0x00]

v_tanh_bf16_e64 v5, 0.5 mul:2
// GFX1250: v_tanh_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xca,0xd5,0xf0,0x00,0x00,0x08]

v_tanh_bf16_e64 v5, src_scc mul:4
// GFX1250: v_tanh_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xca,0xd5,0xfd,0x00,0x00,0x10]

v_tanh_bf16_e64 v255, -|0x8000| clamp div:2
// GFX1250: v_tanh_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]

v_prng_b32_e64 v5, v1
// GFX1250: v_prng_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xcb,0xd5,0x01,0x01,0x00,0x00]

Expand Down Expand Up @@ -3862,15 +3853,6 @@ v_rcp_bf16_e64 v5, null
v_rcp_bf16_e64 v5, -1
// GFX1250: v_rcp_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xf9,0xd5,0xc1,0x00,0x00,0x00]

v_rcp_bf16_e64 v5, 0.5 mul:2
// GFX1250: v_rcp_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xf9,0xd5,0xf0,0x00,0x00,0x08]

v_rcp_bf16_e64 v5, src_scc mul:4
// GFX1250: v_rcp_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xf9,0xd5,0xfd,0x00,0x00,0x10]

v_rcp_bf16_e64 v255, -|0x8000| clamp div:2
// GFX1250: v_rcp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xf9,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]

v_sqrt_bf16_e64 v5, v1
// GFX1250: v_sqrt_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x01,0x00,0x00]

Expand Down Expand Up @@ -3907,15 +3889,6 @@ v_sqrt_bf16_e64 v5, null
v_sqrt_bf16_e64 v5, -1
// GFX1250: v_sqrt_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfa,0xd5,0xc1,0x00,0x00,0x00]

v_sqrt_bf16_e64 v5, 0.5 mul:2
// GFX1250: v_sqrt_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfa,0xd5,0xf0,0x00,0x00,0x08]

v_sqrt_bf16_e64 v5, src_scc mul:4
// GFX1250: v_sqrt_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0xfd,0x00,0x00,0x10]

v_sqrt_bf16_e64 v255, -|0x8000| clamp div:2
// GFX1250: v_sqrt_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfa,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]

v_rsq_bf16_e64 v5, v1
// GFX1250: v_rsq_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfb,0xd5,0x01,0x01,0x00,0x00]

Expand Down Expand Up @@ -3952,15 +3925,6 @@ v_rsq_bf16_e64 v5, null
v_rsq_bf16_e64 v5, -1
// GFX1250: v_rsq_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfb,0xd5,0xc1,0x00,0x00,0x00]

v_rsq_bf16_e64 v5, 0.5 mul:2
// GFX1250: v_rsq_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfb,0xd5,0xf0,0x00,0x00,0x08]

v_rsq_bf16_e64 v5, src_scc mul:4
// GFX1250: v_rsq_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfb,0xd5,0xfd,0x00,0x00,0x10]

v_rsq_bf16_e64 v255, -|0x8000| clamp div:2
// GFX1250: v_rsq_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfb,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]

v_log_bf16_e64 v5, v1
// GFX1250: v_log_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x01,0x00,0x00]

Expand Down Expand Up @@ -3997,15 +3961,6 @@ v_log_bf16_e64 v5, null
v_log_bf16_e64 v5, -1
// GFX1250: v_log_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfc,0xd5,0xc1,0x00,0x00,0x00]

v_log_bf16_e64 v5, 0.5 mul:2
// GFX1250: v_log_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0xf0,0x00,0x00,0x08]

v_log_bf16_e64 v5, src_scc mul:4
// GFX1250: v_log_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfc,0xd5,0xfd,0x00,0x00,0x10]

v_log_bf16_e64 v255, -|0x8000| clamp div:2
// GFX1250: v_log_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfc,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]

v_exp_bf16_e64 v5, v1
// GFX1250: v_exp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x01,0x00,0x00]

Expand Down Expand Up @@ -4042,15 +3997,6 @@ v_exp_bf16_e64 v5, null
v_exp_bf16_e64 v5, -1
// GFX1250: v_exp_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfd,0xd5,0xc1,0x00,0x00,0x00]

v_exp_bf16_e64 v5, 0.5 mul:2
// GFX1250: v_exp_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfd,0xd5,0xf0,0x00,0x00,0x08]

v_exp_bf16_e64 v5, src_scc mul:4
// GFX1250: v_exp_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0xfd,0x00,0x00,0x10]

v_exp_bf16_e64 v255, -|0x8000| clamp div:2
// GFX1250: v_exp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfd,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]

v_sin_bf16_e64 v5, v1
// GFX1250: v_sin_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfe,0xd5,0x01,0x01,0x00,0x00]

Expand Down Expand Up @@ -4087,15 +4033,6 @@ v_sin_bf16_e64 v5, null
v_sin_bf16_e64 v5, -1
// GFX1250: v_sin_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfe,0xd5,0xc1,0x00,0x00,0x00]

v_sin_bf16_e64 v5, 0.5 mul:2
// GFX1250: v_sin_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfe,0xd5,0xf0,0x00,0x00,0x08]

v_sin_bf16_e64 v5, src_scc mul:4
// GFX1250: v_sin_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfe,0xd5,0xfd,0x00,0x00,0x10]

v_sin_bf16_e64 v255, -|0x8000| clamp div:2
// GFX1250: v_sin_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfe,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]

v_cos_bf16_e64 v5, v1
// GFX1250: v_cos_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00]

Expand Down Expand Up @@ -4132,15 +4069,6 @@ v_cos_bf16_e64 v5, null
v_cos_bf16_e64 v5, -1
// GFX1250: v_cos_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]

v_cos_bf16_e64 v5, 0.5 mul:2
// GFX1250: v_cos_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]

v_cos_bf16_e64 v5, src_scc mul:4
// GFX1250: v_cos_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]

v_cos_bf16_e64 v255, -|0x8000| clamp div:2
// GFX1250: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]

v_cvt_f32_bf16_e64 v5, v1
// GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00]

Expand Down
Loading