Skip to content

Commit 44ee1eb

Browse files
committed
fix trans nomod/noclamp
1 parent 22079e3 commit 44ee1eb

12 files changed

+41
-827
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,13 @@ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16",
513513
defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16",
514514
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;
515515

516+
517+
let HasClamp = 0, HasOMod = 0 in {
518+
def V_TRANS_BF16_Profile : VOPProfile <[bf16, bf16, untyped, untyped]>;
519+
def V_TRANS_BF16_t16_Profile : VOPProfile_True16 <VOP_BF16_BF16>;
520+
def V_TRANS_BF16_fake16_Profile : VOPProfile_Fake16 <VOP_BF16_BF16>;
521+
}
522+
516523
let TRANS = 1, SchedRW = [WriteTrans32] in {
517524
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
518525
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
@@ -527,14 +534,30 @@ defm V_TANH_F16 : VOP1Inst_t16 <"v_tanh_f16", VOP_F16_F16, int_amdgcn_tanh>;
527534
}
528535

529536
let SubtargetPredicate = HasBF16TransInsts in {
530-
defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>;
531-
defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
532-
defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>;
533-
defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>;
534-
defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>;
535-
defm V_EXP_BF16 : VOP1Inst_t16 <"v_exp_bf16", VOP_BF16_BF16, AMDGPUexpf16>;
536-
defm V_SIN_BF16 : VOP1Inst_t16 <"v_sin_bf16", VOP_BF16_BF16, AMDGPUsin>;
537-
defm V_COS_BF16 : VOP1Inst_t16 <"v_cos_bf16", VOP_BF16_BF16, AMDGPUcos>;
537+
defm V_TANH_BF16 : VOP1Inst_t16_with_profiles<"v_tanh_bf16", V_TRANS_BF16_Profile,
538+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
539+
int_amdgcn_tanh>;
540+
defm V_RCP_BF16 : VOP1Inst_t16_with_profiles<"v_rcp_bf16", V_TRANS_BF16_Profile,
541+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
542+
AMDGPUrcp>;
543+
defm V_SQRT_BF16 : VOP1Inst_t16_with_profiles<"v_sqrt_bf16", V_TRANS_BF16_Profile,
544+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
545+
any_amdgcn_sqrt>;
546+
defm V_RSQ_BF16 : VOP1Inst_t16_with_profiles<"v_rsq_bf16", V_TRANS_BF16_Profile,
547+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
548+
AMDGPUrsq>;
549+
defm V_LOG_BF16 : VOP1Inst_t16_with_profiles<"v_log_bf16", V_TRANS_BF16_Profile,
550+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
551+
AMDGPUlogf16>;
552+
defm V_EXP_BF16 : VOP1Inst_t16_with_profiles<"v_exp_bf16", V_TRANS_BF16_Profile,
553+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
554+
AMDGPUexpf16>;
555+
defm V_SIN_BF16 : VOP1Inst_t16_with_profiles<"v_sin_bf16", V_TRANS_BF16_Profile,
556+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
557+
AMDGPUsin>;
558+
defm V_COS_BF16 : VOP1Inst_t16_with_profiles<"v_cos_bf16", V_TRANS_BF16_Profile,
559+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
560+
AMDGPUcos>;
538561
}
539562
} // End TRANS = 1, SchedRW = [WriteTrans32]
540563
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1357,8 +1357,12 @@ class VOPBinOpClampPat<SDPatternOperator node, Instruction inst, ValueType vt> :
13571357

13581358
class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
13591359
dag src0 = !if(P.HasOMod,
1360-
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
1361-
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
1360+
!if(P.HasClamp,
1361+
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
1362+
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i32:$omod)),
1363+
!if(P.HasClamp,
1364+
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
1365+
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers)));
13621366

13631367
list<dag> ret3 = [(set P.DstVT:$vdst,
13641368
(DivergentFragOrOp<node, P>.ret (P.Src0VT src0),

llvm/test/CodeGen/AMDGPU/bf16-math.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,10 @@ define amdgpu_ps float @test_clamp_v2bf16_s(<2 x bfloat> inreg %src) {
368368
define amdgpu_ps bfloat @test_clamp_bf16_folding(bfloat %src) {
369369
; GCN-LABEL: test_clamp_bf16_folding:
370370
; GCN: ; %bb.0:
371-
; GCN-NEXT: v_exp_bf16_e64 v0, v0 clamp
371+
; GCN-NEXT: v_exp_bf16_e32 v0, v0
372+
; GCN-NEXT: v_nop
373+
; GCN-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
374+
; GCN-NEXT: v_pk_max_num_bf16 v0, v0, v0 clamp
372375
; GCN-NEXT: ; return to shader part epilog
373376
%exp = call bfloat @llvm.exp2.bf16(bfloat %src)
374377
%max = call bfloat @llvm.maxnum.bf16(bfloat %exp, bfloat 0.0)

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s

Lines changed: 0 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -3781,15 +3781,6 @@ v_tanh_bf16_e64 v5, null
37813781
v_tanh_bf16_e64 v5, -1
37823782
// GFX1250: v_tanh_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xca,0xd5,0xc1,0x00,0x00,0x00]
37833783

3784-
v_tanh_bf16_e64 v5, 0.5 mul:2
3785-
// GFX1250: v_tanh_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xca,0xd5,0xf0,0x00,0x00,0x08]
3786-
3787-
v_tanh_bf16_e64 v5, src_scc mul:4
3788-
// GFX1250: v_tanh_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xca,0xd5,0xfd,0x00,0x00,0x10]
3789-
3790-
v_tanh_bf16_e64 v255, -|0x8000| clamp div:2
3791-
// GFX1250: v_tanh_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
3792-
37933784
v_prng_b32_e64 v5, v1
37943785
// GFX1250: v_prng_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xcb,0xd5,0x01,0x01,0x00,0x00]
37953786

@@ -3862,15 +3853,6 @@ v_rcp_bf16_e64 v5, null
38623853
v_rcp_bf16_e64 v5, -1
38633854
// GFX1250: v_rcp_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xf9,0xd5,0xc1,0x00,0x00,0x00]
38643855

3865-
v_rcp_bf16_e64 v5, 0.5 mul:2
3866-
// GFX1250: v_rcp_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xf9,0xd5,0xf0,0x00,0x00,0x08]
3867-
3868-
v_rcp_bf16_e64 v5, src_scc mul:4
3869-
// GFX1250: v_rcp_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xf9,0xd5,0xfd,0x00,0x00,0x10]
3870-
3871-
v_rcp_bf16_e64 v255, -|0x8000| clamp div:2
3872-
// GFX1250: v_rcp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xf9,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
3873-
38743856
v_sqrt_bf16_e64 v5, v1
38753857
// GFX1250: v_sqrt_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x01,0x00,0x00]
38763858

@@ -3907,15 +3889,6 @@ v_sqrt_bf16_e64 v5, null
39073889
v_sqrt_bf16_e64 v5, -1
39083890
// GFX1250: v_sqrt_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfa,0xd5,0xc1,0x00,0x00,0x00]
39093891

3910-
v_sqrt_bf16_e64 v5, 0.5 mul:2
3911-
// GFX1250: v_sqrt_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfa,0xd5,0xf0,0x00,0x00,0x08]
3912-
3913-
v_sqrt_bf16_e64 v5, src_scc mul:4
3914-
// GFX1250: v_sqrt_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0xfd,0x00,0x00,0x10]
3915-
3916-
v_sqrt_bf16_e64 v255, -|0x8000| clamp div:2
3917-
// GFX1250: v_sqrt_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfa,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
3918-
39193892
v_rsq_bf16_e64 v5, v1
39203893
// GFX1250: v_rsq_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfb,0xd5,0x01,0x01,0x00,0x00]
39213894

@@ -3952,15 +3925,6 @@ v_rsq_bf16_e64 v5, null
39523925
v_rsq_bf16_e64 v5, -1
39533926
// GFX1250: v_rsq_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfb,0xd5,0xc1,0x00,0x00,0x00]
39543927

3955-
v_rsq_bf16_e64 v5, 0.5 mul:2
3956-
// GFX1250: v_rsq_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfb,0xd5,0xf0,0x00,0x00,0x08]
3957-
3958-
v_rsq_bf16_e64 v5, src_scc mul:4
3959-
// GFX1250: v_rsq_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfb,0xd5,0xfd,0x00,0x00,0x10]
3960-
3961-
v_rsq_bf16_e64 v255, -|0x8000| clamp div:2
3962-
// GFX1250: v_rsq_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfb,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
3963-
39643928
v_log_bf16_e64 v5, v1
39653929
// GFX1250: v_log_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x01,0x00,0x00]
39663930

@@ -3997,15 +3961,6 @@ v_log_bf16_e64 v5, null
39973961
v_log_bf16_e64 v5, -1
39983962
// GFX1250: v_log_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfc,0xd5,0xc1,0x00,0x00,0x00]
39993963

4000-
v_log_bf16_e64 v5, 0.5 mul:2
4001-
// GFX1250: v_log_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0xf0,0x00,0x00,0x08]
4002-
4003-
v_log_bf16_e64 v5, src_scc mul:4
4004-
// GFX1250: v_log_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfc,0xd5,0xfd,0x00,0x00,0x10]
4005-
4006-
v_log_bf16_e64 v255, -|0x8000| clamp div:2
4007-
// GFX1250: v_log_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfc,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4008-
40093964
v_exp_bf16_e64 v5, v1
40103965
// GFX1250: v_exp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x01,0x00,0x00]
40113966

@@ -4042,15 +3997,6 @@ v_exp_bf16_e64 v5, null
40423997
v_exp_bf16_e64 v5, -1
40433998
// GFX1250: v_exp_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfd,0xd5,0xc1,0x00,0x00,0x00]
40443999

4045-
v_exp_bf16_e64 v5, 0.5 mul:2
4046-
// GFX1250: v_exp_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfd,0xd5,0xf0,0x00,0x00,0x08]
4047-
4048-
v_exp_bf16_e64 v5, src_scc mul:4
4049-
// GFX1250: v_exp_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0xfd,0x00,0x00,0x10]
4050-
4051-
v_exp_bf16_e64 v255, -|0x8000| clamp div:2
4052-
// GFX1250: v_exp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfd,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4053-
40544000
v_sin_bf16_e64 v5, v1
40554001
// GFX1250: v_sin_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfe,0xd5,0x01,0x01,0x00,0x00]
40564002

@@ -4087,15 +4033,6 @@ v_sin_bf16_e64 v5, null
40874033
v_sin_bf16_e64 v5, -1
40884034
// GFX1250: v_sin_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfe,0xd5,0xc1,0x00,0x00,0x00]
40894035

4090-
v_sin_bf16_e64 v5, 0.5 mul:2
4091-
// GFX1250: v_sin_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfe,0xd5,0xf0,0x00,0x00,0x08]
4092-
4093-
v_sin_bf16_e64 v5, src_scc mul:4
4094-
// GFX1250: v_sin_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfe,0xd5,0xfd,0x00,0x00,0x10]
4095-
4096-
v_sin_bf16_e64 v255, -|0x8000| clamp div:2
4097-
// GFX1250: v_sin_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfe,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4098-
40994036
v_cos_bf16_e64 v5, v1
41004037
// GFX1250: v_cos_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00]
41014038

@@ -4132,15 +4069,6 @@ v_cos_bf16_e64 v5, null
41324069
v_cos_bf16_e64 v5, -1
41334070
// GFX1250: v_cos_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]
41344071

4135-
v_cos_bf16_e64 v5, 0.5 mul:2
4136-
// GFX1250: v_cos_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]
4137-
4138-
v_cos_bf16_e64 v5, src_scc mul:4
4139-
// GFX1250: v_cos_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]
4140-
4141-
v_cos_bf16_e64 v255, -|0x8000| clamp div:2
4142-
// GFX1250: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4143-
41444072
v_cvt_f32_bf16_e64 v5, v1
41454073
// GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00]
41464074

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s

Lines changed: 0 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -3952,15 +3952,6 @@ v_tanh_bf16_e64 v5.l, null
39523952
v_tanh_bf16_e64 v5.l, -1
39533953
// GFX1250: v_tanh_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xca,0xd5,0xc1,0x00,0x00,0x00]
39543954

3955-
v_tanh_bf16_e64 v5.l, 0.5 mul:2
3956-
// GFX1250: v_tanh_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xca,0xd5,0xf0,0x00,0x00,0x08]
3957-
3958-
v_tanh_bf16_e64 v5.l, src_scc mul:4
3959-
// GFX1250: v_tanh_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xca,0xd5,0xfd,0x00,0x00,0x10]
3960-
3961-
v_tanh_bf16_e64 v255.l, -|0x8000| clamp div:2
3962-
// GFX1250: v_tanh_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
3963-
39643955
v_tanh_bf16 v5.l, v128.h
39653956
// GFX1250: v_tanh_bf16_e64 v5.l, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0xca,0xd5,0x80,0x01,0x00,0x00]
39663957

@@ -4036,15 +4027,6 @@ v_rcp_bf16_e64 v5.l, null
40364027
v_rcp_bf16_e64 v5.l, -1
40374028
// GFX1250: v_rcp_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xf9,0xd5,0xc1,0x00,0x00,0x00]
40384029

4039-
v_rcp_bf16_e64 v5.l, 0.5 mul:2
4040-
// GFX1250: v_rcp_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xf9,0xd5,0xf0,0x00,0x00,0x08]
4041-
4042-
v_rcp_bf16_e64 v5.l, src_scc mul:4
4043-
// GFX1250: v_rcp_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xf9,0xd5,0xfd,0x00,0x00,0x10]
4044-
4045-
v_rcp_bf16_e64 v255.l, -|0x8000| clamp div:2
4046-
// GFX1250: v_rcp_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xf9,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4047-
40484030
v_rcp_bf16 v5.h, v128.h
40494031
// GFX1250: v_rcp_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xf9,0xd5,0x80,0x01,0x00,0x00]
40504032

@@ -4084,15 +4066,6 @@ v_sqrt_bf16_e64 v5.l, null
40844066
v_sqrt_bf16_e64 v5.l, -1
40854067
// GFX1250: v_sqrt_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xfa,0xd5,0xc1,0x00,0x00,0x00]
40864068

4087-
v_sqrt_bf16_e64 v5.l, 0.5 mul:2
4088-
// GFX1250: v_sqrt_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xfa,0xd5,0xf0,0x00,0x00,0x08]
4089-
4090-
v_sqrt_bf16_e64 v5.l, src_scc mul:4
4091-
// GFX1250: v_sqrt_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0xfd,0x00,0x00,0x10]
4092-
4093-
v_sqrt_bf16_e64 v255.l, -|0x8000| clamp div:2
4094-
// GFX1250: v_sqrt_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfa,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4095-
40964069
v_sqrt_bf16 v5.h, v128.h
40974070
// GFX1250: v_sqrt_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfa,0xd5,0x80,0x01,0x00,0x00]
40984071

@@ -4132,15 +4105,6 @@ v_rsq_bf16_e64 v5.l, null
41324105
v_rsq_bf16_e64 v5.l, -1
41334106
// GFX1250: v_rsq_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xfb,0xd5,0xc1,0x00,0x00,0x00]
41344107

4135-
v_rsq_bf16_e64 v5.l, 0.5 mul:2
4136-
// GFX1250: v_rsq_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xfb,0xd5,0xf0,0x00,0x00,0x08]
4137-
4138-
v_rsq_bf16_e64 v5.l, src_scc mul:4
4139-
// GFX1250: v_rsq_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xfb,0xd5,0xfd,0x00,0x00,0x10]
4140-
4141-
v_rsq_bf16_e64 v255.l, -|0x8000| clamp div:2
4142-
// GFX1250: v_rsq_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfb,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4143-
41444108
v_rsq_bf16 v5.h, v128.h
41454109
// GFX1250: v_rsq_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfb,0xd5,0x80,0x01,0x00,0x00]
41464110

@@ -4180,15 +4144,6 @@ v_log_bf16_e64 v5.l, null
41804144
v_log_bf16_e64 v5.l, -1
41814145
// GFX1250: v_log_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xfc,0xd5,0xc1,0x00,0x00,0x00]
41824146

4183-
v_log_bf16_e64 v5.l, 0.5 mul:2
4184-
// GFX1250: v_log_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0xf0,0x00,0x00,0x08]
4185-
4186-
v_log_bf16_e64 v5.l, src_scc mul:4
4187-
// GFX1250: v_log_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xfc,0xd5,0xfd,0x00,0x00,0x10]
4188-
4189-
v_log_bf16_e64 v255.l, -|0x8000| clamp div:2
4190-
// GFX1250: v_log_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfc,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4191-
41924147
v_log_bf16 v5.h, v128.h
41934148
// GFX1250: v_log_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfc,0xd5,0x80,0x01,0x00,0x00]
41944149

@@ -4228,15 +4183,6 @@ v_exp_bf16_e64 v5.l, null
42284183
v_exp_bf16_e64 v5.l, -1
42294184
// GFX1250: v_exp_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xfd,0xd5,0xc1,0x00,0x00,0x00]
42304185

4231-
v_exp_bf16_e64 v5.l, 0.5 mul:2
4232-
// GFX1250: v_exp_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xfd,0xd5,0xf0,0x00,0x00,0x08]
4233-
4234-
v_exp_bf16_e64 v5.l, src_scc mul:4
4235-
// GFX1250: v_exp_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0xfd,0x00,0x00,0x10]
4236-
4237-
v_exp_bf16_e64 v255.l, -|0x8000| clamp div:2
4238-
// GFX1250: v_exp_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfd,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4239-
42404186
v_exp_bf16 v5.h, v128.h
42414187
// GFX1250: v_exp_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfd,0xd5,0x80,0x01,0x00,0x00]
42424188

@@ -4276,15 +4222,6 @@ v_sin_bf16_e64 v5.l, null
42764222
v_sin_bf16_e64 v5.l, -1
42774223
// GFX1250: v_sin_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xfe,0xd5,0xc1,0x00,0x00,0x00]
42784224

4279-
v_sin_bf16_e64 v5.l, 0.5 mul:2
4280-
// GFX1250: v_sin_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xfe,0xd5,0xf0,0x00,0x00,0x08]
4281-
4282-
v_sin_bf16_e64 v5.l, src_scc mul:4
4283-
// GFX1250: v_sin_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xfe,0xd5,0xfd,0x00,0x00,0x10]
4284-
4285-
v_sin_bf16_e64 v255.l, -|0x8000| clamp div:2
4286-
// GFX1250: v_sin_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfe,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4287-
42884225
v_sin_bf16 v5.h, v128.h
42894226
// GFX1250: v_sin_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfe,0xd5,0x80,0x01,0x00,0x00]
42904227

@@ -4324,15 +4261,6 @@ v_cos_bf16_e64 v5.l, null
43244261
v_cos_bf16_e64 v5.l, -1
43254262
// GFX1250: v_cos_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]
43264263

4327-
v_cos_bf16_e64 v5.l, 0.5 mul:2
4328-
// GFX1250: v_cos_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]
4329-
4330-
v_cos_bf16_e64 v5.l, src_scc mul:4
4331-
// GFX1250: v_cos_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]
4332-
4333-
v_cos_bf16_e64 v255.l, -|0x8000| clamp div:2
4334-
// GFX1250: v_cos_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4335-
43364264
v_cos_bf16_e64 v5.h, v128.h
43374265
// GFX1250: v_cos_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xff,0xd5,0x80,0x01,0x00,0x00]
43384266

0 commit comments

Comments
 (0)