Skip to content

Commit 9a9a695

Browse files
committed
fix trans nomod/noclamp
1 parent d406c15 commit 9a9a695

13 files changed

+121
-827
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 31 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -513,6 +513,13 @@ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16",
513513
defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16",
514514
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;
515515

516+
517+
let HasClamp = 0, HasOMod = 0 in {
518+
def V_TRANS_BF16_Profile : VOPProfile <[bf16, bf16, untyped, untyped]>;
519+
def V_TRANS_BF16_t16_Profile : VOPProfile_True16 <VOP_BF16_BF16>;
520+
def V_TRANS_BF16_fake16_Profile : VOPProfile_Fake16 <VOP_BF16_BF16>;
521+
}
522+
516523
let TRANS = 1, SchedRW = [WriteTrans32] in {
517524
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
518525
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
@@ -527,14 +534,30 @@ defm V_TANH_F16 : VOP1Inst_t16 <"v_tanh_f16", VOP_F16_F16, int_amdgcn_tanh>;
527534
}
528535

529536
let SubtargetPredicate = HasBF16TransInsts in {
530-
defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>;
531-
defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
532-
defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>;
533-
defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>;
534-
defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>;
535-
defm V_EXP_BF16 : VOP1Inst_t16 <"v_exp_bf16", VOP_BF16_BF16, AMDGPUexpf16>;
536-
defm V_SIN_BF16 : VOP1Inst_t16 <"v_sin_bf16", VOP_BF16_BF16, AMDGPUsin>;
537-
defm V_COS_BF16 : VOP1Inst_t16 <"v_cos_bf16", VOP_BF16_BF16, AMDGPUcos>;
537+
defm V_TANH_BF16 : VOP1Inst_t16_with_profiles<"v_tanh_bf16", V_TRANS_BF16_Profile,
538+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
539+
int_amdgcn_tanh>;
540+
defm V_RCP_BF16 : VOP1Inst_t16_with_profiles<"v_rcp_bf16", V_TRANS_BF16_Profile,
541+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
542+
AMDGPUrcp>;
543+
defm V_SQRT_BF16 : VOP1Inst_t16_with_profiles<"v_sqrt_bf16", V_TRANS_BF16_Profile,
544+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
545+
any_amdgcn_sqrt>;
546+
defm V_RSQ_BF16 : VOP1Inst_t16_with_profiles<"v_rsq_bf16", V_TRANS_BF16_Profile,
547+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
548+
AMDGPUrsq>;
549+
defm V_LOG_BF16 : VOP1Inst_t16_with_profiles<"v_log_bf16", V_TRANS_BF16_Profile,
550+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
551+
AMDGPUlogf16>;
552+
defm V_EXP_BF16 : VOP1Inst_t16_with_profiles<"v_exp_bf16", V_TRANS_BF16_Profile,
553+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
554+
AMDGPUexpf16>;
555+
defm V_SIN_BF16 : VOP1Inst_t16_with_profiles<"v_sin_bf16", V_TRANS_BF16_Profile,
556+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
557+
AMDGPUsin>;
558+
defm V_COS_BF16 : VOP1Inst_t16_with_profiles<"v_cos_bf16", V_TRANS_BF16_Profile,
559+
V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
560+
AMDGPUcos>;
538561
}
539562
} // End TRANS = 1, SchedRW = [WriteTrans32]
540563
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1357,8 +1357,12 @@ class VOPBinOpClampPat<SDPatternOperator node, Instruction inst, ValueType vt> :
13571357

13581358
class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
13591359
dag src0 = !if(P.HasOMod,
1360-
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
1361-
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
1360+
!if(P.HasClamp,
1361+
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
1362+
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i32:$omod)),
1363+
!if(P.HasClamp,
1364+
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
1365+
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers)));
13621366

13631367
list<dag> ret3 = [(set P.DstVT:$vdst,
13641368
(DivergentFragOrOp<node, P>.ret (P.Src0VT src0),

llvm/test/CodeGen/AMDGPU/bf16-math.ll

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -368,7 +368,10 @@ define amdgpu_ps float @test_clamp_v2bf16_s(<2 x bfloat> inreg %src) {
368368
define amdgpu_ps bfloat @test_clamp_bf16_folding(bfloat %src) {
369369
; GCN-LABEL: test_clamp_bf16_folding:
370370
; GCN: ; %bb.0:
371-
; GCN-NEXT: v_exp_bf16_e64 v0, v0 clamp
371+
; GCN-NEXT: v_exp_bf16_e32 v0, v0
372+
; GCN-NEXT: v_nop
373+
; GCN-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
374+
; GCN-NEXT: v_pk_max_num_bf16 v0, v0, v0 clamp
372375
; GCN-NEXT: ; return to shader part epilog
373376
%exp = call bfloat @llvm.exp2.bf16(bfloat %src)
374377
%max = call bfloat @llvm.maxnum.bf16(bfloat %exp, bfloat 0.0)

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s

Lines changed: 80 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,3 +34,83 @@ v_cvt_f32_bf16 v5, v1 div:2
3434
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
3535
// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 div:2
3636
// GFX1250-ERR-NEXT:{{^}} ^
37+
38+
v_cos_bf16 v1, v2 clamp
39+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
40+
// GFX1250-ERR-NEXT:{{^}}v_cos_bf16 v1, v2 clamp
41+
// GFX1250-ERR-NEXT:{{^}} ^
42+
43+
v_cos_bf16 v1, v2 mul:2
44+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
45+
// GFX1250-ERR-NEXT:{{^}}v_cos_bf16 v1, v2 mul:2
46+
// GFX1250-ERR-NEXT:{{^}} ^
47+
48+
v_exp_bf16 v1, v2 clamp
49+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
50+
// GFX1250-ERR-NEXT:{{^}}v_exp_bf16 v1, v2 clamp
51+
// GFX1250-ERR-NEXT:{{^}} ^
52+
53+
v_exp_bf16 v1, v2 mul:2
54+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
55+
// GFX1250-ERR-NEXT:{{^}}v_exp_bf16 v1, v2 mul:2
56+
// GFX1250-ERR-NEXT:{{^}} ^
57+
58+
v_log_bf16 v1, v2 clamp
59+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
60+
// GFX1250-ERR-NEXT:{{^}}v_log_bf16 v1, v2 clamp
61+
// GFX1250-ERR-NEXT:{{^}} ^
62+
63+
v_log_bf16 v1, v2 mul:2
64+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
65+
// GFX1250-ERR-NEXT:{{^}}v_log_bf16 v1, v2 mul:2
66+
// GFX1250-ERR-NEXT:{{^}} ^
67+
68+
v_rcp_bf16 v1, v2 clamp
69+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
70+
// GFX1250-ERR-NEXT:{{^}}v_rcp_bf16 v1, v2 clamp
71+
// GFX1250-ERR-NEXT:{{^}} ^
72+
73+
v_rcp_bf16 v1, v2 mul:2
74+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
75+
// GFX1250-ERR-NEXT:{{^}}v_rcp_bf16 v1, v2 mul:2
76+
// GFX1250-ERR-NEXT:{{^}} ^
77+
78+
v_rsq_bf16 v1, v2 clamp
79+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
80+
// GFX1250-ERR-NEXT:{{^}}v_rsq_bf16 v1, v2 clamp
81+
// GFX1250-ERR-NEXT:{{^}} ^
82+
83+
v_rsq_bf16 v1, v2 mul:2
84+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
85+
// GFX1250-ERR-NEXT:{{^}}v_rsq_bf16 v1, v2 mul:2
86+
// GFX1250-ERR-NEXT:{{^}} ^
87+
88+
v_sin_bf16 v1, v2 clamp
89+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
90+
// GFX1250-ERR-NEXT:{{^}}v_sin_bf16 v1, v2 clamp
91+
// GFX1250-ERR-NEXT:{{^}} ^
92+
93+
v_sin_bf16 v1, v2 mul:2
94+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
95+
// GFX1250-ERR-NEXT:{{^}}v_sin_bf16 v1, v2 mul:2
96+
// GFX1250-ERR-NEXT:{{^}} ^
97+
98+
v_sqrt_bf16 v1, v2 clamp
99+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
100+
// GFX1250-ERR-NEXT:{{^}}v_sqrt_bf16 v1, v2 clamp
101+
// GFX1250-ERR-NEXT:{{^}} ^
102+
103+
v_sqrt_bf16 v1, v2 mul:2
104+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
105+
// GFX1250-ERR-NEXT:{{^}}v_sqrt_bf16 v1, v2 mul:2
106+
// GFX1250-ERR-NEXT:{{^}} ^
107+
108+
v_tanh_bf16 v1, v2 clamp
109+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
110+
// GFX1250-ERR-NEXT:{{^}}v_tanh_bf16 v1, v2 clamp
111+
// GFX1250-ERR-NEXT:{{^}} ^
112+
113+
v_tanh_bf16 v1, v2 mul:2
114+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
115+
// GFX1250-ERR-NEXT:{{^}}v_tanh_bf16 v1, v2 mul:2
116+
// GFX1250-ERR-NEXT:{{^}} ^

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s

Lines changed: 0 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -3781,15 +3781,6 @@ v_tanh_bf16_e64 v5, null
37813781
v_tanh_bf16_e64 v5, -1
37823782
// GFX1250: v_tanh_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xca,0xd5,0xc1,0x00,0x00,0x00]
37833783

3784-
v_tanh_bf16_e64 v5, 0.5 mul:2
3785-
// GFX1250: v_tanh_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xca,0xd5,0xf0,0x00,0x00,0x08]
3786-
3787-
v_tanh_bf16_e64 v5, src_scc mul:4
3788-
// GFX1250: v_tanh_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xca,0xd5,0xfd,0x00,0x00,0x10]
3789-
3790-
v_tanh_bf16_e64 v255, -|0x8000| clamp div:2
3791-
// GFX1250: v_tanh_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
3792-
37933784
v_prng_b32_e64 v5, v1
37943785
// GFX1250: v_prng_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xcb,0xd5,0x01,0x01,0x00,0x00]
37953786

@@ -3862,15 +3853,6 @@ v_rcp_bf16_e64 v5, null
38623853
v_rcp_bf16_e64 v5, -1
38633854
// GFX1250: v_rcp_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xf9,0xd5,0xc1,0x00,0x00,0x00]
38643855

3865-
v_rcp_bf16_e64 v5, 0.5 mul:2
3866-
// GFX1250: v_rcp_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xf9,0xd5,0xf0,0x00,0x00,0x08]
3867-
3868-
v_rcp_bf16_e64 v5, src_scc mul:4
3869-
// GFX1250: v_rcp_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xf9,0xd5,0xfd,0x00,0x00,0x10]
3870-
3871-
v_rcp_bf16_e64 v255, -|0x8000| clamp div:2
3872-
// GFX1250: v_rcp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xf9,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
3873-
38743856
v_sqrt_bf16_e64 v5, v1
38753857
// GFX1250: v_sqrt_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x01,0x00,0x00]
38763858

@@ -3907,15 +3889,6 @@ v_sqrt_bf16_e64 v5, null
39073889
v_sqrt_bf16_e64 v5, -1
39083890
// GFX1250: v_sqrt_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfa,0xd5,0xc1,0x00,0x00,0x00]
39093891

3910-
v_sqrt_bf16_e64 v5, 0.5 mul:2
3911-
// GFX1250: v_sqrt_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfa,0xd5,0xf0,0x00,0x00,0x08]
3912-
3913-
v_sqrt_bf16_e64 v5, src_scc mul:4
3914-
// GFX1250: v_sqrt_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0xfd,0x00,0x00,0x10]
3915-
3916-
v_sqrt_bf16_e64 v255, -|0x8000| clamp div:2
3917-
// GFX1250: v_sqrt_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfa,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
3918-
39193892
v_rsq_bf16_e64 v5, v1
39203893
// GFX1250: v_rsq_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfb,0xd5,0x01,0x01,0x00,0x00]
39213894

@@ -3952,15 +3925,6 @@ v_rsq_bf16_e64 v5, null
39523925
v_rsq_bf16_e64 v5, -1
39533926
// GFX1250: v_rsq_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfb,0xd5,0xc1,0x00,0x00,0x00]
39543927

3955-
v_rsq_bf16_e64 v5, 0.5 mul:2
3956-
// GFX1250: v_rsq_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfb,0xd5,0xf0,0x00,0x00,0x08]
3957-
3958-
v_rsq_bf16_e64 v5, src_scc mul:4
3959-
// GFX1250: v_rsq_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfb,0xd5,0xfd,0x00,0x00,0x10]
3960-
3961-
v_rsq_bf16_e64 v255, -|0x8000| clamp div:2
3962-
// GFX1250: v_rsq_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfb,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
3963-
39643928
v_log_bf16_e64 v5, v1
39653929
// GFX1250: v_log_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x01,0x00,0x00]
39663930

@@ -3997,15 +3961,6 @@ v_log_bf16_e64 v5, null
39973961
v_log_bf16_e64 v5, -1
39983962
// GFX1250: v_log_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfc,0xd5,0xc1,0x00,0x00,0x00]
39993963

4000-
v_log_bf16_e64 v5, 0.5 mul:2
4001-
// GFX1250: v_log_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0xf0,0x00,0x00,0x08]
4002-
4003-
v_log_bf16_e64 v5, src_scc mul:4
4004-
// GFX1250: v_log_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfc,0xd5,0xfd,0x00,0x00,0x10]
4005-
4006-
v_log_bf16_e64 v255, -|0x8000| clamp div:2
4007-
// GFX1250: v_log_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfc,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4008-
40093964
v_exp_bf16_e64 v5, v1
40103965
// GFX1250: v_exp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x01,0x00,0x00]
40113966

@@ -4042,15 +3997,6 @@ v_exp_bf16_e64 v5, null
40423997
v_exp_bf16_e64 v5, -1
40433998
// GFX1250: v_exp_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfd,0xd5,0xc1,0x00,0x00,0x00]
40443999

4045-
v_exp_bf16_e64 v5, 0.5 mul:2
4046-
// GFX1250: v_exp_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfd,0xd5,0xf0,0x00,0x00,0x08]
4047-
4048-
v_exp_bf16_e64 v5, src_scc mul:4
4049-
// GFX1250: v_exp_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0xfd,0x00,0x00,0x10]
4050-
4051-
v_exp_bf16_e64 v255, -|0x8000| clamp div:2
4052-
// GFX1250: v_exp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfd,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4053-
40544000
v_sin_bf16_e64 v5, v1
40554001
// GFX1250: v_sin_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfe,0xd5,0x01,0x01,0x00,0x00]
40564002

@@ -4087,15 +4033,6 @@ v_sin_bf16_e64 v5, null
40874033
v_sin_bf16_e64 v5, -1
40884034
// GFX1250: v_sin_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfe,0xd5,0xc1,0x00,0x00,0x00]
40894035

4090-
v_sin_bf16_e64 v5, 0.5 mul:2
4091-
// GFX1250: v_sin_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfe,0xd5,0xf0,0x00,0x00,0x08]
4092-
4093-
v_sin_bf16_e64 v5, src_scc mul:4
4094-
// GFX1250: v_sin_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfe,0xd5,0xfd,0x00,0x00,0x10]
4095-
4096-
v_sin_bf16_e64 v255, -|0x8000| clamp div:2
4097-
// GFX1250: v_sin_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfe,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4098-
40994036
v_cos_bf16_e64 v5, v1
41004037
// GFX1250: v_cos_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00]
41014038

@@ -4132,15 +4069,6 @@ v_cos_bf16_e64 v5, null
41324069
v_cos_bf16_e64 v5, -1
41334070
// GFX1250: v_cos_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]
41344071

4135-
v_cos_bf16_e64 v5, 0.5 mul:2
4136-
// GFX1250: v_cos_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]
4137-
4138-
v_cos_bf16_e64 v5, src_scc mul:4
4139-
// GFX1250: v_cos_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]
4140-
4141-
v_cos_bf16_e64 v255, -|0x8000| clamp div:2
4142-
// GFX1250: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
4143-
41444072
v_cvt_f32_bf16_e64 v5, v1
41454073
// GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00]
41464074

0 commit comments

Comments
 (0)