Skip to content

Commit 362d8fb

Browse files
arsenm and Pravin Jagtap authored
AMDGPU: MC support for v_cvt_scalef32_pk_{fp|bf}8_{f|bf}16 of gfx950. (#117384)
OPSEL ASM syntax: opsel:[x,y,z], where opsel[z] = Inst{14} = src0_modifier{3}. Note: the conventional Inst{13}, i.e. OPSEL[2], is ignored in the asm syntax. Co-authored-by: Pravin Jagtap <[email protected]>
1 parent 70fef78 commit 362d8fb

File tree

4 files changed

+237
-0
lines changed

4 files changed

+237
-0
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -912,18 +912,35 @@ def VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32,
912912
let HasOMod = 0;
913913
}
914914

915+
// VOP3 profile used by the v_cvt_scalef32_pk_{fp8|bf8}_{f16|bf16} definitions
// in this file. The operand type list is [i32, v2f16, f32, untyped]; the same
// v2f16-typed profile is also used for the *_bf16 variants.
// InsVOP3OpSel is overridden so that both sources carry FP32InputMods and the
// instruction takes an op_sel0 operand. Clamp, omod, src2 (and its modifiers)
// and the VOP3 DPP extension are disabled; op_sel is enabled.
// NOTE(review): per the commit description, only the third op_sel element is
// meaningful (Inst{14}, i.e. src0_modifiers{3}) and Inst{13} is ignored in the
// asm syntax -- confirm against the gfx950 ISA documentation.
def VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f16, f32, untyped]>,
916+
VOP3_OPSEL> {
917+
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
918+
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
919+
op_sel0:$op_sel);
920+
let HasClamp = 0;
921+
let HasSrc2 = 0;
922+
let HasSrc2Mods = 0;
923+
let HasExtVOP3DPP = 0;
924+
let HasOpSel = 1;
925+
let HasOMod = 0;
926+
}
927+
915928
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
916929
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
917930
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
918931
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
919932
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
933+
defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
934+
defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
920935
}
921936

922937
let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
923938
defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
924939
defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
925940
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
926941
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
942+
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
943+
defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
927944
}
928945

929946
let SubtargetPredicate = isGFX10Plus in {
@@ -1853,10 +1870,14 @@ defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
18531870
defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
18541871
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
18551872
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3OpSel_Real_gfx9 <0x239>;
1873+
defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3OpSel_Real_gfx9 <0x240>;
1874+
defm V_CVT_SCALEF32_PK_FP8_BF16: VOP3OpSel_Real_gfx9 <0x244>;
18561875
}
18571876
let OtherPredicates = [HasBF8ConversionScaleInsts] in {
18581877
defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
18591878
defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>;
18601879
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
18611880
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>;
1881+
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3OpSel_Real_gfx9 <0x241>;
1882+
defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>;
18621883
}

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,3 +597,99 @@ v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0]
597597
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
598598
// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00]
599599
v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0]
600+
601+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
602+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00]
603+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3
604+
605+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
606+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20]
607+
v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3|
608+
609+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
610+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00]
611+
v_cvt_scalef32_pk_fp8_f16 v1, s2, 3
612+
613+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
614+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00]
615+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1]
616+
617+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
618+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20]
619+
v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1]
620+
621+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
622+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00]
623+
v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1]
624+
625+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
626+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
627+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3
628+
629+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
630+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20]
631+
v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3|
632+
633+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
634+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00]
635+
v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3
636+
637+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
638+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00]
639+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1]
640+
641+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
642+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20]
643+
v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1]
644+
645+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
646+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00]
647+
v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1]
648+
649+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
650+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00]
651+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3
652+
653+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
654+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20]
655+
v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3|
656+
657+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
658+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00]
659+
v_cvt_scalef32_pk_bf8_f16 v1, s2, 3
660+
661+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
662+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00]
663+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1]
664+
665+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
666+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20]
667+
v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1]
668+
669+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
670+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00]
671+
v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1]
672+
673+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
674+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00]
675+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3
676+
677+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
678+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20]
679+
v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3|
680+
681+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
682+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00]
683+
v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3
684+
685+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
686+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00]
687+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1]
688+
689+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
690+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20]
691+
v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1]
692+
693+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
694+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00]
695+
v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1]

llvm/test/MC/AMDGPU/gfx950_err.s

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,51 @@ v_permlane16_swap_b32_e32 v1, v2 fi:0
2929

3030
// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction
3131
v_permlane16_swap_b32_e32 v1, v2 bound_ctrl:1 fi:1
32+
33+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
34+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 clamp
35+
36+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
37+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 mul:2
38+
39+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
40+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 div:2
41+
42+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
43+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 clamp div:2
44+
45+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
46+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 clamp
47+
48+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
49+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 mul:2
50+
51+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
52+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 div:2
53+
54+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
55+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 clamp div:2
56+
57+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
58+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 clamp
59+
60+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
61+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 mul:2
62+
63+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
64+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 div:2
65+
66+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
67+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 clamp div:2
68+
69+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
70+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 clamp
71+
72+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
73+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 mul:2
74+
75+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
76+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 div:2
77+
78+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
79+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 clamp div:2

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,75 @@
395395

396396
# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00]
397397
0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00
398+
399+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00]
400+
0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00
401+
402+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20]
403+
0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20
404+
405+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00]
406+
0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00
407+
408+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00]
409+
0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00
410+
411+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20]
412+
0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20
413+
414+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00]
415+
0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00
416+
417+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
418+
0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00
419+
420+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20]
421+
0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20
422+
423+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00]
424+
0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00
425+
426+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00]
427+
0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00
428+
429+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20]
430+
0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20
431+
432+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00]
433+
0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00
434+
435+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00]
436+
0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00
437+
438+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20]
439+
0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20
440+
441+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00]
442+
0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00
443+
444+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00]
445+
0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00
446+
447+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20]
448+
0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20
449+
450+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00]
451+
0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00
452+
453+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00]
454+
0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00
455+
456+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20]
457+
0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20
458+
459+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00]
460+
0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00
461+
462+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00]
463+
0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00
464+
465+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20]
466+
0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20
467+
468+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00]
469+
0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00

0 commit comments

Comments
 (0)